LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 8/20] powerpc/mm: Make low level TLB flush ops on BookE take additional args (v2)
From: Benjamin Herrenschmidt @ 2009-07-24  9:15 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <1248426902.401617.944220131651.qpush@grosgo>

We need to pass down whether the page is direct or indirect and we'll
need to pass the page size to _tlbil_va and _tlbivax_bcast

We also add a new low level _tlbil_pid_noind() which does a TLB flush
by PID but avoids flushing indirect entries if possible

This implements those new prototypes but defines them with inlines
or macros so that no additional arguments are actually passed on current
processors.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---

v2. __tlbil_va() declaration had the wrong number of arguments
    also remove a stray trailing semicolon

 arch/powerpc/include/asm/tlbflush.h |   11 +++++++--
 arch/powerpc/mm/mmu_decl.h          |   16 +++++++++++--
 arch/powerpc/mm/tlb_nohash.c        |   42 ++++++++++++++++++++++++++----------
 arch/powerpc/mm/tlb_nohash_low.S    |    6 ++---
 4 files changed, 56 insertions(+), 19 deletions(-)

--- linux-work.orig/arch/powerpc/include/asm/tlbflush.h	2009-07-24 16:24:08.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/tlbflush.h	2009-07-24 16:24:12.000000000 +1000
@@ -6,7 +6,7 @@
  *
  *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
  *  - flush_tlb_page(vma, vmaddr) flushes one page
- *  - local_flush_tlb_mm(mm) flushes the specified mm context on
+ *  - local_flush_tlb_mm(mm, full) flushes the specified mm context on
  *                           the local processor
  *  - local_flush_tlb_page(vma, vmaddr) flushes one page on the local processor
  *  - flush_tlb_page_nohash(vma, vmaddr) flushes one page if SW loaded TLB
@@ -29,7 +29,8 @@
  * specific tlbie's
  */
 
-#include <linux/mm.h>
+struct vm_area_struct;
+struct mm_struct;
 
 #define MMU_NO_CONTEXT      	((unsigned int)-1)
 
@@ -40,12 +41,18 @@ extern void flush_tlb_kernel_range(unsig
 extern void local_flush_tlb_mm(struct mm_struct *mm);
 extern void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
 
+extern void __local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
+				   int tsize, int ind);
+
 #ifdef CONFIG_SMP
 extern void flush_tlb_mm(struct mm_struct *mm);
 extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+extern void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
+			     int tsize, int ind);
 #else
 #define flush_tlb_mm(mm)		local_flush_tlb_mm(mm)
 #define flush_tlb_page(vma,addr)	local_flush_tlb_page(vma,addr)
+#define __flush_tlb_page(mm,addr,p,i)	__local_flush_tlb_page(mm,addr,p,i)
 #endif
 #define flush_tlb_page_nohash(vma,addr)	flush_tlb_page(vma,addr)
 
Index: linux-work/arch/powerpc/mm/mmu_decl.h
===================================================================
--- linux-work.orig/arch/powerpc/mm/mmu_decl.h	2009-07-24 16:24:08.000000000 +1000
+++ linux-work/arch/powerpc/mm/mmu_decl.h	2009-07-24 17:04:57.000000000 +1000
@@ -36,21 +36,30 @@ static inline void _tlbil_pid(unsigned i
 {
 	asm volatile ("sync; tlbia; isync" : : : "memory");
 }
+#define _tlbil_pid_noind(pid)	_tlbil_pid(pid)
+
 #else /* CONFIG_40x || CONFIG_8xx */
 extern void _tlbil_all(void);
 extern void _tlbil_pid(unsigned int pid);
+#define _tlbil_pid_noind(pid)	_tlbil_pid(pid)
 #endif /* !(CONFIG_40x || CONFIG_8xx) */
 
 /*
  * On 8xx, we directly inline tlbie, on others, it's extern
  */
 #ifdef CONFIG_8xx
-static inline void _tlbil_va(unsigned long address, unsigned int pid)
+static inline void _tlbil_va(unsigned long address, unsigned int pid,
+			     unsigned int tsize, unsigned int ind)
 {
 	asm volatile ("tlbie %0; sync" : : "r" (address) : "memory");
 }
 #else /* CONFIG_8xx */
-extern void _tlbil_va(unsigned long address, unsigned int pid);
+extern void __tlbil_va(unsigned long address, unsigned int pid);
+static inline void _tlbil_va(unsigned long address, unsigned int pid,
+			     unsigned int tsize, unsigned int ind)
+{
+	__tlbil_va(address, pid);
+}
 #endif /* CONIFG_8xx */
 
 /*
@@ -58,7 +67,8 @@ extern void _tlbil_va(unsigned long addr
  * implementation. When that becomes the case, this will be
  * an extern.
  */
-static inline void _tlbivax_bcast(unsigned long address, unsigned int pid)
+static inline void _tlbivax_bcast(unsigned long address, unsigned int pid,
+				   unsigned int tsize, unsigned int ind)
 {
 	BUG();
 }
Index: linux-work/arch/powerpc/mm/tlb_nohash.c
===================================================================
--- linux-work.orig/arch/powerpc/mm/tlb_nohash.c	2009-07-24 16:24:08.000000000 +1000
+++ linux-work/arch/powerpc/mm/tlb_nohash.c	2009-07-24 17:41:33.000000000 +1000
@@ -67,18 +67,24 @@ void local_flush_tlb_mm(struct mm_struct
 }
 EXPORT_SYMBOL(local_flush_tlb_mm);
 
-void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+void __local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
+			    int tsize, int ind)
 {
 	unsigned int pid;
 
 	preempt_disable();
-	pid = vma ? vma->vm_mm->context.id : 0;
+	pid = mm ? mm->context.id : 0;
 	if (pid != MMU_NO_CONTEXT)
-		_tlbil_va(vmaddr, pid);
+		_tlbil_va(vmaddr, pid, tsize, ind);
 	preempt_enable();
 }
-EXPORT_SYMBOL(local_flush_tlb_page);
 
+void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+	__local_flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr,
+			       0 /* tsize unused for now */, 0);
+}
+EXPORT_SYMBOL(local_flush_tlb_page);
 
 /*
  * And here are the SMP non-local implementations
@@ -96,6 +102,8 @@ static int mm_is_core_local(struct mm_st
 struct tlb_flush_param {
 	unsigned long addr;
 	unsigned int pid;
+	unsigned int tsize;
+	unsigned int ind;
 };
 
 static void do_flush_tlb_mm_ipi(void *param)
@@ -109,7 +117,7 @@ static void do_flush_tlb_page_ipi(void *
 {
 	struct tlb_flush_param *p = param;
 
-	_tlbil_va(p->addr, p->pid);
+	_tlbil_va(p->addr, p->pid, p->tsize, p->ind);
 }
 
 
@@ -149,37 +157,49 @@ void flush_tlb_mm(struct mm_struct *mm)
 }
 EXPORT_SYMBOL(flush_tlb_mm);
 
-void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
+		      int tsize, int ind)
 {
 	struct cpumask *cpu_mask;
 	unsigned int pid;
 
 	preempt_disable();
-	pid = vma ? vma->vm_mm->context.id : 0;
+	pid = mm ? mm->context.id : 0;
 	if (unlikely(pid == MMU_NO_CONTEXT))
 		goto bail;
-	cpu_mask = mm_cpumask(vma->vm_mm);
+	cpu_mask = mm_cpumask(mm);
 	if (!mm_is_core_local(mm)) {
 		/* If broadcast tlbivax is supported, use it */
 		if (mmu_has_feature(MMU_FTR_USE_TLBIVAX_BCAST)) {
 			int lock = mmu_has_feature(MMU_FTR_LOCK_BCAST_INVAL);
 			if (lock)
 				spin_lock(&tlbivax_lock);
-			_tlbivax_bcast(vmaddr, pid);
+			_tlbivax_bcast(vmaddr, pid, tsize, ind);
 			if (lock)
 				spin_unlock(&tlbivax_lock);
 			goto bail;
 		} else {
-			struct tlb_flush_param p = { .pid = pid, .addr = vmaddr };
+			struct tlb_flush_param p = {
+				.pid = pid,
+				.addr = vmaddr,
+				.tsize = tsize,
+				.ind = ind,
+			};
 			/* Ignores smp_processor_id() even if set in cpu_mask */
 			smp_call_function_many(cpu_mask,
 					       do_flush_tlb_page_ipi, &p, 1);
 		}
 	}
-	_tlbil_va(vmaddr, pid);
+	_tlbil_va(vmaddr, pid, tsize, ind);
  bail:
 	preempt_enable();
 }
+
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+	__flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr,
+			 0 /* tsize unused for now */, 0);
+}
 EXPORT_SYMBOL(flush_tlb_page);
 
 #endif /* CONFIG_SMP */
Index: linux-work/arch/powerpc/mm/tlb_nohash_low.S
===================================================================
--- linux-work.orig/arch/powerpc/mm/tlb_nohash_low.S	2009-07-24 16:24:08.000000000 +1000
+++ linux-work/arch/powerpc/mm/tlb_nohash_low.S	2009-07-24 17:04:57.000000000 +1000
@@ -39,7 +39,7 @@
 /*
  * 40x implementation needs only tlbil_va
  */
-_GLOBAL(_tlbil_va)
+_GLOBAL(__tlbil_va)
 	/* We run the search with interrupts disabled because we have to change
 	 * the PID and I don't want to preempt when that happens.
 	 */
@@ -71,7 +71,7 @@ _GLOBAL(_tlbil_va)
  * 440 implementation uses tlbsx/we for tlbil_va and a full sweep
  * of the TLB for everything else.
  */
-_GLOBAL(_tlbil_va)
+_GLOBAL(__tlbil_va)
 	mfspr	r5,SPRN_MMUCR
 	rlwimi	r5,r4,0,24,31			/* Set TID */
 
@@ -170,7 +170,7 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_US
  * Flush MMU TLB for a particular address, but only on the local processor
  * (no broadcast)
  */
-_GLOBAL(_tlbil_va)
+_GLOBAL(__tlbil_va)
 	mfmsr	r10
 	wrteei	0
 	slwi	r4,r4,16

^ permalink raw reply

* [PATCH 9/20] powerpc/mm: Call mmu_context_init() from ppc64 (v2)
From: Benjamin Herrenschmidt @ 2009-07-24  9:15 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <1248426902.401617.944220131651.qpush@grosgo>

Our 64-bit hash context handling has no init function, but 64-bit Book3E
will use the common mmu_context_nohash.c code which does, so define an
empty inline mmu_context_init() for 64-bit server and call it from
our 64-bit setup_arch()

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Kumar Gala <galak@kernel.crashing.org>
---

v2. Remove whitespace addition to mmu_context_hash64.c

 arch/powerpc/include/asm/mmu_context.h |    7 ++++++-
 arch/powerpc/kernel/setup_64.c         |    4 ++++
 2 files changed, 10 insertions(+), 1 deletion(-)

--- linux-work.orig/arch/powerpc/include/asm/mmu_context.h	2009-07-22 16:25:25.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/mmu_context.h	2009-07-22 16:25:50.000000000 +1000
@@ -14,7 +14,6 @@
 /*
  * Most if the context management is out of line
  */
-extern void mmu_context_init(void);
 extern int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
 extern void destroy_context(struct mm_struct *mm);
 
@@ -23,6 +22,12 @@ extern void switch_stab(struct task_stru
 extern void switch_slb(struct task_struct *tsk, struct mm_struct *mm);
 extern void set_context(unsigned long id, pgd_t *pgd);
 
+#ifdef CONFIG_PPC_BOOK3S_64
+static inline void mmu_context_init(void) { }
+#else
+extern void mmu_context_init(void);
+#endif
+
 /*
  * switch_mm is the entry point called from the architecture independent
  * code in kernel/sched.c
Index: linux-work/arch/powerpc/kernel/setup_64.c
===================================================================
--- linux-work.orig/arch/powerpc/kernel/setup_64.c	2009-07-22 16:26:23.000000000 +1000
+++ linux-work/arch/powerpc/kernel/setup_64.c	2009-07-22 16:26:31.000000000 +1000
@@ -534,6 +534,10 @@ void __init setup_arch(char **cmdline_p)
 #endif
 
 	paging_init();
+
+	/* Initialize the MMU context management stuff */
+	mmu_context_init();
+
 	ppc64_boot_msg(0x15, "Setup Done");
 }
 

^ permalink raw reply

* [PATCH 10/20] powerpc: Clean ifdef usage in copy_thread()
From: Benjamin Herrenschmidt @ 2009-07-24  9:15 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <1248426902.401617.944220131651.qpush@grosgo>

Currently, a single ifdef covers SLB related bits and more generic ppc64
related bits, split this in two separate ifdef's since 64-bit BookE will
need one but not the other.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---

 arch/powerpc/kernel/process.c |    2 ++
 1 file changed, 2 insertions(+)

--- linux-work.orig/arch/powerpc/kernel/process.c	2009-07-22 16:30:49.000000000 +1000
+++ linux-work/arch/powerpc/kernel/process.c	2009-07-22 16:31:02.000000000 +1000
@@ -664,6 +664,7 @@ int copy_thread(unsigned long clone_flag
 		sp_vsid |= SLB_VSID_KERNEL | llp;
 		p->thread.ksp_vsid = sp_vsid;
 	}
+#endif /* CONFIG_PPC_STD_MMU_64 */
 
 	/*
 	 * The PPC64 ABI makes use of a TOC to contain function 
@@ -671,6 +672,7 @@ int copy_thread(unsigned long clone_flag
 	 * to the TOC entry.  The first entry is a pointer to the actual
 	 * function.
  	 */
+#ifdef CONFIG_PPC64
 	kregs->nip = *((unsigned long *)ret_from_fork);
 #else
 	kregs->nip = (unsigned long)ret_from_fork;

^ permalink raw reply

* [PATCH 11/20] powerpc: Move definitions of secondary CPU spinloop to header file (v2)
From: Benjamin Herrenschmidt @ 2009-07-24  9:15 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <1248426902.401617.944220131651.qpush@grosgo>

Those definitions are currently declared extern in the .c file where
they are used, move them to a header file instead.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
--

v2. Move more definitions to the header and update more call sites

 arch/powerpc/include/asm/smp.h            |    9 +++++++++
 arch/powerpc/kernel/prom_init.c           |    4 ----
 arch/powerpc/kernel/setup_64.c            |    3 ---
 arch/powerpc/platforms/85xx/smp.c         |    1 -
 arch/powerpc/platforms/86xx/mpc86xx_smp.c |    1 -
 arch/powerpc/platforms/cell/smp.c         |    2 --
 arch/powerpc/platforms/pseries/smp.c      |    2 --
 7 files changed, 9 insertions(+), 13 deletions(-)

--- linux-work.orig/arch/powerpc/include/asm/smp.h	2009-07-24 15:31:07.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/smp.h	2009-07-24 16:03:50.000000000 +1000
@@ -148,6 +148,15 @@ extern struct smp_ops_t *smp_ops;
 extern void arch_send_call_function_single_ipi(int cpu);
 extern void arch_send_call_function_ipi(cpumask_t mask);
 
+/* Definitions relative to the secondary CPU spin loop
+ * and entry point. Not all of them exist on both 32 and
+ * 64-bit but defining them all here doesn't harm
+ */
+extern void generic_secondary_smp_init(void);
+extern unsigned long __secondary_hold_spinloop;
+extern unsigned long __secondary_hold_acknowledge;
+extern char __secondary_hold;
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* __KERNEL__ */
Index: linux-work/arch/powerpc/kernel/setup_64.c
===================================================================
--- linux-work.orig/arch/powerpc/kernel/setup_64.c	2009-07-24 15:32:37.000000000 +1000
+++ linux-work/arch/powerpc/kernel/setup_64.c	2009-07-24 15:57:00.000000000 +1000
@@ -230,9 +230,6 @@ void early_setup_secondary(void)
 #endif /* CONFIG_SMP */
 
 #if defined(CONFIG_SMP) || defined(CONFIG_KEXEC)
-extern unsigned long __secondary_hold_spinloop;
-extern void generic_secondary_smp_init(void);
-
 void smp_release_cpus(void)
 {
 	unsigned long *ptr;
Index: linux-work/arch/powerpc/kernel/prom_init.c
===================================================================
--- linux-work.orig/arch/powerpc/kernel/prom_init.c	2009-07-24 16:01:44.000000000 +1000
+++ linux-work/arch/powerpc/kernel/prom_init.c	2009-07-24 16:02:27.000000000 +1000
@@ -1259,10 +1259,6 @@ static void __init prom_initialize_tce_t
  *
  * -- Cort
  */
-extern char __secondary_hold;
-extern unsigned long __secondary_hold_spinloop;
-extern unsigned long __secondary_hold_acknowledge;
-
 /*
  * We want to reference the copy of __secondary_hold_* in the
  * 0 - 0x100 address range
Index: linux-work/arch/powerpc/platforms/85xx/smp.c
===================================================================
--- linux-work.orig/arch/powerpc/platforms/85xx/smp.c	2009-07-24 16:02:47.000000000 +1000
+++ linux-work/arch/powerpc/platforms/85xx/smp.c	2009-07-24 16:02:54.000000000 +1000
@@ -25,7 +25,6 @@
 
 #include <sysdev/fsl_soc.h>
 
-extern volatile unsigned long __secondary_hold_acknowledge;
 extern void __early_start(void);
 
 #define BOOT_ENTRY_ADDR_UPPER	0
Index: linux-work/arch/powerpc/platforms/86xx/mpc86xx_smp.c
===================================================================
--- linux-work.orig/arch/powerpc/platforms/86xx/mpc86xx_smp.c	2009-07-24 16:03:06.000000000 +1000
+++ linux-work/arch/powerpc/platforms/86xx/mpc86xx_smp.c	2009-07-24 16:03:11.000000000 +1000
@@ -27,7 +27,6 @@
 #include "mpc86xx.h"
 
 extern void __secondary_start_mpc86xx(void);
-extern unsigned long __secondary_hold_acknowledge;
 
 #define MCM_PORT_CONFIG_OFFSET	0x10
 
Index: linux-work/arch/powerpc/platforms/cell/smp.c
===================================================================
--- linux-work.orig/arch/powerpc/platforms/cell/smp.c	2009-07-24 16:00:37.000000000 +1000
+++ linux-work/arch/powerpc/platforms/cell/smp.c	2009-07-24 16:00:56.000000000 +1000
@@ -58,8 +58,6 @@
  */
 static cpumask_t of_spin_map;
 
-extern void generic_secondary_smp_init(unsigned long);
-
 /**
  * smp_startup_cpu() - start the given cpu
  *
Index: linux-work/arch/powerpc/platforms/pseries/smp.c
===================================================================
--- linux-work.orig/arch/powerpc/platforms/pseries/smp.c	2009-07-24 16:01:23.000000000 +1000
+++ linux-work/arch/powerpc/platforms/pseries/smp.c	2009-07-24 16:01:25.000000000 +1000
@@ -56,8 +56,6 @@
  */
 static cpumask_t of_spin_map;
 
-extern void generic_secondary_smp_init(unsigned long);
-
 /**
  * smp_startup_cpu() - start the given cpu
  *

^ permalink raw reply

* [PATCH 12/20] powerpc/mm: Rework & cleanup page table freeing code path
From: Benjamin Herrenschmidt @ 2009-07-24  9:15 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <1248426902.401617.944220131651.qpush@grosgo>

That patch used to just add a hook to page table flushing but
pulling that string brought out a whole bunch of issues, so it
now does that and more:

 - We now make the RCU batching of page freeing SMP only, as I
believe it was intended initially. We make a few more things compile
to nothing on !CONFIG_SMP

 - Some macros are turned into functions, though that forced me to
out of line a few stuffs due to unsolvable include depenencies,
however it's probably better that way anyway, it's not -that-
critical code path.

 - 32-bit didn't call pte_free_finish() on tlb_flush() which means
that it wouldn't push out the batch to RCU for delayed freeing when
a bunch of page tables have been freed, they would just stay in there
until the batch gets full.

64-bit BookE will use that hook to maintain the virtually linear
page tables or the indirect entries in the TLB when using the
HW loader.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---

 arch/powerpc/include/asm/pgalloc.h |   39 ++++++++++++++++++++++++++-----------
 arch/powerpc/include/asm/tlb.h     |   38 ++----------------------------------
 arch/powerpc/mm/pgtable.c          |   10 +++++++++
 arch/powerpc/mm/tlb_hash32.c       |    3 ++
 arch/powerpc/mm/tlb_hash64.c       |   15 ++++++++++++++
 arch/powerpc/mm/tlb_nohash.c       |    8 +++++++
 6 files changed, 67 insertions(+), 46 deletions(-)

--- linux-work.orig/arch/powerpc/include/asm/pgalloc.h	2009-07-24 17:39:52.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/pgalloc.h	2009-07-24 17:41:40.000000000 +1000
@@ -4,6 +4,15 @@
 
 #include <linux/mm.h>
 
+#ifdef CONFIG_PPC_BOOK3E
+extern void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address);
+#else /* CONFIG_PPC_BOOK3E */
+static inline void tlb_flush_pgtable(struct mmu_gather *tlb,
+				     unsigned long address)
+{
+}
+#endif /* !CONFIG_PPC_BOOK3E */
+
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
 	free_page((unsigned long)pte);
@@ -35,19 +44,27 @@ static inline pgtable_free_t pgtable_fre
 #include <asm/pgalloc-32.h>
 #endif
 
-extern void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf);
-
 #ifdef CONFIG_SMP
-#define __pte_free_tlb(tlb,ptepage,address)		\
-do { \
-	pgtable_page_dtor(ptepage); \
-	pgtable_free_tlb(tlb, pgtable_free_cache(page_address(ptepage), \
-					PTE_NONCACHE_NUM, PTE_TABLE_SIZE-1)); \
-} while (0)
-#else
-#define __pte_free_tlb(tlb, pte, address)	pte_free((tlb)->mm, (pte))
-#endif
+extern void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf);
+extern void pte_free_finish(void);
+#else /* CONFIG_SMP */
+static inline void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf)
+{
+	pgtable_free(pgf);
+}
+static inline void pte_free_finish(void) { }
+#endif /* !CONFIG_SMP */
 
+static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage,
+				  unsigned long address)
+{
+	pgtable_free_t pgf = pgtable_free_cache(page_address(ptepage),
+						PTE_NONCACHE_NUM,
+						PTE_TABLE_SIZE-1);
+ 	tlb_flush_pgtable(tlb, address);
+	pgtable_page_dtor(ptepage);
+	pgtable_free_tlb(tlb, pgf);
+}
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_PGALLOC_H */
Index: linux-work/arch/powerpc/include/asm/tlb.h
===================================================================
--- linux-work.orig/arch/powerpc/include/asm/tlb.h	2009-07-24 17:39:52.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/tlb.h	2009-07-24 17:41:40.000000000 +1000
@@ -25,57 +25,25 @@
 
 #include <linux/pagemap.h>
 
-struct mmu_gather;
-
 #define tlb_start_vma(tlb, vma)	do { } while (0)
 #define tlb_end_vma(tlb, vma)	do { } while (0)
 
-#if !defined(CONFIG_PPC_STD_MMU)
-
-#define tlb_flush(tlb)			flush_tlb_mm((tlb)->mm)
-
-#elif defined(__powerpc64__)
-
-extern void pte_free_finish(void);
-
-static inline void tlb_flush(struct mmu_gather *tlb)
-{
-	struct ppc64_tlb_batch *tlbbatch = &__get_cpu_var(ppc64_tlb_batch);
-
-	/* If there's a TLB batch pending, then we must flush it because the
-	 * pages are going to be freed and we really don't want to have a CPU
-	 * access a freed page because it has a stale TLB
-	 */
-	if (tlbbatch->index)
-		__flush_tlb_pending(tlbbatch);
-
-	pte_free_finish();
-}
-
-#else
-
 extern void tlb_flush(struct mmu_gather *tlb);
 
-#endif
-
 /* Get the generic bits... */
 #include <asm-generic/tlb.h>
 
-#if !defined(CONFIG_PPC_STD_MMU) || defined(__powerpc64__)
-
-#define __tlb_remove_tlb_entry(tlb, pte, address) do { } while (0)
-
-#else
 extern void flush_hash_entry(struct mm_struct *mm, pte_t *ptep,
 			     unsigned long address);
 
 static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep,
-					unsigned long address)
+					  unsigned long address)
 {
+#ifdef PPC_STD_MMU_32
 	if (pte_val(*ptep) & _PAGE_HASHPTE)
 		flush_hash_entry(tlb->mm, ptep, address);
+#endif
 }
 
-#endif
 #endif /* __KERNEL__ */
 #endif /* __ASM_POWERPC_TLB_H */
Index: linux-work/arch/powerpc/mm/pgtable.c
===================================================================
--- linux-work.orig/arch/powerpc/mm/pgtable.c	2009-07-24 17:39:52.000000000 +1000
+++ linux-work/arch/powerpc/mm/pgtable.c	2009-07-24 17:41:40.000000000 +1000
@@ -30,6 +30,14 @@
 #include <asm/tlbflush.h>
 #include <asm/tlb.h>
 
+#ifdef CONFIG_SMP
+
+/*
+ * Handle batching of page table freeing on SMP. Page tables are
+ * queued up and send to be freed later by RCU in order to avoid
+ * freeing a page table page that is being walked without locks
+ */
+
 static DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
 static unsigned long pte_freelist_forced_free;
 
@@ -116,6 +124,8 @@ void pte_free_finish(void)
 	*batchp = NULL;
 }
 
+#endif /* CONFIG_SMP */
+
 /*
  * Handle i/d cache flushing, called from set_pte_at() or ptep_set_access_flags()
  */
Index: linux-work/arch/powerpc/mm/tlb_hash32.c
===================================================================
--- linux-work.orig/arch/powerpc/mm/tlb_hash32.c	2009-07-24 17:39:52.000000000 +1000
+++ linux-work/arch/powerpc/mm/tlb_hash32.c	2009-07-24 17:41:40.000000000 +1000
@@ -71,6 +71,9 @@ void tlb_flush(struct mmu_gather *tlb)
 		 */
 		_tlbia();
 	}
+
+	/* Push out batch of freed page tables */
+	pte_free_finish();
 }
 
 /*
Index: linux-work/arch/powerpc/mm/tlb_hash64.c
===================================================================
--- linux-work.orig/arch/powerpc/mm/tlb_hash64.c	2009-07-24 17:39:52.000000000 +1000
+++ linux-work/arch/powerpc/mm/tlb_hash64.c	2009-07-24 17:41:40.000000000 +1000
@@ -154,6 +154,21 @@ void __flush_tlb_pending(struct ppc64_tl
 	batch->index = 0;
 }
 
+void tlb_flush(struct mmu_gather *tlb)
+{
+	struct ppc64_tlb_batch *tlbbatch = &__get_cpu_var(ppc64_tlb_batch);
+
+	/* If there's a TLB batch pending, then we must flush it because the
+	 * pages are going to be freed and we really don't want to have a CPU
+	 * access a freed page because it has a stale TLB
+	 */
+	if (tlbbatch->index)
+		__flush_tlb_pending(tlbbatch);
+
+	/* Push out batch of freed page tables */
+	pte_free_finish();
+}
+
 /**
  * __flush_hash_table_range - Flush all HPTEs for a given address range
  *                            from the hash table (and the TLB). But keeps
Index: linux-work/arch/powerpc/mm/tlb_nohash.c
===================================================================
--- linux-work.orig/arch/powerpc/mm/tlb_nohash.c	2009-07-24 17:41:33.000000000 +1000
+++ linux-work/arch/powerpc/mm/tlb_nohash.c	2009-07-24 17:41:40.000000000 +1000
@@ -233,3 +233,11 @@ void flush_tlb_range(struct vm_area_stru
 	flush_tlb_mm(vma->vm_mm);
 }
 EXPORT_SYMBOL(flush_tlb_range);
+
+void tlb_flush(struct mmu_gather *tlb)
+{
+	flush_tlb_mm(tlb->mm);
+
+	/* Push out batch of freed page tables */
+	pte_free_finish();
+}

^ permalink raw reply

* [PATCH 13/20] powerpc: Add SPR definitions for new 64-bit BookE (v2)
From: Benjamin Herrenschmidt @ 2009-07-24  9:15 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <1248426902.401617.944220131651.qpush@grosgo>

This adds various SPRs defined on 64-bit BookE, along with changes
to the definition of the base MSR values to add the values needed
for 64-bit Book3E.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---

v2. Remove trailing whitespace and add comments about the EPCR bits
    Don't break 8xx due to missing MSR_USER definition

 arch/powerpc/include/asm/reg.h       |   10 ++------
 arch/powerpc/include/asm/reg_booke.h |   42 ++++++++++++++++++++++++++++++++---
 2 files changed, 42 insertions(+), 10 deletions(-)

--- linux-work.orig/arch/powerpc/include/asm/reg.h	2009-07-24 18:14:35.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/reg.h	2009-07-24 18:40:53.000000000 +1000
@@ -98,19 +98,15 @@
 #define MSR_RI		__MASK(MSR_RI_LG)	/* Recoverable Exception */
 #define MSR_LE		__MASK(MSR_LE_LG)	/* Little Endian */
 
-#ifdef CONFIG_PPC64
+#if defined(CONFIG_PPC_BOOK3S_64)
+/* Server variant */
 #define MSR_		MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_ISF |MSR_HV
 #define MSR_KERNEL      MSR_ | MSR_SF
-
 #define MSR_USER32	MSR_ | MSR_PR | MSR_EE
 #define MSR_USER64	MSR_USER32 | MSR_SF
-
-#else /* 32-bit */
+#elif defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_8xx)
 /* Default MSR for kernel mode. */
-#ifndef MSR_KERNEL	/* reg_booke.h also defines this */
 #define MSR_KERNEL	(MSR_ME|MSR_RI|MSR_IR|MSR_DR)
-#endif
-
 #define MSR_USER	(MSR_KERNEL|MSR_PR|MSR_EE)
 #endif
 
Index: linux-work/arch/powerpc/include/asm/reg_booke.h
===================================================================
--- linux-work.orig/arch/powerpc/include/asm/reg_booke.h	2009-07-24 18:14:35.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/reg_booke.h	2009-07-24 18:14:47.000000000 +1000
@@ -18,18 +18,26 @@
 #define MSR_IS		MSR_IR	/* Instruction Space */
 #define MSR_DS		MSR_DR	/* Data Space */
 #define MSR_PMM		(1<<2)	/* Performance monitor mark bit */
+#define MSR_CM		(1<<31) /* Computation Mode (0=32-bit, 1=64-bit) */
 
-/* Default MSR for kernel mode. */
-#if defined (CONFIG_40x)
+#if defined(CONFIG_PPC_BOOK3E_64)
+#define MSR_		MSR_ME | MSR_CE
+#define MSR_KERNEL      MSR_ | MSR_CM
+#define MSR_USER32	MSR_ | MSR_PR | MSR_EE
+#define MSR_USER64	MSR_USER32 | MSR_CM
+#elif defined (CONFIG_40x)
 #define MSR_KERNEL	(MSR_ME|MSR_RI|MSR_IR|MSR_DR|MSR_CE)
-#elif defined(CONFIG_BOOKE)
+#define MSR_USER	(MSR_KERNEL|MSR_PR|MSR_EE)
+#else
 #define MSR_KERNEL	(MSR_ME|MSR_RI|MSR_CE)
+#define MSR_USER	(MSR_KERNEL|MSR_PR|MSR_EE)
 #endif
 
 /* Special Purpose Registers (SPRNs)*/
 #define SPRN_DECAR	0x036	/* Decrementer Auto Reload Register */
 #define SPRN_IVPR	0x03F	/* Interrupt Vector Prefix Register */
 #define SPRN_USPRG0	0x100	/* User Special Purpose Register General 0 */
+#define SPRN_SPRG3R	0x103	/* Special Purpose Register General 3 Read */
 #define SPRN_SPRG4R	0x104	/* Special Purpose Register General 4 Read */
 #define SPRN_SPRG5R	0x105	/* Special Purpose Register General 5 Read */
 #define SPRN_SPRG6R	0x106	/* Special Purpose Register General 6 Read */
@@ -38,11 +46,18 @@
 #define SPRN_SPRG5W	0x115	/* Special Purpose Register General 5 Write */
 #define SPRN_SPRG6W	0x116	/* Special Purpose Register General 6 Write */
 #define SPRN_SPRG7W	0x117	/* Special Purpose Register General 7 Write */
+#define SPRN_EPCR	0x133	/* Embedded Processor Control Register */
 #define SPRN_DBCR2	0x136	/* Debug Control Register 2 */
 #define SPRN_IAC3	0x13A	/* Instruction Address Compare 3 */
 #define SPRN_IAC4	0x13B	/* Instruction Address Compare 4 */
 #define SPRN_DVC1	0x13E	/* Data Value Compare Register 1 */
 #define SPRN_DVC2	0x13F	/* Data Value Compare Register 2 */
+#define SPRN_MAS8	0x155	/* MMU Assist Register 8 */
+#define SPRN_TLB0PS	0x158	/* TLB 0 Page Size Register */
+#define SPRN_MAS5_MAS6	0x15c	/* MMU Assist Register 5 || 6 */
+#define SPRN_MAS8_MAS1	0x15d	/* MMU Assist Register 8 || 1 */
+#define SPRN_MAS7_MAS3	0x174	/* MMU Assist Register 7 || 3 */
+#define SPRN_MAS0_MAS1	0x175	/* MMU Assist Register 0 || 1 */
 #define SPRN_IVOR0	0x190	/* Interrupt Vector Offset Register 0 */
 #define SPRN_IVOR1	0x191	/* Interrupt Vector Offset Register 1 */
 #define SPRN_IVOR2	0x192	/* Interrupt Vector Offset Register 2 */
@@ -425,6 +440,27 @@
 #define SGR_NORMAL	0		/* Speculative fetching allowed. */
 #define SGR_GUARDED	1		/* Speculative fetching disallowed. */
 
+/* Bit definitions for EPCR */
+#define SPRN_EPCR_EXTGS		0x80000000	/* External Input interrupt
+						 * directed to Guest state */
+#define SPRN_EPCR_DTLBGS	0x40000000	/* Data TLB Error interrupt
+						 * directed to guest state */
+#define SPRN_EPCR_ITLBGS	0x20000000	/* Instr. TLB error interrupt
+						 * directed to guest state */
+#define SPRN_EPCR_DSIGS		0x10000000	/* Data Storage interrupt
+						 * directed to guest state */
+#define SPRN_EPCR_ISIGS		0x08000000	/* Instr. Storage interrupt
+						 * directed to guest state */
+#define SPRN_EPCR_DUVD		0x04000000	/* Disable Hypervisor Debug */
+#define SPRN_EPCR_ICM		0x02000000	/* Interrupt computation mode
+						 * (copied to MSR:CM on intr) */
+#define SPRN_EPCR_GICM		0x01000000	/* Guest Interrupt Comp. mode */
+#define SPRN_EPCR_DGTMI		0x00800000	/* Disable TLB Guest Management
+						 * instructions */
+#define SPRN_EPCR_DMIUH		0x00400000	/* Disable MAS Interrupt updates
+						 * for hypervisor */
+
+
 /*
  * The IBM-403 is an even more odd special case, as it is much
  * older than the IBM-405 series.  We put these down here incase someone

^ permalink raw reply

* [PATCH 14/20] powerpc: Add memory management headers for new 64-bit BookE (v2)
From: Benjamin Herrenschmidt @ 2009-07-24  9:15 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <1248426902.401617.944220131651.qpush@grosgo>

This adds the PTE and pgtable format definitions, along with changes
to the kernel memory map and other definitions related to implementing
support for 64-bit Book3E. This also shields some asm-offset bits that
are currently only relevant on 32-bit

We also move the definition of the "linux" page size constants to
the common mmu.h file and add a few sizes that are relevant to
embedded processors.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---

v2. Move the list of page size constants to a generic location for
    use by all cpu types

 arch/powerpc/include/asm/mmu-book3e.h    |   27 +++++++++++
 arch/powerpc/include/asm/mmu-hash64.h    |   20 --------
 arch/powerpc/include/asm/mmu.h           |   32 ++++++++++++++
 arch/powerpc/include/asm/page.h          |    4 +
 arch/powerpc/include/asm/page_64.h       |   10 ++++
 arch/powerpc/include/asm/pgtable-ppc64.h |   61 +++++++++++++++++++--------
 arch/powerpc/include/asm/pte-book3e.h    |   70 +++++++++++++++++++++++++++++++
 arch/powerpc/include/asm/pte-common.h    |    3 +
 arch/powerpc/kernel/asm-offsets.c        |    5 +-
 9 files changed, 194 insertions(+), 38 deletions(-)

--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-work/arch/powerpc/include/asm/pte-book3e.h	2009-07-24 18:14:49.000000000 +1000
@@ -0,0 +1,70 @@
+#ifndef _ASM_POWERPC_PTE_BOOK3E_H
+#define _ASM_POWERPC_PTE_BOOK3E_H
+#ifdef __KERNEL__
+
+/* PTE bit definitions for processors compliant to the Book3E
+ * architecture 2.06 or later. The position of the PTE bits
+ * matches the HW definition of the optional Embedded Page Table
+ * category.
+ */
+
+/* Architected bits */
+#define _PAGE_PRESENT	0x000001 /* software: pte contains a translation */
+#define _PAGE_FILE	0x000002 /* (!present only) software: pte holds file offset */
+#define _PAGE_SW1	0x000002
+#define _PAGE_BAP_SR	0x000004
+#define _PAGE_BAP_UR	0x000008
+#define _PAGE_BAP_SW	0x000010
+#define _PAGE_BAP_UW	0x000020
+#define _PAGE_BAP_SX	0x000040
+#define _PAGE_BAP_UX	0x000080
+#define _PAGE_PSIZE_MSK	0x000f00
+#define _PAGE_PSIZE_4K	0x000200
+#define _PAGE_PSIZE_64K	0x000600
+#define _PAGE_PSIZE_1M	0x000a00
+#define _PAGE_PSIZE_16M	0x000e00
+#define _PAGE_DIRTY	0x001000 /* C: page changed */
+#define _PAGE_SW0	0x002000
+#define _PAGE_U3	0x004000
+#define _PAGE_U2	0x008000
+#define _PAGE_U1	0x010000
+#define _PAGE_U0	0x020000
+#define _PAGE_ACCESSED	0x040000
+#define _PAGE_LENDIAN	0x080000
+#define _PAGE_GUARDED	0x100000
+#define _PAGE_COHERENT	0x200000 /* M: enforce memory coherence */
+#define _PAGE_NO_CACHE	0x400000 /* I: cache inhibit */
+#define _PAGE_WRITETHRU	0x800000 /* W: cache write-through */
+
+/* "Higher level" linux bit combinations */
+#define _PAGE_EXEC	_PAGE_BAP_SX /* Can be executed from potentially */
+#define _PAGE_HWEXEC	_PAGE_BAP_UX /* .. and was cache cleaned */
+#define _PAGE_RW	(_PAGE_BAP_SW | _PAGE_BAP_UW) /* User write permission */
+#define _PAGE_KERNEL_RW	(_PAGE_BAP_SW | _PAGE_BAP_SR | _PAGE_DIRTY)
+#define _PAGE_KERNEL_RO	(_PAGE_BAP_SR)
+#define _PAGE_USER	(_PAGE_BAP_UR | _PAGE_BAP_SR) /* Can be read */
+
+#define _PAGE_HASHPTE	0
+#define _PAGE_BUSY	0
+
+#define _PAGE_SPECIAL	_PAGE_SW0
+
+/* Flags to be preserved on PTE modifications */
+#define _PAGE_HPTEFLAGS	_PAGE_BUSY
+
+/* Base page size */
+#ifdef CONFIG_PPC_64K_PAGES
+#define _PAGE_PSIZE	_PAGE_PSIZE_64K
+#define PTE_RPN_SHIFT	(28)
+#else
+#define _PAGE_PSIZE	_PAGE_PSIZE_4K
+#define	PTE_RPN_SHIFT	(24)
+#endif
+
+/* On 32-bit, we never clear the top part of the PTE */
+#ifdef CONFIG_PPC32
+#define _PTE_NONE_MASK	0xffffffff00000000ULL
+#endif
+
+#endif /* __KERNEL__ */
+#endif /*  _ASM_POWERPC_PTE_FSL_BOOKE_H */
Index: linux-work/arch/powerpc/include/asm/pgtable-ppc64.h
===================================================================
--- linux-work.orig/arch/powerpc/include/asm/pgtable-ppc64.h	2009-07-24 18:14:35.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/pgtable-ppc64.h	2009-07-24 18:15:35.000000000 +1000
@@ -5,11 +5,6 @@
  * the ppc64 hashed page table.
  */
 
-#ifndef __ASSEMBLY__
-#include <linux/stddef.h>
-#include <asm/tlbflush.h>
-#endif /* __ASSEMBLY__ */
-
 #ifdef CONFIG_PPC_64K_PAGES
 #include <asm/pgtable-ppc64-64k.h>
 #else
@@ -38,26 +33,46 @@
 #endif
 
 /*
- * Define the address range of the vmalloc VM area.
+ * Define the address range of the kernel non-linear virtual area
+ */
+
+#ifdef CONFIG_PPC_BOOK3E
+#define KERN_VIRT_START ASM_CONST(0x8000000000000000)
+#else
+#define KERN_VIRT_START ASM_CONST(0xD000000000000000)
+#endif
+#define KERN_VIRT_SIZE	PGTABLE_RANGE
+
+/*
+ * The vmalloc space starts at the beginning of that region, and
+ * occupies half of it on hash CPUs and a quarter of it on Book3E
  */
-#define VMALLOC_START ASM_CONST(0xD000000000000000)
-#define VMALLOC_SIZE  (PGTABLE_RANGE >> 1)
-#define VMALLOC_END   (VMALLOC_START + VMALLOC_SIZE)
+#define VMALLOC_START	KERN_VIRT_START
+#ifdef CONFIG_PPC_BOOK3E
+#define VMALLOC_SIZE	(KERN_VIRT_SIZE >> 2)
+#else
+#define VMALLOC_SIZE	(KERN_VIRT_SIZE >> 1)
+#endif
+#define VMALLOC_END	(VMALLOC_START + VMALLOC_SIZE)
 
 /*
- * Define the address ranges for MMIO and IO space :
+ * The second half of the kernel virtual space is used for IO mappings,
+ * it's itself carved into the PIO region (ISA and PHB IO space) and
+ * the ioremap space
  *
- *  ISA_IO_BASE = VMALLOC_END, 64K reserved area
+ *  ISA_IO_BASE = KERN_IO_START, 64K reserved area
  *  PHB_IO_BASE = ISA_IO_BASE + 64K to ISA_IO_BASE + 2G, PHB IO spaces
  * IOREMAP_BASE = ISA_IO_BASE + 2G to VMALLOC_START + PGTABLE_RANGE
  */
+#define KERN_IO_START	(KERN_VIRT_START + (KERN_VIRT_SIZE >> 1))
 #define FULL_IO_SIZE	0x80000000ul
-#define  ISA_IO_BASE	(VMALLOC_END)
-#define  ISA_IO_END	(VMALLOC_END + 0x10000ul)
+#define  ISA_IO_BASE	(KERN_IO_START)
+#define  ISA_IO_END	(KERN_IO_START + 0x10000ul)
 #define  PHB_IO_BASE	(ISA_IO_END)
-#define  PHB_IO_END	(VMALLOC_END + FULL_IO_SIZE)
+#define  PHB_IO_END	(KERN_IO_START + FULL_IO_SIZE)
 #define IOREMAP_BASE	(PHB_IO_END)
-#define IOREMAP_END	(VMALLOC_START + PGTABLE_RANGE)
+#define IOREMAP_END	(KERN_VIRT_START + KERN_VIRT_SIZE)
+
 
 /*
  * Region IDs
@@ -72,19 +87,28 @@
 #define USER_REGION_ID		(0UL)
 
 /*
- * Defines the address of the vmemap area, in its own region
+ * Defines the address of the vmemap area, in its own region on
+ * hash table CPUs and after the vmalloc space on Book3E
  */
+#ifdef CONFIG_PPC_BOOK3E
+#define VMEMMAP_BASE		VMALLOC_END
+#define VMEMMAP_END		KERN_IO_START
+#else
 #define VMEMMAP_BASE		(VMEMMAP_REGION_ID << REGION_SHIFT)
+#endif
 #define vmemmap			((struct page *)VMEMMAP_BASE)
 
 
 /*
  * Include the PTE bits definitions
  */
+#ifdef CONFIG_PPC_BOOK3S
 #include <asm/pte-hash64.h>
+#else
+#include <asm/pte-book3e.h>
+#endif
 #include <asm/pte-common.h>
 
-
 #ifdef CONFIG_PPC_MM_SLICES
 #define HAVE_ARCH_UNMAPPED_AREA
 #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
@@ -92,6 +116,9 @@
 
 #ifndef __ASSEMBLY__
 
+#include <linux/stddef.h>
+#include <asm/tlbflush.h>
+
 /*
  * This is the default implementation of various PTE accessors, it's
  * used in all cases except Book3S with 64K pages where we have a
Index: linux-work/arch/powerpc/include/asm/pte-common.h
===================================================================
--- linux-work.orig/arch/powerpc/include/asm/pte-common.h	2009-07-24 18:14:35.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/pte-common.h	2009-07-24 18:14:49.000000000 +1000
@@ -34,6 +34,9 @@
 #ifndef _PAGE_4K_PFN
 #define _PAGE_4K_PFN		0
 #endif
+#ifndef _PAGE_SAO
+#define _PAGE_SAO	0
+#endif
 #ifndef _PAGE_PSIZE
 #define _PAGE_PSIZE		0
 #endif
Index: linux-work/arch/powerpc/include/asm/mmu-book3e.h
===================================================================
--- linux-work.orig/arch/powerpc/include/asm/mmu-book3e.h	2009-07-24 18:14:35.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/mmu-book3e.h	2009-07-24 18:15:35.000000000 +1000
@@ -170,6 +170,33 @@ typedef struct {
 	unsigned int	active;
 	unsigned long	vdso_base;
 } mm_context_t;
+
+/* Page size definitions, common between 32 and 64-bit
+ *
+ *    shift : is the "PAGE_SHIFT" value for that page size
+ *    penc  : is the pte encoding mask
+ *
+ */
+struct mmu_psize_def
+{
+	unsigned int	shift;	/* number of bits */
+	unsigned int	enc;	/* PTE encoding */
+};
+extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
+
+/* The page sizes use the same names as 64-bit hash but are
+ * constants
+ */
+#if defined(CONFIG_PPC_4K_PAGES)
+#define mmu_virtual_psize	MMU_PAGE_4K
+#elif defined(CONFIG_PPC_64K_PAGES)
+#define mmu_virtual_psize	MMU_PAGE_64K
+#else
+#error Unsupported page size
+#endif
+
+extern int mmu_linear_psize;
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_MMU_BOOK3E_H_ */
Index: linux-work/arch/powerpc/include/asm/page_64.h
===================================================================
--- linux-work.orig/arch/powerpc/include/asm/page_64.h	2009-07-24 18:14:35.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/page_64.h	2009-07-24 18:14:49.000000000 +1000
@@ -135,12 +135,22 @@ extern void slice_set_range_psize(struct
 #endif /* __ASSEMBLY__ */
 #else
 #define slice_init()
+#ifdef CONFIG_PPC_STD_MMU_64
 #define get_slice_psize(mm, addr)	((mm)->context.user_psize)
 #define slice_set_user_psize(mm, psize)		\
 do {						\
 	(mm)->context.user_psize = (psize);	\
 	(mm)->context.sllp = SLB_VSID_USER | mmu_psize_defs[(psize)].sllp; \
 } while (0)
+#else /* CONFIG_PPC_STD_MMU_64 */
+#ifdef CONFIG_PPC_64K_PAGES
+#define get_slice_psize(mm, addr)	MMU_PAGE_64K
+#else /* CONFIG_PPC_64K_PAGES */
+#define get_slice_psize(mm, addr)	MMU_PAGE_4K
+#endif /* !CONFIG_PPC_64K_PAGES */
+#define slice_set_user_psize(mm, psize)	do { BUG(); } while(0)
+#endif /* !CONFIG_PPC_STD_MMU_64 */
+
 #define slice_set_range_psize(mm, start, len, psize)	\
 	slice_set_user_psize((mm), (psize))
 #define slice_mm_new_context(mm)	1
Index: linux-work/arch/powerpc/include/asm/mmu.h
===================================================================
--- linux-work.orig/arch/powerpc/include/asm/mmu.h	2009-07-24 18:14:35.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/mmu.h	2009-07-24 18:21:12.000000000 +1000
@@ -17,6 +17,7 @@
 #define MMU_FTR_TYPE_40x		ASM_CONST(0x00000004)
 #define MMU_FTR_TYPE_44x		ASM_CONST(0x00000008)
 #define MMU_FTR_TYPE_FSL_E		ASM_CONST(0x00000010)
+#define MMU_FTR_TYPE_3E			ASM_CONST(0x00000020)
 
 /*
  * This is individual features
@@ -73,6 +74,36 @@ extern void early_init_mmu_secondary(voi
 
 #endif /* !__ASSEMBLY__ */
 
+/* The kernel use the constants below to index in the page sizes array.
+ * The use of fixed constants for this purpose is better for performances
+ * of the low level hash refill handlers.
+ *
+ * A non supported page size has a "shift" field set to 0
+ *
+ * Any new page size being implemented can get a new entry in here. Whether
+ * the kernel will use it or not is a different matter though. The actual page
+ * size used by hugetlbfs is not defined here and may be made variable
+ *
+ * Note: This array ended up being a false good idea as it's growing to the
+ * point where I wonder if we should replace it with something different,
+ * to think about, feedback welcome. --BenH.
+ */
+
+/* There are #define as they have to be used in assembly */
+#define MMU_PAGE_4K	0
+#define MMU_PAGE_16K	1
+#define MMU_PAGE_64K	2
+#define MMU_PAGE_64K_AP	3	/* "Admixed pages" (hash64 only) */
+#define MMU_PAGE_256K	4
+#define MMU_PAGE_1M	5
+#define MMU_PAGE_8M	6
+#define MMU_PAGE_16M	7
+#define MMU_PAGE_256M	8
+#define MMU_PAGE_1G	9
+#define MMU_PAGE_16G	10
+#define MMU_PAGE_64G	11
+#define MMU_PAGE_COUNT	12
+
 
 #if defined(CONFIG_PPC_STD_MMU_64)
 /* 64-bit classic hash table MMU */
@@ -94,5 +125,6 @@ extern void early_init_mmu_secondary(voi
 #  include <asm/mmu-8xx.h>
 #endif
 
+
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_MMU_H_ */
Index: linux-work/arch/powerpc/kernel/asm-offsets.c
===================================================================
--- linux-work.orig/arch/powerpc/kernel/asm-offsets.c	2009-07-24 18:14:35.000000000 +1000
+++ linux-work/arch/powerpc/kernel/asm-offsets.c	2009-07-24 18:19:55.000000000 +1000
@@ -52,9 +52,11 @@
 #include <linux/kvm_host.h>
 #endif
 
+#ifdef CONFIG_PPC32
 #if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
 #include "head_booke.h"
 #endif
+#endif
 
 #if defined(CONFIG_FSL_BOOKE)
 #include "../mm/mmu_decl.h"
@@ -260,6 +262,7 @@ int main(void)
 	DEFINE(_SRR1, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs)+8);
 #endif /* CONFIG_PPC64 */
 
+#if defined(CONFIG_PPC32)
 #if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
 	DEFINE(EXC_LVL_SIZE, STACK_EXC_LVL_FRAME_SIZE);
 	DEFINE(MAS0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas0));
@@ -278,7 +281,7 @@ int main(void)
 	DEFINE(_DSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr1));
 	DEFINE(SAVED_KSP_LIMIT, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, saved_ksp_limit));
 #endif
-
+#endif
 	DEFINE(CLONE_VM, CLONE_VM);
 	DEFINE(CLONE_UNTRACED, CLONE_UNTRACED);
 
Index: linux-work/arch/powerpc/include/asm/page.h
===================================================================
--- linux-work.orig/arch/powerpc/include/asm/page.h	2009-07-24 18:14:35.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/page.h	2009-07-24 18:14:49.000000000 +1000
@@ -139,7 +139,11 @@ extern phys_addr_t kernstart_addr;
  * Don't compare things with KERNELBASE or PAGE_OFFSET to test for
  * "kernelness", use is_kernel_addr() - it should do what you want.
  */
+#ifdef CONFIG_PPC_BOOK3E_64
+#define is_kernel_addr(x)	((x) >= 0x8000000000000000ul)
+#else
 #define is_kernel_addr(x)	((x) >= PAGE_OFFSET)
+#endif
 
 #ifndef __ASSEMBLY__
 
Index: linux-work/arch/powerpc/include/asm/mmu-hash64.h
===================================================================
--- linux-work.orig/arch/powerpc/include/asm/mmu-hash64.h	2009-07-24 18:14:35.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/mmu-hash64.h	2009-07-24 18:14:49.000000000 +1000
@@ -139,26 +139,6 @@ struct mmu_psize_def
 #endif /* __ASSEMBLY__ */
 
 /*
- * The kernel use the constants below to index in the page sizes array.
- * The use of fixed constants for this purpose is better for performances
- * of the low level hash refill handlers.
- *
- * A non supported page size has a "shift" field set to 0
- *
- * Any new page size being implemented can get a new entry in here. Whether
- * the kernel will use it or not is a different matter though. The actual page
- * size used by hugetlbfs is not defined here and may be made variable
- */
-
-#define MMU_PAGE_4K		0	/* 4K */
-#define MMU_PAGE_64K		1	/* 64K */
-#define MMU_PAGE_64K_AP		2	/* 64K Admixed (in a 4K segment) */
-#define MMU_PAGE_1M		3	/* 1M */
-#define MMU_PAGE_16M		4	/* 16M */
-#define MMU_PAGE_16G		5	/* 16G */
-#define MMU_PAGE_COUNT		6
-
-/*
  * Segment sizes.
  * These are the values used by hardware in the B field of
  * SLB entries and the first dword of MMU hashtable entries.

^ permalink raw reply

* [PATCH 15/20] powerpc: Add definitions used by exception handling on 64-bit Book3E (v2)
From: Benjamin Herrenschmidt @ 2009-07-24  9:15 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <1248426902.401617.944220131651.qpush@grosgo>

This adds various definitions and macros used by the exception and TLB
miss handling on 64-bit BookE

It also adds the definitions of the SPRGs used for various exception types

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---

v2. Change __EXCEPTION_64E_H__ to _ASM_POWERPC_EXCEPTION_64E_H

 arch/powerpc/include/asm/exception-64e.h |  201 +++++++++++++++++++++++++++++++
 arch/powerpc/include/asm/reg.h           |   19 ++
 2 files changed, 220 insertions(+)

--- linux-work.orig/arch/powerpc/include/asm/reg.h	2009-07-24 15:57:39.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/reg.h	2009-07-24 15:58:22.000000000 +1000
@@ -652,6 +652,16 @@
  *	- SPRG2 scratch for exception vectors
  *	- SPRG3 unused (user visible)
  *
+ * 64-bit embedded
+ *	- SPRG0 generic exception scratch
+ *	- SPRG2 TLB exception stack
+ *	- SPRG3 unused (user visible)
+ *	- SPRG4 unused (user visible)
+ *	- SPRG6 TLB miss scratch (user visible, sorry !)
+ *	- SPRG7 critical exception scratch
+ *	- SPRG8 machine check exception scratch
+ *	- SPRG9 debug exception scratch
+ *
  * All 32-bit:
  *	- SPRG3 current thread_info pointer
  *        (virtual on BookE, physical on others)
@@ -705,6 +715,15 @@
 #define SPRN_SPRG_SCRATCH0	SPRN_SPRG2
 #endif
 
+#ifdef CONFIG_PPC_BOOK3E_64
+#define SPRN_SPRG_MC_SCRATCH	SPRN_SPRG8
+#define SPRN_SPRG_CRIT_SCRATCH	SPRN_SPRG7
+#define SPRN_SPRG_DBG_SCRATCH	SPRN_SPRG9
+#define SPRN_SPRG_TLB_EXFRAME	SPRN_SPRG2
+#define SPRN_SPRG_TLB_SCRATCH	SPRN_SPRG6
+#define SPRN_SPRG_GEN_SCRATCH	SPRN_SPRG0
+#endif
+
 #ifdef CONFIG_PPC_BOOK3S_32
 #define SPRN_SPRG_SCRATCH0	SPRN_SPRG0
 #define SPRN_SPRG_SCRATCH1	SPRN_SPRG1
Index: linux-work/arch/powerpc/include/asm/exception-64e.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-work/arch/powerpc/include/asm/exception-64e.h	2009-07-24 15:59:09.000000000 +1000
@@ -0,0 +1,201 @@
+/*
+ *  Definitions for use by exception code on Book3-E
+ *
+ *  Copyright (C) 2008 Ben. Herrenschmidt (benh@kernel.crashing.org), IBM Corp.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+#ifndef _ASM_POWERPC_EXCEPTION_64E_H
+#define _ASM_POWERPC_EXCEPTION_64E_H
+
+/*
+ * SPRGs usage an other considerations...
+ *
+ * Since TLB miss and other standard exceptions can be interrupted by
+ * critical exceptions which can themselves be interrupted by machine
+ * checks, and since the two later can themselves cause a TLB miss when
+ * hitting the linear mapping for the kernel stacks, we need to be a bit
+ * creative on how we use SPRGs.
+ *
+ * The base idea is that we have one SRPG reserved for critical and one
+ * for machine check interrupts. Those are used to save a GPR that can
+ * then be used to get the PACA, and store as much context as we need
+ * to save in there. That includes saving the SPRGs used by the TLB miss
+ * handler for linear mapping misses and the associated SRR0/1 due to
+ * the above re-entrancy issue.
+ *
+ * So here's the current usage pattern. It's done regardless of which
+ * SPRGs are user-readable though, thus we might have to change some of
+ * this later. In order to do that more easily, we use special constants
+ * for naming them
+ *
+ * WARNING: Some of these SPRGs are user readable. We need to do something
+ * about it as some point by making sure they can't be used to leak kernel
+ * critical data
+ */
+
+
+/* We are out of SPRGs so we save some things in the PACA. The normal
+ * exception frame is smaller than the CRIT or MC one though
+ */
+#define EX_R1		(0 * 8)
+#define EX_CR		(1 * 8)
+#define EX_R10		(2 * 8)
+#define EX_R11		(3 * 8)
+#define EX_R14		(4 * 8)
+#define EX_R15		(5 * 8)
+
+/* The TLB miss exception uses different slots */
+
+#define EX_TLB_R10	( 0 * 8)
+#define EX_TLB_R11	( 1 * 8)
+#define EX_TLB_R12	( 2 * 8)
+#define EX_TLB_R13	( 3 * 8)
+#define EX_TLB_R14	( 4 * 8)
+#define EX_TLB_R15	( 5 * 8)
+#define EX_TLB_R16	( 6 * 8)
+#define EX_TLB_CR	( 7 * 8)
+#define EX_TLB_DEAR	( 8 * 8) /* Level 0 and 2 only */
+#define EX_TLB_ESR	( 9 * 8) /* Level 0 and 2 only */
+#define EX_TLB_SRR0	(10 * 8)
+#define EX_TLB_SRR1	(11 * 8)
+#define EX_TLB_MMUCR0	(12 * 8) /* Level 0 */
+#define EX_TLB_MAS1	(12 * 8) /* Level 0 */
+#define EX_TLB_MAS2	(13 * 8) /* Level 0 */
+#ifdef CONFIG_BOOK3E_MMU_TLB_STATS
+#define EX_TLB_R8	(14 * 8)
+#define EX_TLB_R9	(15 * 8)
+#define EX_TLB_LR	(16 * 8)
+#define EX_TLB_SIZE	(17 * 8)
+#else
+#define EX_TLB_SIZE	(14 * 8)
+#endif
+
+#define	START_EXCEPTION(label)						\
+	.globl exc_##label##_book3e;					\
+exc_##label##_book3e:
+
+/* TLB miss exception prolog
+ *
+ * This prolog handles re-entrancy (up to 3 levels supported in the PACA
+ * though we currently don't test for overflow). It provides you with a
+ * re-entrancy safe working space of r10...r16 and CR with r12 being used
+ * as the exception area pointer in the PACA for that level of re-entrancy
+ * and r13 containing the PACA pointer.
+ *
+ * SRR0 and SRR1 are saved, but DEAR and ESR are not, since they don't apply
+ * as-is for instruction exceptions. It's up to the actual exception code
+ * to save them as well if required.
+ */
+#define TLB_MISS_PROLOG							    \
+	mtspr	SPRN_SPRG_TLB_SCRATCH,r12;				    \
+	mfspr	r12,SPRN_SPRG_TLB_EXFRAME;				    \
+	std	r10,EX_TLB_R10(r12);					    \
+	mfcr	r10;							    \
+	std	r11,EX_TLB_R11(r12);					    \
+	mfspr	r11,SPRN_SPRG_TLB_SCRATCH;				    \
+	std	r13,EX_TLB_R13(r12);					    \
+	mfspr	r13,SPRN_SPRG_PACA;					    \
+	std	r14,EX_TLB_R14(r12);					    \
+	addi	r14,r12,EX_TLB_SIZE;					    \
+	std	r15,EX_TLB_R15(r12);					    \
+	mfspr	r15,SPRN_SRR1;						    \
+	std	r16,EX_TLB_R16(r12);					    \
+	mfspr	r16,SPRN_SRR0;						    \
+	std	r10,EX_TLB_CR(r12);					    \
+	std	r11,EX_TLB_R12(r12);					    \
+	mtspr	SPRN_SPRG_TLB_EXFRAME,r14;				    \
+	std	r15,EX_TLB_SRR1(r12);					    \
+	std	r16,EX_TLB_SRR0(r12);					    \
+	TLB_MISS_PROLOG_STATS
+
+/* And these are the matching epilogs that restores things
+ *
+ * There are 3 epilogs:
+ *
+ * - SUCCESS       : Unwinds one level
+ * - ERROR         : restore from level 0 and reset
+ * - ERROR_SPECIAL : restore from current level and reset
+ *
+ * Normal errors use ERROR, that is, they restore the initial fault context
+ * and trigger a fault. However, there is a special case for linear mapping
+ * errors. Those should basically never happen, but if they do happen, we
+ * want the error to point out the context that did that linear mapping
+ * fault, not the initial level 0 (basically, we got a bogus PGF or something
+ * like that). For userland errors on the linear mapping, there is no
+ * difference since those are always level 0 anyway
+ */
+
+#define TLB_MISS_RESTORE(freg)						    \
+	ld	r14,EX_TLB_CR(r12);					    \
+	ld	r10,EX_TLB_R10(r12);					    \
+	ld	r15,EX_TLB_SRR0(r12);					    \
+	ld	r16,EX_TLB_SRR1(r12);					    \
+	mtspr	SPRN_SPRG_TLB_EXFRAME,freg;				    \
+	ld	r11,EX_TLB_R11(r12);					    \
+	mtcr	r14;							    \
+	ld	r13,EX_TLB_R13(r12);					    \
+	ld	r14,EX_TLB_R14(r12);					    \
+	mtspr	SPRN_SRR0,r15;						    \
+	ld	r15,EX_TLB_R15(r12);					    \
+	mtspr	SPRN_SRR1,r16;						    \
+	TLB_MISS_RESTORE_STATS						    \
+	ld	r16,EX_TLB_R16(r12);					    \
+	ld	r12,EX_TLB_R12(r12);					    \
+
+#define TLB_MISS_EPILOG_SUCCESS						    \
+	TLB_MISS_RESTORE(r12)
+
+#define TLB_MISS_EPILOG_ERROR						    \
+	addi	r12,r13,PACA_EXTLB;					    \
+	TLB_MISS_RESTORE(r12)
+
+#define TLB_MISS_EPILOG_ERROR_SPECIAL					    \
+	addi	r11,r13,PACA_EXTLB;					    \
+	TLB_MISS_RESTORE(r11)
+
+#ifdef CONFIG_BOOK3E_MMU_TLB_STATS
+#define TLB_MISS_PROLOG_STATS						    \
+	mflr	r10;							    \
+	std	r8,EX_TLB_R8(r12);					    \
+	std	r9,EX_TLB_R9(r12);					    \
+	std	r10,EX_TLB_LR(r12);
+#define TLB_MISS_RESTORE_STATS					            \
+	ld	r16,EX_TLB_LR(r12);					    \
+	ld	r9,EX_TLB_R9(r12);					    \
+	ld	r8,EX_TLB_R8(r12);					    \
+	mtlr	r16;
+#define TLB_MISS_STATS_D(name)						    \
+	addi	r9,r13,MMSTAT_DSTATS+name;				    \
+	bl	.tlb_stat_inc;
+#define TLB_MISS_STATS_I(name)						    \
+	addi	r9,r13,MMSTAT_ISTATS+name;				    \
+	bl	.tlb_stat_inc;
+#define TLB_MISS_STATS_X(name)						    \
+	ld	r8,PACA_EXTLB+EX_TLB_ESR(r13);				    \
+	cmpdi	cr2,r8,-1;						    \
+	beq	cr2,61f;						    \
+	addi	r9,r13,MMSTAT_DSTATS+name;				    \
+	b	62f;							    \
+61:	addi	r9,r13,MMSTAT_ISTATS+name;				    \
+62:	bl	.tlb_stat_inc;
+#define TLB_MISS_STATS_SAVE_INFO					    \
+	std	r14,EX_TLB_ESR(r12);	/* save ESR */			    \
+
+
+#else
+#define TLB_MISS_PROLOG_STATS
+#define TLB_MISS_RESTORE_STATS
+#define TLB_MISS_STATS_D(name)
+#define TLB_MISS_STATS_I(name)
+#define TLB_MISS_STATS_X(name)
+#define TLB_MISS_STATS_Y(name)
+#define TLB_MISS_STATS_SAVE_INFO
+#endif
+
+
+#endif /* _ASM_POWERPC_EXCEPTION_64E_H */
+

^ permalink raw reply

* [PATCH 16/20] powerpc: Add PACA fields specific to 64-bit Book3E processors
From: Benjamin Herrenschmidt @ 2009-07-24  9:15 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <1248426902.401617.944220131651.qpush@grosgo>

This adds various fields in the PACA that are for use specifically
by Book3E processors, such as exception save areas, current pgd
pointer, special exceptions kernel stacks etc...

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---

 arch/powerpc/include/asm/paca.h   |   23 ++++++++++++++++++++---
 arch/powerpc/kernel/asm-offsets.c |   14 ++++++++++++++
 arch/powerpc/kernel/paca.c        |    3 +++
 3 files changed, 37 insertions(+), 3 deletions(-)

--- linux-work.orig/arch/powerpc/include/asm/paca.h	2009-07-23 14:29:58.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/paca.h	2009-07-23 14:31:42.000000000 +1000
@@ -14,9 +14,11 @@
 #define _ASM_POWERPC_PACA_H
 #ifdef __KERNEL__
 
-#include	<asm/types.h>
-#include	<asm/lppaca.h>
-#include	<asm/mmu.h>
+#include <asm/types.h>
+#include <asm/lppaca.h>
+#include <asm/mmu.h>
+#include <asm/page.h>
+#include <asm/exception-64e.h>
 
 register struct paca_struct *local_paca asm("r13");
 
@@ -91,6 +93,21 @@ struct paca_struct {
 	u16 slb_cache[SLB_CACHE_ENTRIES];
 #endif /* CONFIG_PPC_STD_MMU_64 */
 
+#ifdef CONFIG_PPC_BOOK3E
+	pgd_t *pgd;			/* Current PGD */
+	pgd_t *kernel_pgd;		/* Kernel PGD */
+	u64 exgen[8] __attribute__((aligned(0x80)));
+	u64 extlb[EX_TLB_SIZE*3] __attribute__((aligned(0x80)));
+	u64 exmc[8];		/* used for machine checks */
+	u64 excrit[8];		/* used for crit interrupts */
+	u64 exdbg[8];		/* used for debug interrupts */
+
+	/* Kernel stack pointers for use by special exceptions */
+	void *mc_kstack;
+	void *crit_kstack;
+	void *dbg_kstack;
+#endif /* CONFIG_PPC_BOOK3E */
+
 	mm_context_t context;
 
 	/*
Index: linux-work/arch/powerpc/kernel/asm-offsets.c
===================================================================
--- linux-work.orig/arch/powerpc/kernel/asm-offsets.c	2009-07-23 14:31:35.000000000 +1000
+++ linux-work/arch/powerpc/kernel/asm-offsets.c	2009-07-23 14:31:42.000000000 +1000
@@ -140,6 +140,20 @@ int main(void)
 					    context.high_slices_psize));
 	DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def));
 #endif /* CONFIG_PPC_MM_SLICES */
+
+#ifdef CONFIG_PPC_BOOK3E
+	DEFINE(PACAPGD, offsetof(struct paca_struct, pgd));
+	DEFINE(PACA_KERNELPGD, offsetof(struct paca_struct, kernel_pgd));
+	DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen));
+	DEFINE(PACA_EXTLB, offsetof(struct paca_struct, extlb));
+	DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc));
+	DEFINE(PACA_EXCRIT, offsetof(struct paca_struct, excrit));
+	DEFINE(PACA_EXDBG, offsetof(struct paca_struct, exdbg));
+	DEFINE(PACA_MC_STACK, offsetof(struct paca_struct, mc_kstack));
+	DEFINE(PACA_CRIT_STACK, offsetof(struct paca_struct, crit_kstack));
+	DEFINE(PACA_DBG_STACK, offsetof(struct paca_struct, dbg_kstack));
+#endif /* CONFIG_PPC_BOOK3E */
+
 #ifdef CONFIG_PPC_STD_MMU_64
 	DEFINE(PACASTABREAL, offsetof(struct paca_struct, stab_real));
 	DEFINE(PACASTABVIRT, offsetof(struct paca_struct, stab_addr));
Index: linux-work/arch/powerpc/kernel/paca.c
===================================================================
--- linux-work.orig/arch/powerpc/kernel/paca.c	2009-07-23 14:29:58.000000000 +1000
+++ linux-work/arch/powerpc/kernel/paca.c	2009-07-23 14:31:42.000000000 +1000
@@ -13,6 +13,7 @@
 #include <asm/lppaca.h>
 #include <asm/paca.h>
 #include <asm/sections.h>
+#include <asm/pgtable.h>
 
 /* This symbol is provided by the linker - let it fill in the paca
  * field correctly */
@@ -87,6 +88,8 @@ void __init initialise_pacas(void)
 
 #ifdef CONFIG_PPC_BOOK3S
 		new_paca->lppaca_ptr = &lppaca[cpu];
+#else
+		new_paca->kernel_pgd = swapper_pg_dir;
 #endif
 		new_paca->lock_token = 0x8000;
 		new_paca->paca_index = cpu;

^ permalink raw reply

* [PATCH 17/20] powerpc/mm: Move around mmu_gathers definition on 64-bit (v2)
From: Benjamin Herrenschmidt @ 2009-07-24  9:15 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <1248426902.401617.944220131651.qpush@grosgo>

The definition for the global structure mmu_gathers, used by generic code,
is currently defined in multiple places not including anything used by
64-bit Book3E. This changes it by moving to one place common to all
processors.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---

v2. Fix issues due to changes in other patches

 arch/powerpc/mm/init_32.c    |    2 --
 arch/powerpc/mm/pgtable.c    |    2 ++
 arch/powerpc/mm/tlb_hash64.c |    5 -----
 3 files changed, 2 insertions(+), 7 deletions(-)

--- linux-work.orig/arch/powerpc/mm/tlb_hash64.c	2009-07-24 18:14:35.000000000 +1000
+++ linux-work/arch/powerpc/mm/tlb_hash64.c	2009-07-24 18:14:51.000000000 +1000
@@ -33,11 +33,6 @@
 
 DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
 
-/* This is declared as we are using the more or less generic
- * arch/powerpc/include/asm/tlb.h file -- tgall
- */
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
 /*
  * A linux PTE was changed and the corresponding hash table entry
  * neesd to be flushed. This function will either perform the flush
Index: linux-work/arch/powerpc/mm/init_32.c
===================================================================
--- linux-work.orig/arch/powerpc/mm/init_32.c	2009-07-24 18:14:35.000000000 +1000
+++ linux-work/arch/powerpc/mm/init_32.c	2009-07-24 18:14:51.000000000 +1000
@@ -54,8 +54,6 @@
 #endif
 #define MAX_LOW_MEM	CONFIG_LOWMEM_SIZE
 
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
 phys_addr_t total_memory;
 phys_addr_t total_lowmem;
 
Index: linux-work/arch/powerpc/mm/pgtable.c
===================================================================
--- linux-work.orig/arch/powerpc/mm/pgtable.c	2009-07-24 18:14:35.000000000 +1000
+++ linux-work/arch/powerpc/mm/pgtable.c	2009-07-24 18:15:05.000000000 +1000
@@ -30,6 +30,8 @@
 #include <asm/tlbflush.h>
 #include <asm/tlb.h>
 
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+
 #ifdef CONFIG_SMP
 
 /*

^ permalink raw reply

* [PATCH 18/20] powerpc: Add TLB management code for 64-bit Book3E (v2)
From: Benjamin Herrenschmidt @ 2009-07-24  9:15 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <1248426902.401617.944220131651.qpush@grosgo>

This adds the TLB miss handler assembly, the low level TLB flush routines
along with the necessary hook for dealing with our virtual page tables
or indirect TLB entries that need to be flushes when PTE pages are freed.

There is currently no support for hugetlbfs

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---

v2. Don't break non-Book3E nohash platforms

 arch/powerpc/include/asm/mmu-40x.h     |    3 
 arch/powerpc/include/asm/mmu-44x.h     |    6 
 arch/powerpc/include/asm/mmu-8xx.h     |    3 
 arch/powerpc/include/asm/mmu-hash32.h  |    6 
 arch/powerpc/include/asm/mmu_context.h |    8 
 arch/powerpc/kernel/setup_64.c         |    4 
 arch/powerpc/mm/mmu_decl.h             |   14 
 arch/powerpc/mm/tlb_low_64e.S          |  734 +++++++++++++++++++++++++++++++++
 arch/powerpc/mm/tlb_nohash.c           |  203 ++++++++-
 arch/powerpc/mm/tlb_nohash_low.S       |   79 +++
 10 files changed, 1055 insertions(+), 5 deletions(-)

--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-work/arch/powerpc/mm/tlb_low_64e.S	2009-07-24 18:15:31.000000000 +1000
@@ -0,0 +1,734 @@
+/*
+ *  Low leve TLB miss handlers for Book3E
+ *
+ *  Copyright (C) 2008-2009
+ *      Ben. Herrenschmidt (benh@kernel.crashing.org), IBM Corp.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/processor.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/cputable.h>
+#include <asm/pgtable.h>
+#include <asm/reg.h>
+#include <asm/exception-64e.h>
+#include <asm/ppc-opcode.h>
+
+#ifdef CONFIG_PPC_64K_PAGES
+#define VPTE_PMD_SHIFT	(PTE_INDEX_SIZE+1)
+#else
+#define VPTE_PMD_SHIFT	(PTE_INDEX_SIZE)
+#endif
+#define VPTE_PUD_SHIFT	(VPTE_PMD_SHIFT + PMD_INDEX_SIZE)
+#define VPTE_PGD_SHIFT	(VPTE_PUD_SHIFT + PUD_INDEX_SIZE)
+#define VPTE_INDEX_SIZE (VPTE_PGD_SHIFT + PGD_INDEX_SIZE)
+
+
+/**********************************************************************
+ *                                                                    *
+ * TLB miss handling for Book3E with TLB reservation and HES support  *
+ *                                                                    *
+ **********************************************************************/
+
+
+/* Data TLB miss */
+	START_EXCEPTION(data_tlb_miss)
+	TLB_MISS_PROLOG
+
+	/* Now we handle the fault proper. We only save DEAR in normal
+	 * fault case since that's the only interesting values here.
+	 * We could probably also optimize by not saving SRR0/1 in the
+	 * linear mapping case but I'll leave that for later
+	 */
+	mfspr	r14,SPRN_ESR
+	mfspr	r16,SPRN_DEAR		/* get faulting address */
+	srdi	r15,r16,60		/* get region */
+	cmpldi	cr0,r15,0xc		/* linear mapping ? */
+	TLB_MISS_STATS_SAVE_INFO
+	beq	tlb_load_linear		/* yes -> go to linear map load */
+
+	/* The page tables are mapped virtually linear. At this point, though,
+	 * we don't know whether we are trying to fault in a first level
+	 * virtual address or a virtual page table address. We can get that
+	 * from bit 0x1 of the region ID which we have set for a page table
+	 */
+	andi.	r10,r15,0x1
+	bne-	virt_page_table_tlb_miss
+
+	std	r14,EX_TLB_ESR(r12);	/* save ESR */
+	std	r16,EX_TLB_DEAR(r12);	/* save DEAR */
+
+	 /* We need _PAGE_PRESENT and  _PAGE_ACCESSED set */
+	li	r11,_PAGE_PRESENT
+	oris	r11,r11,_PAGE_ACCESSED@h
+
+	/* We do the user/kernel test for the PID here along with the RW test
+	 */
+	cmpldi	cr0,r15,0		/* Check for user region */
+
+	/* We pre-test some combination of permissions to avoid double
+	 * faults:
+	 *
+	 * We move the ESR:ST bit into the position of _PAGE_BAP_SW in the PTE
+	 * ESR_ST   is 0x00800000
+	 * _PAGE_BAP_SW is 0x00000010
+	 * So the shift is >> 19. This tests for supervisor writeability.
+	 * If the page happens to be supervisor writeable and not user
+	 * writeable, we will take a new fault later, but that should be
+	 * a rare enough case.
+	 *
+	 * We also move ESR_ST in _PAGE_DIRTY position
+	 * _PAGE_DIRTY is 0x00001000 so the shift is >> 11
+	 *
+	 * MAS1 is preset for all we need except for TID that needs to
+	 * be cleared for kernel translations
+	 */
+	rlwimi	r11,r14,32-19,27,27
+	rlwimi	r11,r14,32-16,19,19
+	beq	normal_tlb_miss
+	/* XXX replace the RMW cycles with immediate loads + writes */
+1:	mfspr	r10,SPRN_MAS1
+	cmpldi	cr0,r15,8		/* Check for vmalloc region */
+	rlwinm	r10,r10,0,16,1		/* Clear TID */
+	mtspr	SPRN_MAS1,r10
+	beq+	normal_tlb_miss
+
+	/* We got a crappy address, just fault with whatever DEAR and ESR
+	 * are here
+	 */
+	TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT)
+	TLB_MISS_EPILOG_ERROR
+	b	exc_data_storage_book3e
+
+/* Instruction TLB miss */
+	START_EXCEPTION(instruction_tlb_miss)
+	TLB_MISS_PROLOG
+
+	/* If we take a recursive fault, the second level handler may need
+	 * to know whether we are handling a data or instruction fault in
+	 * order to get to the right store fault handler. We provide that
+	 * info by writing a crazy value in ESR in our exception frame
+	 */
+	li	r14,-1	/* store to exception frame is done later */
+
+	/* Now we handle the fault proper. We only save DEAR in the non
+	 * linear mapping case since we know the linear mapping case will
+	 * not re-enter. We could indeed optimize and also not save SRR0/1
+	 * in the linear mapping case but I'll leave that for later
+	 *
+	 * Faulting address is SRR0 which is already in r16
+	 */
+	srdi	r15,r16,60		/* get region */
+	cmpldi	cr0,r15,0xc		/* linear mapping ? */
+	TLB_MISS_STATS_SAVE_INFO
+	beq	tlb_load_linear		/* yes -> go to linear map load */
+
+	/* We do the user/kernel test for the PID here along with the RW test
+	 */
+	li	r11,_PAGE_PRESENT|_PAGE_HWEXEC	/* Base perm */
+	oris	r11,r11,_PAGE_ACCESSED@h
+
+	cmpldi	cr0,r15,0			/* Check for user region */
+	std	r14,EX_TLB_ESR(r12)		/* write crazy -1 to frame */
+	beq	normal_tlb_miss
+	/* XXX replace the RMW cycles with immediate loads + writes */
+1:	mfspr	r10,SPRN_MAS1
+	cmpldi	cr0,r15,8			/* Check for vmalloc region */
+	rlwinm	r10,r10,0,16,1			/* Clear TID */
+	mtspr	SPRN_MAS1,r10
+	beq+	normal_tlb_miss
+
+	/* We got a crappy address, just fault */
+	TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
+	TLB_MISS_EPILOG_ERROR
+	b	exc_instruction_storage_book3e
+
+/*
+ * This is the guts of the first-level TLB miss handler for direct
+ * misses. We are entered with:
+ *
+ * r16 = faulting address
+ * r15 = region ID
+ * r14 = crap (free to use)
+ * r13 = PACA
+ * r12 = TLB exception frame in PACA
+ * r11 = PTE permission mask
+ * r10 = crap (free to use)
+ */
+normal_tlb_miss:
+	/* So we first construct the page table address. We do that by
+	 * shifting the bottom of the address (not the region ID) by
+	 * PAGE_SHIFT-3, clearing the bottom 3 bits (get a PTE ptr) and
+	 * or'ing the fourth high bit.
+	 *
+	 * NOTE: For 64K pages, we do things slightly differently in
+	 * order to handle the weird page table format used by linux
+	 */
+	ori	r10,r15,0x1
+#ifdef CONFIG_PPC_64K_PAGES
+	/* For the top bits, 16 bytes per PTE */
+	rldicl	r14,r16,64-(PAGE_SHIFT-4),PAGE_SHIFT-4+4
+	/* Now create the bottom bits as 0 in position 0x8000 and
+	 * the rest calculated for 8 bytes per PTE
+	 */
+	rldicl	r15,r16,64-(PAGE_SHIFT-3),64-15
+	/* Insert the bottom bits in */
+	rlwimi	r14,r15,0,16,31
+#else
+	rldicl	r14,r16,64-(PAGE_SHIFT-3),PAGE_SHIFT-3+4
+#endif
+	sldi	r15,r10,60
+	clrrdi	r14,r14,3
+	or	r10,r15,r14
+
+	/* Set the TLB reservation and seach for existing entry. Then load
+	 * the entry.
+	 */
+	PPC_TLBSRX_DOT(0,r16)
+	ld	r14,0(r10)
+	beq	normal_tlb_miss_done
+
+finish_normal_tlb_miss:
+	/* Check if required permissions are met */
+	andc.	r15,r11,r14
+	bne-	normal_tlb_miss_access_fault
+
+	/* Now we build the MAS:
+	 *
+	 * MAS 0   :	Fully setup with defaults in MAS4 and TLBnCFG
+	 * MAS 1   :	Almost fully setup
+	 *               - PID already updated by caller if necessary
+	 *               - TSIZE need change if !base page size, not
+	 *                 yet implemented for now
+	 * MAS 2   :	Defaults not useful, need to be redone
+	 * MAS 3+7 :	Needs to be done
+	 *
+	 * TODO: mix up code below for better scheduling
+	 */
+	clrrdi	r11,r16,12		/* Clear low crap in EA */
+	rlwimi	r11,r14,32-19,27,31	/* Insert WIMGE */
+	mtspr	SPRN_MAS2,r11
+
+	/* Check page size, if not standard, update MAS1 */
+	rldicl	r11,r14,64-8,64-8
+#ifdef CONFIG_PPC_64K_PAGES
+	cmpldi	cr0,r11,BOOK3E_PAGESZ_64K
+#else
+	cmpldi	cr0,r11,BOOK3E_PAGESZ_4K
+#endif
+	beq-	1f
+	mfspr	r11,SPRN_MAS1
+	rlwimi	r11,r14,31,21,24
+	rlwinm	r11,r11,0,21,19
+	mtspr	SPRN_MAS1,r11
+1:
+	/* Move RPN in position */
+	rldicr	r11,r14,64-(PTE_RPN_SHIFT-PAGE_SHIFT),63-PAGE_SHIFT
+	clrldi	r15,r11,12		/* Clear crap at the top */
+	rlwimi	r15,r14,32-8,22,25	/* Move in U bits */
+	rlwimi	r15,r14,32-2,26,31	/* Move in BAP bits */
+
+	/* Mask out SW and UW if !DIRTY (XXX optimize this !) */
+	andi.	r11,r14,_PAGE_DIRTY
+	bne	1f
+	li	r11,MAS3_SW|MAS3_UW
+	andc	r15,r15,r11
+1:	mtspr	SPRN_MAS7_MAS3,r15
+
+	tlbwe
+
+normal_tlb_miss_done:
+	/* We don't bother with restoring DEAR or ESR since we know we are
+	 * level 0 and just going back to userland. They are only needed
+	 * if you are going to take an access fault
+	 */
+	TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK)
+	TLB_MISS_EPILOG_SUCCESS
+	rfi
+
+normal_tlb_miss_access_fault:
+	/* We need to check if it was an instruction miss */
+	andi.	r10,r11,_PAGE_HWEXEC
+	bne	1f
+	ld	r14,EX_TLB_DEAR(r12)
+	ld	r15,EX_TLB_ESR(r12)
+	mtspr	SPRN_DEAR,r14
+	mtspr	SPRN_ESR,r15
+	TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT)
+	TLB_MISS_EPILOG_ERROR
+	b	exc_data_storage_book3e
+1:	TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
+	TLB_MISS_EPILOG_ERROR
+	b	exc_instruction_storage_book3e
+
+
+/*
+ * This is the guts of the second-level TLB miss handler for direct
+ * misses. We are entered with:
+ *
+ * r16 = virtual page table faulting address
+ * r15 = region (top 4 bits of address)
+ * r14 = crap (free to use)
+ * r13 = PACA
+ * r12 = TLB exception frame in PACA
+ * r11 = crap (free to use)
+ * r10 = crap (free to use)
+ *
+ * Note that this should only ever be called as a second level handler
+ * with the current scheme when using SW load.
+ * That means we can always get the original fault DEAR at
+ * EX_TLB_DEAR-EX_TLB_SIZE(r12)
+ *
+ * It can be re-entered by the linear mapping miss handler. However, to
+ * avoid too much complication, it will restart the whole fault at level
+ * 0 so we don't care too much about clobbers
+ *
+ * XXX That code was written back when we couldn't clobber r14. We can now,
+ * so we could probably optimize things a bit
+ */
+virt_page_table_tlb_miss:
+	/* Are we hitting a kernel page table ? */
+	andi.	r10,r15,0x8
+
+	/* The cool thing now is that r10 contains 0 for user and 8 for kernel,
+	 * and we happen to have the swapper_pg_dir at offset 8 from the user
+	 * pgdir in the PACA :-).
+	 */
+	add	r11,r10,r13
+
+	/* If kernel, we need to clear MAS1 TID */
+	beq	1f
+	/* XXX replace the RMW cycles with immediate loads + writes */
+	mfspr	r10,SPRN_MAS1
+	rlwinm	r10,r10,0,16,1			/* Clear TID */
+	mtspr	SPRN_MAS1,r10
+1:
+	/* Search if we already have a TLB entry for that virtual address, and
+	 * if we do, bail out.
+	 */
+	PPC_TLBSRX_DOT(0,r16)
+	beq	virt_page_table_tlb_miss_done
+
+	/* Now, we need to walk the page tables. First check if we are in
+	 * range.
+	 */
+	rldicl.	r10,r16,64-(VPTE_INDEX_SIZE+3),VPTE_INDEX_SIZE+3+4
+	bne-	virt_page_table_tlb_miss_fault
+
+	/* Get the PGD pointer */
+	ld	r15,PACAPGD(r11)
+	cmpldi	cr0,r15,0
+	beq-	virt_page_table_tlb_miss_fault
+
+	/* Get to PGD entry */
+	rldicl	r11,r16,64-VPTE_PGD_SHIFT,64-PGD_INDEX_SIZE-3
+	clrrdi	r10,r11,3
+	ldx	r15,r10,r15
+	cmpldi	cr0,r15,0
+	beq	virt_page_table_tlb_miss_fault
+
+#ifndef CONFIG_PPC_64K_PAGES
+	/* Get to PUD entry */
+	rldicl	r11,r16,64-VPTE_PUD_SHIFT,64-PUD_INDEX_SIZE-3
+	clrrdi	r10,r11,3
+	ldx	r15,r10,r15
+	cmpldi	cr0,r15,0
+	beq	virt_page_table_tlb_miss_fault
+#endif /* CONFIG_PPC_64K_PAGES */
+
+	/* Get to PMD entry */
+	rldicl	r11,r16,64-VPTE_PMD_SHIFT,64-PMD_INDEX_SIZE-3
+	clrrdi	r10,r11,3
+	ldx	r15,r10,r15
+	cmpldi	cr0,r15,0
+	beq	virt_page_table_tlb_miss_fault
+
+	/* Ok, we're all right, we can now create a kernel translation for
+	 * a 4K or 64K page from r16 -> r15.
+	 */
+	/* Now we build the MAS:
+	 *
+	 * MAS 0   :	Fully setup with defaults in MAS4 and TLBnCFG
+	 * MAS 1   :	Almost fully setup
+	 *               - PID already updated by caller if necessary
+	 *               - TSIZE for now is base page size always
+	 * MAS 2   :	Use defaults
+	 * MAS 3+7 :	Needs to be done
+	 *
+	 * So we only do MAS 2 and 3 for now...
+	 */
+	clrldi	r11,r15,4		/* remove region ID from RPN */
+	ori	r10,r11,1		/* Or-in SR */
+	mtspr	SPRN_MAS7_MAS3,r10
+
+	tlbwe
+
+virt_page_table_tlb_miss_done:
+
+	/* We have overriden MAS2:EPN but currently our primary TLB miss
+	 * handler will always restore it so that should not be an issue,
+	 * if we ever optimize the primary handler to not write MAS2 on
+	 * some cases, we'll have to restore MAS2:EPN here based on the
+	 * original fault's DEAR. If we do that we have to modify the
+	 * ITLB miss handler to also store SRR0 in the exception frame
+	 * as DEAR.
+	 *
+	 * However, one nasty thing we did is we cleared the reservation
+	 * (well, potentially we did). We do a trick here thus if we
+	 * are not a level 0 exception (we interrupted the TLB miss) we
+	 * offset the return address by -4 in order to replay the tlbsrx
+	 * instruction there
+	 */
+	subf	r10,r13,r12
+	cmpldi	cr0,r10,PACA_EXTLB+EX_TLB_SIZE
+	bne-	1f
+	ld	r11,PACA_EXTLB+EX_TLB_SIZE+EX_TLB_SRR0(r13)
+	addi	r10,r11,-4
+	std	r10,PACA_EXTLB+EX_TLB_SIZE+EX_TLB_SRR0(r13)
+1:
+	/* Return to caller, normal case */
+	TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_OK);
+	TLB_MISS_EPILOG_SUCCESS
+	rfi
+
+virt_page_table_tlb_miss_fault:
+	/* If we fault here, things are a little bit tricky. We need to call
+	 * either data or instruction store fault, and we need to retreive
+	 * the original fault address and ESR (for data).
+	 *
+	 * The thing is, we know that in normal circumstances, this is
+	 * always called as a second level tlb miss for SW load or as a first
+	 * level TLB miss for HW load, so we should be able to peek at the
+	 * relevant informations in the first exception frame in the PACA.
+	 *
+	 * However, we do need to double check that, because we may just hit
+	 * a stray kernel pointer or a userland attack trying to hit those
+	 * areas. If that is the case, we do a data fault. (We can't get here
+	 * from an instruction tlb miss anyway).
+	 *
+	 * Note also that when going to a fault, we must unwind the previous
+	 * level as well. Since we are doing that, we don't need to clear or
+	 * restore the TLB reservation neither.
+	 */
+	subf	r10,r13,r12
+	cmpldi	cr0,r10,PACA_EXTLB+EX_TLB_SIZE
+	bne-	virt_page_table_tlb_miss_whacko_fault
+
+	/* We dig the original DEAR and ESR from slot 0 */
+	ld	r15,EX_TLB_DEAR+PACA_EXTLB(r13)
+	ld	r16,EX_TLB_ESR+PACA_EXTLB(r13)
+
+	/* We check for the "special" ESR value for instruction faults */
+	cmpdi	cr0,r16,-1
+	beq	1f
+	mtspr	SPRN_DEAR,r15
+	mtspr	SPRN_ESR,r16
+	TLB_MISS_STATS_D(MMSTAT_TLB_MISS_PT_FAULT);
+	TLB_MISS_EPILOG_ERROR
+	b	exc_data_storage_book3e
+1:	TLB_MISS_STATS_I(MMSTAT_TLB_MISS_PT_FAULT);
+	TLB_MISS_EPILOG_ERROR
+	b	exc_instruction_storage_book3e
+
+virt_page_table_tlb_miss_whacko_fault:
+	/* The linear fault will restart everything so ESR and DEAR will
+	 * not have been clobbered, let's just fault with what we have
+	 */
+	TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_FAULT);
+	TLB_MISS_EPILOG_ERROR
+	b	exc_data_storage_book3e
+
+
+/**************************************************************
+ *                                                            *
+ * TLB miss handling for Book3E with hw page table support    *
+ *                                                            *
+ **************************************************************/
+
+
+/* Data TLB miss */
+	START_EXCEPTION(data_tlb_miss_htw)
+	TLB_MISS_PROLOG
+
+	/* Now we handle the fault proper. We only save DEAR in normal
+	 * fault case since that's the only interesting values here.
+	 * We could probably also optimize by not saving SRR0/1 in the
+	 * linear mapping case but I'll leave that for later
+	 */
+	mfspr	r14,SPRN_ESR
+	mfspr	r16,SPRN_DEAR		/* get faulting address */
+	srdi	r11,r16,60		/* get region */
+	cmpldi	cr0,r11,0xc		/* linear mapping ? */
+	TLB_MISS_STATS_SAVE_INFO
+	beq	tlb_load_linear		/* yes -> go to linear map load */
+
+	/* We do the user/kernel test for the PID here along with the RW test
+	 */
+	cmpldi	cr0,r11,0		/* Check for user region */
+	ld	r15,PACAPGD(r13)	/* Load user pgdir */
+	beq	htw_tlb_miss
+
+	/* XXX replace the RMW cycles with immediate loads + writes */
+1:	mfspr	r10,SPRN_MAS1
+	cmpldi	cr0,r11,8		/* Check for vmalloc region */
+	rlwinm	r10,r10,0,16,1		/* Clear TID */
+	mtspr	SPRN_MAS1,r10
+	ld	r15,PACA_KERNELPGD(r13)	/* Load kernel pgdir */
+	beq+	htw_tlb_miss
+
+	/* We got a crappy address, just fault with whatever DEAR and ESR
+	 * are here
+	 */
+	TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT)
+	TLB_MISS_EPILOG_ERROR
+	b	exc_data_storage_book3e
+
+/* Instruction TLB miss */
+	START_EXCEPTION(instruction_tlb_miss_htw)
+	TLB_MISS_PROLOG
+
+	/* If we take a recursive fault, the second level handler may need
+	 * to know whether we are handling a data or instruction fault in
+	 * order to get to the right store fault handler. We provide that
+	 * info by keeping a crazy value for ESR in r14
+	 */
+	li	r14,-1	/* store to exception frame is done later */
+
+	/* Now we handle the fault proper. We only save DEAR in the non
+	 * linear mapping case since we know the linear mapping case will
+	 * not re-enter. We could indeed optimize and also not save SRR0/1
+	 * in the linear mapping case but I'll leave that for later
+	 *
+	 * Faulting address is SRR0 which is already in r16
+	 */
+	srdi	r11,r16,60		/* get region */
+	cmpldi	cr0,r11,0xc		/* linear mapping ? */
+	TLB_MISS_STATS_SAVE_INFO
+	beq	tlb_load_linear		/* yes -> go to linear map load */
+
+	/* We do the user/kernel test for the PID here along with the RW test
+	 */
+	cmpldi	cr0,r11,0			/* Check for user region */
+	ld	r15,PACAPGD(r13)		/* Load user pgdir */
+	beq	htw_tlb_miss
+
+	/* XXX replace the RMW cycles with immediate loads + writes */
+1:	mfspr	r10,SPRN_MAS1
+	cmpldi	cr0,r11,8			/* Check for vmalloc region */
+	rlwinm	r10,r10,0,16,1			/* Clear TID */
+	mtspr	SPRN_MAS1,r10
+	ld	r15,PACA_KERNELPGD(r13)		/* Load kernel pgdir */
+	beq+	htw_tlb_miss
+
+	/* We got a crappy address, just fault */
+	TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
+	TLB_MISS_EPILOG_ERROR
+	b	exc_instruction_storage_book3e
+
+
+/*
+ * This is the guts of the second-level TLB miss handler for direct
+ * misses. We are entered with:
+ *
+ * r16 = virtual page table faulting address
+ * r15 = PGD pointer
+ * r14 = ESR
+ * r13 = PACA
+ * r12 = TLB exception frame in PACA
+ * r11 = crap (free to use)
+ * r10 = crap (free to use)
+ *
+ * It can be re-entered by the linear mapping miss handler. However, to
+ * avoid too much complication, it will save/restore things for us
+ */
+htw_tlb_miss:
+	/* Search if we already have a TLB entry for that virtual address, and
+	 * if we do, bail out.
+	 *
+	 * MAS1:IND should be already set based on MAS4
+	 */
+	PPC_TLBSRX_DOT(0,r16)
+	beq	htw_tlb_miss_done
+
+	/* Now, we need to walk the page tables. First check if we are in
+	 * range.
+	 */
+	rldicl.	r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4
+	bne-	htw_tlb_miss_fault
+
+	/* Get the PGD pointer */
+	cmpldi	cr0,r15,0
+	beq-	htw_tlb_miss_fault
+
+	/* Get to PGD entry */
+	rldicl	r11,r16,64-(PGDIR_SHIFT-3),64-PGD_INDEX_SIZE-3
+	clrrdi	r10,r11,3
+	ldx	r15,r10,r15
+	cmpldi	cr0,r15,0
+	beq	htw_tlb_miss_fault
+
+#ifndef CONFIG_PPC_64K_PAGES
+	/* Get to PUD entry */
+	rldicl	r11,r16,64-(PUD_SHIFT-3),64-PUD_INDEX_SIZE-3
+	clrrdi	r10,r11,3
+	ldx	r15,r10,r15
+	cmpldi	cr0,r15,0
+	beq	htw_tlb_miss_fault
+#endif /* CONFIG_PPC_64K_PAGES */
+
+	/* Get to PMD entry */
+	rldicl	r11,r16,64-(PMD_SHIFT-3),64-PMD_INDEX_SIZE-3
+	clrrdi	r10,r11,3
+	ldx	r15,r10,r15
+	cmpldi	cr0,r15,0
+	beq	htw_tlb_miss_fault
+
+	/* Ok, we're all right, we can now create an indirect entry for
+	 * a 1M or 256M page.
+	 *
+	 * The last trick is now that because we use "half" pages for
+	 * the HTW (1M IND is 2K and 256M IND is 32K) we need to account
+	 * for an added LSB bit to the RPN. For 64K pages, there is no
+	 * problem as we already use 32K arrays (half PTE pages), but for
+	 * 4K page we need to extract a bit from the virtual address and
+	 * insert it into the "PA52" bit of the RPN.
+	 */
+#ifndef CONFIG_PPC_64K_PAGES
+	rlwimi	r15,r16,32-9,20,20
+#endif
+	/* Now we build the MAS:
+	 *
+	 * MAS 0   :	Fully setup with defaults in MAS4 and TLBnCFG
+	 * MAS 1   :	Almost fully setup
+	 *               - PID already updated by caller if necessary
+	 *               - TSIZE for now is base ind page size always
+	 * MAS 2   :	Use defaults
+	 * MAS 3+7 :	Needs to be done
+	 */
+#ifdef CONFIG_PPC_64K_PAGES
+	ori	r10,r15,(BOOK3E_PAGESZ_64K << MAS3_SPSIZE_SHIFT)
+#else
+	ori	r10,r15,(BOOK3E_PAGESZ_4K << MAS3_SPSIZE_SHIFT)
+#endif
+	mtspr	SPRN_MAS7_MAS3,r10
+
+	tlbwe
+
+htw_tlb_miss_done:
+	/* We don't bother with restoring DEAR or ESR since we know we are
+	 * level 0 and just going back to userland. They are only needed
+	 * if you are going to take an access fault
+	 */
+	TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_OK)
+	TLB_MISS_EPILOG_SUCCESS
+	rfi
+
+htw_tlb_miss_fault:
+	/* We need to check if it was an instruction miss. We know this
+	 * though because r14 would contain -1
+	 */
+	cmpdi	cr0,r14,-1
+	beq	1f
+	mtspr	SPRN_DEAR,r16
+	mtspr	SPRN_ESR,r14
+	TLB_MISS_STATS_D(MMSTAT_TLB_MISS_PT_FAULT)
+	TLB_MISS_EPILOG_ERROR
+	b	exc_data_storage_book3e
+1:	TLB_MISS_STATS_I(MMSTAT_TLB_MISS_PT_FAULT)
+	TLB_MISS_EPILOG_ERROR
+	b	exc_instruction_storage_book3e
+
+/*
+ * This is the guts of "any" level TLB miss handler for kernel linear
+ * mapping misses. We are entered with:
+ *
+ *
+ * r16 = faulting address
+ * r15 = crap (free to use)
+ * r14 = ESR (data) or -1 (instruction)
+ * r13 = PACA
+ * r12 = TLB exception frame in PACA
+ * r11 = crap (free to use)
+ * r10 = crap (free to use)
+ *
+ * In addition we know that we will not re-enter, so in theory, we could
+ * use a simpler epilog not restoring SRR0/1 etc.. but we'll do that later.
+ *
+ * We also need to be careful about MAS registers here & TLB reservation,
+ * as we know we'll have clobbered them if we interrupt the main TLB miss
+ * handlers in which case we probably want to do a full restart at level
+ * 0 rather than saving / restoring the MAS.
+ *
+ * Note: If we care about performance of that core, we can easily shuffle
+ *       a few things around
+ */
+tlb_load_linear:
+	/* For now, we assume the linear mapping is contiguous and stops at
+	 * linear_map_top. We also assume the size is a multiple of 1G, thus
+	 * we only use 1G pages for now. That might have to be changed in a
+	 * final implementation, especially when dealing with hypervisors
+	 */
+	ld	r11,PACATOC(r13)
+	ld	r11,linear_map_top@got(r11)
+	ld	r10,0(r11)
+	cmpld	cr0,r10,r16
+	bge	tlb_load_linear_fault
+
+	/* MAS1 need whole new setup. */
+	li	r15,(BOOK3E_PAGESZ_1GB<<MAS1_TSIZE_SHIFT)
+	oris	r15,r15,MAS1_VALID@h	/* MAS1 needs V and TSIZE */
+	mtspr	SPRN_MAS1,r15
+
+	/* Already somebody there ? */
+	PPC_TLBSRX_DOT(0,r16)
+	beq	tlb_load_linear_done
+
+	/* Now we build the remaining MAS. MAS0 and 2 should be fine
+	 * with their defaults, which leaves us with MAS 3 and 7. The
+	 * mapping is linear, so we just take the address, clear the
+	 * region bits, and or in the permission bits which are currently
+	 * hard wired
+	 */
+	clrrdi	r10,r16,30		/* 1G page index */
+	clrldi	r10,r10,4		/* clear region bits */
+	ori	r10,r10,MAS3_SR|MAS3_SW|MAS3_SX
+	mtspr	SPRN_MAS7_MAS3,r10
+
+	tlbwe
+
+tlb_load_linear_done:
+	/* We use the "error" epilog for success as we do want to
+	 * restore to the initial faulting context, whatever it was.
+	 * We do that because we can't resume a fault within a TLB
+	 * miss handler, due to MAS and TLB reservation being clobbered.
+	 */
+	TLB_MISS_STATS_X(MMSTAT_TLB_MISS_LINEAR)
+	TLB_MISS_EPILOG_ERROR
+	rfi
+
+tlb_load_linear_fault:
+	/* We keep the DEAR and ESR around, this shouldn't have happened */
+	cmpdi	cr0,r14,-1
+	beq	1f
+	TLB_MISS_EPILOG_ERROR_SPECIAL
+	b	exc_data_storage_book3e
+1:	TLB_MISS_EPILOG_ERROR_SPECIAL
+	b	exc_instruction_storage_book3e
+
+
+#ifdef CONFIG_BOOK3E_MMU_TLB_STATS
+.tlb_stat_inc:
+1:	ldarx	r8,0,r9
+	addi	r8,r8,1
+	stdcx.	r8,0,r9
+	bne-	1b
+	blr
+#endif
Index: linux-work/arch/powerpc/mm/mmu_decl.h
===================================================================
--- linux-work.orig/arch/powerpc/mm/mmu_decl.h	2009-07-24 18:14:35.000000000 +1000
+++ linux-work/arch/powerpc/mm/mmu_decl.h	2009-07-24 18:15:35.000000000 +1000
@@ -41,7 +41,11 @@ static inline void _tlbil_pid(unsigned i
 #else /* CONFIG_40x || CONFIG_8xx */
 extern void _tlbil_all(void);
 extern void _tlbil_pid(unsigned int pid);
+#ifdef CONFIG_PPC_BOOK3E
+extern void _tlbil_pid_noind(unsigned int pid);
+#else
 #define _tlbil_pid_noind(pid)	_tlbil_pid(pid)
+#endif
 #endif /* !(CONFIG_40x || CONFIG_8xx) */
 
 /*
@@ -53,7 +57,10 @@ static inline void _tlbil_va(unsigned lo
 {
 	asm volatile ("tlbie %0; sync" : : "r" (address) : "memory");
 }
-#else /* CONFIG_8xx */
+#elif defined(CONFIG_PPC_BOOK3E)
+extern void _tlbil_va(unsigned long address, unsigned int pid,
+		      unsigned int tsize, unsigned int ind);
+#else
 extern void __tlbil_va(unsigned long address, unsigned int pid);
 static inline void _tlbil_va(unsigned long address, unsigned int pid,
 			     unsigned int tsize, unsigned int ind)
@@ -67,11 +74,16 @@ static inline void _tlbil_va(unsigned lo
  * implementation. When that becomes the case, this will be
  * an extern.
  */
+#ifdef CONFIG_PPC_BOOK3E
+extern void _tlbivax_bcast(unsigned long address, unsigned int pid,
+			   unsigned int tsize, unsigned int ind);
+#else
 static inline void _tlbivax_bcast(unsigned long address, unsigned int pid,
 				   unsigned int tsize, unsigned int ind)
 {
 	BUG();
 }
+#endif
 
 #else /* CONFIG_PPC_MMU_NOHASH */
 
Index: linux-work/arch/powerpc/mm/tlb_nohash.c
===================================================================
--- linux-work.orig/arch/powerpc/mm/tlb_nohash.c	2009-07-24 18:14:35.000000000 +1000
+++ linux-work/arch/powerpc/mm/tlb_nohash.c	2009-07-24 18:15:35.000000000 +1000
@@ -7,8 +7,8 @@
  *
  *  -- BenH
  *
- * Copyright 2008 Ben Herrenschmidt <benh@kernel.crashing.org>
- *                IBM Corp.
+ * Copyright 2008,2009 Ben Herrenschmidt <benh@kernel.crashing.org>
+ *                     IBM Corp.
  *
  *  Derived from arch/ppc/mm/init.c:
  *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
@@ -34,12 +34,70 @@
 #include <linux/pagemap.h>
 #include <linux/preempt.h>
 #include <linux/spinlock.h>
+#include <linux/lmb.h>
 
 #include <asm/tlbflush.h>
 #include <asm/tlb.h>
+#include <asm/code-patching.h>
 
 #include "mmu_decl.h"
 
+#ifdef CONFIG_PPC_BOOK3E
+struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
+	[MMU_PAGE_4K] = {
+		.shift	= 12,
+		.enc	= BOOK3E_PAGESZ_4K,
+	},
+	[MMU_PAGE_16K] = {
+		.shift	= 14,
+		.enc	= BOOK3E_PAGESZ_16K,
+	},
+	[MMU_PAGE_64K] = {
+		.shift	= 16,
+		.enc	= BOOK3E_PAGESZ_64K,
+	},
+	[MMU_PAGE_1M] = {
+		.shift	= 20,
+		.enc	= BOOK3E_PAGESZ_1M,
+	},
+	[MMU_PAGE_16M] = {
+		.shift	= 24,
+		.enc	= BOOK3E_PAGESZ_16M,
+	},
+	[MMU_PAGE_256M] = {
+		.shift	= 28,
+		.enc	= BOOK3E_PAGESZ_256M,
+	},
+	[MMU_PAGE_1G] = {
+		.shift	= 30,
+		.enc	= BOOK3E_PAGESZ_1GB,
+	},
+};
+static inline int mmu_get_tsize(int psize)
+{
+	return mmu_psize_defs[psize].enc;
+}
+#else
+static inline int mmu_get_tsize(int psize)
+{
+	/* This isn't used on !Book3E for now */
+	return 0;
+}
+#endif
+
+/* The variables below are currently only used on 64-bit Book3E
+ * though this will probably be made common with other nohash
+ * implementations at some point
+ */
+#ifdef CONFIG_PPC64
+
+int mmu_linear_psize;		/* Page size used for the linear mapping */
+int mmu_pte_psize;		/* Page size used for PTE pages */
+int book3e_htw_enabled;		/* Is HW tablewalk enabled ? */
+unsigned long linear_map_top;	/* Top of linear mapping */
+
+#endif /* CONFIG_PPC64 */
+
 /*
  * Base TLB flushing operations:
  *
@@ -82,7 +140,7 @@ void __local_flush_tlb_page(struct mm_st
 void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
 {
 	__local_flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr,
-			       0 /* tsize unused for now */, 0);
+			       mmu_get_tsize(mmu_virtual_psize), 0);
 }
 EXPORT_SYMBOL(local_flush_tlb_page);
 
@@ -198,7 +256,7 @@ void __flush_tlb_page(struct mm_struct *
 void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
 {
 	__flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr,
-			 0 /* tsize unused for now */, 0);
+			 mmu_get_tsize(mmu_virtual_psize), 0);
 }
 EXPORT_SYMBOL(flush_tlb_page);
 
@@ -241,3 +299,140 @@ void tlb_flush(struct mmu_gather *tlb)
 	/* Push out batch of freed page tables */
 	pte_free_finish();
 }
+
+/*
+ * Below are functions specific to the 64-bit variant of Book3E though that
+ * may change in the future
+ */
+
+#ifdef CONFIG_PPC64
+
+/*
+ * Handling of virtual linear page tables or indirect TLB entries
+ * flushing when PTE pages are freed
+ */
+void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address)
+{
+	int tsize = mmu_psize_defs[mmu_pte_psize].enc;
+
+	if (book3e_htw_enabled) {
+		unsigned long start = address & PMD_MASK;
+		unsigned long end = address + PMD_SIZE;
+		unsigned long size = 1UL << mmu_psize_defs[mmu_pte_psize].shift;
+
+		/* This isn't the most optimal, ideally we would factor out the
+		 * while preempt & CPU mask mucking around, or even the IPI but
+		 * it will do for now
+		 */
+		while (start < end) {
+			__flush_tlb_page(tlb->mm, start, tsize, 1);
+			start += size;
+		}
+	} else {
+		unsigned long rmask = 0xf000000000000000ul;
+		unsigned long rid = (address & rmask) | 0x1000000000000000ul;
+		unsigned long vpte = address & ~rmask;
+
+#ifdef CONFIG_PPC_64K_PAGES
+		vpte = (vpte >> (PAGE_SHIFT - 4)) & ~0xfffful;
+#else
+		vpte = (vpte >> (PAGE_SHIFT - 3)) & ~0xffful;
+#endif
+		vpte |= rid;
+		__flush_tlb_page(tlb->mm, vpte, tsize, 0);
+	}
+}
+
+/*
+ * Early initialization of the MMU TLB code
+ */
+static void __early_init_mmu(int boot_cpu)
+{
+	extern unsigned int interrupt_base_book3e;
+	extern unsigned int exc_data_tlb_miss_htw_book3e;
+	extern unsigned int exc_instruction_tlb_miss_htw_book3e;
+
+	unsigned int *ibase = &interrupt_base_book3e;
+	unsigned int mas4;
+
+	/* XXX This will have to be decided at runtime, but right
+	 * now our boot and TLB miss code hard wires it
+	 */
+	mmu_linear_psize = MMU_PAGE_1G;
+
+
+	/* Check if HW tablewalk is present, and if yes, enable it by:
+	 *
+	 * - patching the TLB miss handlers to branch to the
+	 *   one dedicates to it
+	 *
+	 * - setting the global book3e_htw_enabled
+	 *
+	 * - Set MAS4:INDD and default page size
+	 */
+
+	/* XXX This code only checks for TLB 0 capabilities and doesn't
+	 *     check what page size combos are supported by the HW. It
+	 *     also doesn't handle the case where a separate array holds
+	 *     the IND entries from the array loaded by the PT.
+	 */
+	if (boot_cpu) {
+		unsigned int tlb0cfg = mfspr(SPRN_TLB0CFG);
+
+		/* Check if HW loader is supported */
+		if ((tlb0cfg & TLBnCFG_IND) &&
+		    (tlb0cfg & TLBnCFG_PT)) {
+			patch_branch(ibase + (0x1c0 / 4),
+			     (unsigned long)&exc_data_tlb_miss_htw_book3e, 0);
+			patch_branch(ibase + (0x1e0 / 4),
+			     (unsigned long)&exc_instruction_tlb_miss_htw_book3e, 0);
+			book3e_htw_enabled = 1;
+		}
+		pr_info("MMU: Book3E Page Tables %s\n",
+			book3e_htw_enabled ? "Enabled" : "Disabled");
+	}
+
+	/* Set MAS4 based on page table setting */
+
+	mas4 = 0x4 << MAS4_WIMGED_SHIFT;
+	if (book3e_htw_enabled) {
+		mas4 |= mas4 | MAS4_INDD;
+#ifdef CONFIG_PPC_64K_PAGES
+		mas4 |=	BOOK3E_PAGESZ_256M << MAS4_TSIZED_SHIFT;
+		mmu_pte_psize = MMU_PAGE_256M;
+#else
+		mas4 |=	BOOK3E_PAGESZ_1M << MAS4_TSIZED_SHIFT;
+		mmu_pte_psize = MMU_PAGE_1M;
+#endif
+	} else {
+#ifdef CONFIG_PPC_64K_PAGES
+		mas4 |=	BOOK3E_PAGESZ_64K << MAS4_TSIZED_SHIFT;
+#else
+		mas4 |=	BOOK3E_PAGESZ_4K << MAS4_TSIZED_SHIFT;
+#endif
+		mmu_pte_psize = mmu_virtual_psize;
+	}
+	mtspr(SPRN_MAS4, mas4);
+
+	/* Set the global containing the top of the linear mapping
+	 * for use by the TLB miss code
+	 */
+	linear_map_top = lmb_end_of_DRAM();
+
+	/* A sync won't hurt us after mucking around with
+	 * the MMU configuration
+	 */
+	mb();
+}
+
+void __init early_init_mmu(void)
+{
+	__early_init_mmu(1);
+}
+
+void __cpuinit early_init_mmu_secondary(void)
+{
+	__early_init_mmu(0);
+}
+
+#endif /* CONFIG_PPC64 */
Index: linux-work/arch/powerpc/include/asm/mmu_context.h
===================================================================
--- linux-work.orig/arch/powerpc/include/asm/mmu_context.h	2009-07-24 18:14:35.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/mmu_context.h	2009-07-24 18:15:31.000000000 +1000
@@ -43,6 +43,10 @@ static inline void switch_mm(struct mm_s
 	tsk->thread.pgdir = next->pgd;
 #endif /* CONFIG_PPC32 */
 
+	/* 64-bit Book3E keeps track of current PGD in the PACA */
+#ifdef CONFIG_PPC_BOOK3E_64
+	get_paca()->pgd = next->pgd;
+#endif
 	/* Nothing else to do if we aren't actually switching */
 	if (prev == next)
 		return;
@@ -89,6 +93,10 @@ static inline void activate_mm(struct mm
 static inline void enter_lazy_tlb(struct mm_struct *mm,
 				  struct task_struct *tsk)
 {
+ 	/* 64-bit Book3E keeps track of current PGD in the PACA */
+#ifdef CONFIG_PPC_BOOK3E_64
+      	get_paca()->pgd = NULL;
+#endif
 }
 
 #endif /* __KERNEL__ */
Index: linux-work/arch/powerpc/kernel/setup_64.c
===================================================================
--- linux-work.orig/arch/powerpc/kernel/setup_64.c	2009-07-24 18:14:35.000000000 +1000
+++ linux-work/arch/powerpc/kernel/setup_64.c	2009-07-24 18:15:34.000000000 +1000
@@ -62,6 +62,7 @@
 #include <asm/udbg.h>
 #include <asm/kexec.h>
 #include <asm/swiotlb.h>
+#include <asm/mmu_context.h>
 
 #include "setup.h"
 
@@ -147,6 +148,9 @@ void __init setup_paca(int cpu)
 {
 	local_paca = &paca[cpu];
 	mtspr(SPRN_SPRG_PACA, local_paca);
+#ifdef CONFIG_PPC_BOOK3E
+	mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb);
+#endif
 }
 
 /*
Index: linux-work/arch/powerpc/mm/tlb_nohash_low.S
===================================================================
--- linux-work.orig/arch/powerpc/mm/tlb_nohash_low.S	2009-07-24 18:14:35.000000000 +1000
+++ linux-work/arch/powerpc/mm/tlb_nohash_low.S	2009-07-24 18:15:31.000000000 +1000
@@ -191,6 +191,85 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_US
 	isync
 1:	wrtee	r10
 	blr
+#elif defined(CONFIG_PPC_BOOK3E)
+/*
+ * New Book3E (>= 2.06) implementation
+ *
+ * Note: We may be able to get away without the interrupt masking stuff
+ * if we save/restore MAS6 on exceptions that might modify it
+ */
+_GLOBAL(_tlbil_pid)
+	slwi	r4,r3,MAS6_SPID_SHIFT
+	mfmsr	r10
+	wrteei	0
+	mtspr	SPRN_MAS6,r4
+	PPC_TLBILX_PID(0,0)
+	wrtee	r10
+	msync
+	isync
+	blr
+
+_GLOBAL(_tlbil_pid_noind)
+	slwi	r4,r3,MAS6_SPID_SHIFT
+	mfmsr	r10
+	ori	r4,r4,MAS6_SIND
+	wrteei	0
+	mtspr	SPRN_MAS6,r4
+	PPC_TLBILX_PID(0,0)
+	wrtee	r10
+	msync
+	isync
+	blr
+
+_GLOBAL(_tlbil_all)
+	PPC_TLBILX_ALL(0,0)
+	msync
+	isync
+	blr
+
+_GLOBAL(_tlbil_va)
+	mfmsr	r10
+	wrteei	0
+	cmpwi	cr0,r6,0
+	slwi	r4,r4,MAS6_SPID_SHIFT
+	rlwimi	r4,r5,MAS6_ISIZE_SHIFT,MAS6_ISIZE_MASK
+	beq	1f
+	rlwimi	r4,r6,MAS6_SIND_SHIFT,MAS6_SIND
+1:	mtspr	SPRN_MAS6,r4		/* assume AS=0 for now */
+	PPC_TLBILX_VA(0,r3)
+	msync
+	isync
+	wrtee	r10
+	blr
+
+_GLOBAL(_tlbivax_bcast)
+	mfmsr	r10
+	wrteei	0
+	cmpwi	cr0,r6,0
+	slwi	r4,r4,MAS6_SPID_SHIFT
+	rlwimi	r4,r5,MAS6_ISIZE_SHIFT,MAS6_ISIZE_MASK
+	beq	1f
+	rlwimi	r4,r6,MAS6_SIND_SHIFT,MAS6_SIND
+1:	mtspr	SPRN_MAS6,r4		/* assume AS=0 for now */
+	PPC_TLBIVAX(0,r3)
+	eieio
+	tlbsync
+	sync
+	wrtee	r10
+	blr
+
+_GLOBAL(set_context)
+#ifdef CONFIG_BDI_SWITCH
+	/* Context switch the PTE pointer for the Abatron BDI2000.
+	 * The PGDIR is the second parameter.
+	 */
+	lis	r5, abatron_pteptrs@h
+	ori	r5, r5, abatron_pteptrs@l
+	stw	r4, 0x4(r5)
+#endif
+	mtspr	SPRN_PID,r3
+	isync			/* Force context change */
+	blr
 #else
 #error Unsupported processor type !
 #endif
Index: linux-work/arch/powerpc/include/asm/mmu-40x.h
===================================================================
--- linux-work.orig/arch/powerpc/include/asm/mmu-40x.h	2009-07-24 18:14:35.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/mmu-40x.h	2009-07-24 18:15:31.000000000 +1000
@@ -61,4 +61,7 @@ typedef struct {
 
 #endif /* !__ASSEMBLY__ */
 
+#define mmu_virtual_psize	MMU_PAGE_4K
+#define mmu_linear_psize	MMU_PAGE_256M
+
 #endif /* _ASM_POWERPC_MMU_40X_H_ */
Index: linux-work/arch/powerpc/include/asm/mmu-44x.h
===================================================================
--- linux-work.orig/arch/powerpc/include/asm/mmu-44x.h	2009-07-24 18:14:35.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/mmu-44x.h	2009-07-24 18:15:31.000000000 +1000
@@ -79,16 +79,22 @@ typedef struct {
 
 #if (PAGE_SHIFT == 12)
 #define PPC44x_TLBE_SIZE	PPC44x_TLB_4K
+#define mmu_virtual_psize	MMU_PAGE_4K
 #elif (PAGE_SHIFT == 14)
 #define PPC44x_TLBE_SIZE	PPC44x_TLB_16K
+#define mmu_virtual_psize	MMU_PAGE_16K
 #elif (PAGE_SHIFT == 16)
 #define PPC44x_TLBE_SIZE	PPC44x_TLB_64K
+#define mmu_virtual_psize	MMU_PAGE_64K
 #elif (PAGE_SHIFT == 18)
 #define PPC44x_TLBE_SIZE	PPC44x_TLB_256K
+#define mmu_virtual_psize	MMU_PAGE_256K
 #else
 #error "Unsupported PAGE_SIZE"
 #endif
 
+#define mmu_linear_psize	MMU_PAGE_256M
+
 #define PPC44x_PGD_OFF_SHIFT	(32 - PGDIR_SHIFT + PGD_T_LOG2)
 #define PPC44x_PGD_OFF_MASK_BIT	(PGDIR_SHIFT - PGD_T_LOG2)
 #define PPC44x_PTE_ADD_SHIFT	(32 - PGDIR_SHIFT + PTE_SHIFT + PTE_T_LOG2)
Index: linux-work/arch/powerpc/include/asm/mmu-8xx.h
===================================================================
--- linux-work.orig/arch/powerpc/include/asm/mmu-8xx.h	2009-07-24 18:14:35.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/mmu-8xx.h	2009-07-24 18:15:31.000000000 +1000
@@ -143,4 +143,7 @@ typedef struct {
 } mm_context_t;
 #endif /* !__ASSEMBLY__ */
 
+#define mmu_virtual_psize	MMU_PAGE_4K
+#define mmu_linear_psize	MMU_PAGE_8M
+
 #endif /* _ASM_POWERPC_MMU_8XX_H_ */
Index: linux-work/arch/powerpc/include/asm/mmu-hash32.h
===================================================================
--- linux-work.orig/arch/powerpc/include/asm/mmu-hash32.h	2009-07-24 18:14:35.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/mmu-hash32.h	2009-07-24 18:15:31.000000000 +1000
@@ -80,4 +80,10 @@ typedef struct {
 
 #endif /* !__ASSEMBLY__ */
 
+/* We happily ignore the smaller BATs on 601, we don't actually use
+ * those definitions on hash32 at the moment anyway
+ */
+#define mmu_virtual_psize	MMU_PAGE_4K
+#define mmu_linear_psize	MMU_PAGE_256M
+
 #endif /* _ASM_POWERPC_MMU_HASH32_H_ */

^ permalink raw reply

* [PATCH 19/20] powerpc/mm: Add support for SPARSEMEM_VMEMMAP on 64-bit Book3E
From: Benjamin Herrenschmidt @ 2009-07-24  9:15 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <1248426902.401617.944220131651.qpush@grosgo>

The base TLB support didn't include support for SPARSEMEM_VMEMMAP, though
we did carve out some virtual space for it, the necessary support code
wasn't there. This implements it by using 16M pages for now, though the
page size could easily be changed at runtime if necessary.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---

 arch/powerpc/include/asm/mmu-book3e.h    |    1 
 arch/powerpc/include/asm/pgtable-ppc64.h |    3 +
 arch/powerpc/mm/init_64.c                |   55 +++++++++++++++++++++++++++----
 arch/powerpc/mm/mmu_decl.h               |    7 +++
 arch/powerpc/mm/pgtable_64.c             |    2 -
 arch/powerpc/mm/tlb_nohash.c             |   11 +++++-
 6 files changed, 68 insertions(+), 11 deletions(-)

--- linux-work.orig/arch/powerpc/include/asm/mmu-book3e.h	2009-07-24 18:15:35.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/mmu-book3e.h	2009-07-24 18:23:51.000000000 +1000
@@ -196,6 +196,7 @@ extern struct mmu_psize_def mmu_psize_de
 #endif
 
 extern int mmu_linear_psize;
+extern int mmu_vmemmap_psize;
 
 #endif /* !__ASSEMBLY__ */
 
Index: linux-work/arch/powerpc/include/asm/pgtable-ppc64.h
===================================================================
--- linux-work.orig/arch/powerpc/include/asm/pgtable-ppc64.h	2009-07-24 18:15:35.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/pgtable-ppc64.h	2009-07-24 18:23:51.000000000 +1000
@@ -46,6 +46,7 @@
 /*
  * The vmalloc space starts at the beginning of that region, and
  * occupies half of it on hash CPUs and a quarter of it on Book3E
+ * (we keep a quarter for the virtual memmap)
  */
 #define VMALLOC_START	KERN_VIRT_START
 #ifdef CONFIG_PPC_BOOK3E
@@ -83,7 +84,7 @@
 
 #define VMALLOC_REGION_ID	(REGION_ID(VMALLOC_START))
 #define KERNEL_REGION_ID	(REGION_ID(PAGE_OFFSET))
-#define VMEMMAP_REGION_ID	(0xfUL)
+#define VMEMMAP_REGION_ID	(0xfUL)	/* Server only */
 #define USER_REGION_ID		(0UL)
 
 /*
Index: linux-work/arch/powerpc/mm/init_64.c
===================================================================
--- linux-work.orig/arch/powerpc/mm/init_64.c	2009-07-24 18:15:35.000000000 +1000
+++ linux-work/arch/powerpc/mm/init_64.c	2009-07-24 18:23:51.000000000 +1000
@@ -205,6 +205,47 @@ static int __meminit vmemmap_populated(u
 	return 0;
 }
 
+/* On hash-based CPUs, the vmemmap is bolted in the hash table.
+ *
+ * On Book3E CPUs, the vmemmap is currently mapped in the top half of
+ * the vmalloc space using normal page tables, though the size of
+ * pages encoded in the PTEs can be different
+ */
+
+#ifdef CONFIG_PPC_BOOK3E
+static void __meminit vmemmap_create_mapping(unsigned long start,
+					     unsigned long page_size,
+					     unsigned long phys)
+{
+	/* Create a PTE encoding without page size */
+	unsigned long i, flags = _PAGE_PRESENT | _PAGE_ACCESSED |
+		_PAGE_KERNEL_RW;
+
+	/* PTEs only contain page size encodings up to 32M */
+	BUG_ON(mmu_psize_defs[mmu_vmemmap_psize].enc > 0xf);
+
+	/* Encode the size in the PTE */
+	flags |= mmu_psize_defs[mmu_vmemmap_psize].enc << 8;
+
+	/* For each PTE for that area, map things. Note that we don't
+	 * increment phys because all PTEs are of the large size and
+	 * thus must have the low bits clear
+	 */
+	for (i = 0; i < page_size; i += PAGE_SIZE)
+		BUG_ON(map_kernel_page(start + i, phys, flags));
+}
+#else /* CONFIG_PPC_BOOK3E */
+static void __meminit vmemmap_create_mapping(unsigned long start,
+					     unsigned long page_size,
+					     unsigned long phys)
+{
+	int  mapped = htab_bolt_mapping(start, start + page_size, phys,
+					PAGE_KERNEL, mmu_vmemmap_psize,
+					mmu_kernel_ssize);
+	BUG_ON(mapped < 0);
+}
+#endif /* CONFIG_PPC_BOOK3E */
+
 int __meminit vmemmap_populate(struct page *start_page,
 			       unsigned long nr_pages, int node)
 {
@@ -215,8 +256,11 @@ int __meminit vmemmap_populate(struct pa
 	/* Align to the page size of the linear mapping. */
 	start = _ALIGN_DOWN(start, page_size);
 
+	pr_debug("vmemmap_populate page %p, %ld pages, node %d\n",
+		 start_page, nr_pages, node);
+	pr_debug(" -> map %lx..%lx\n", start, end);
+
 	for (; start < end; start += page_size) {
-		int mapped;
 		void *p;
 
 		if (vmemmap_populated(start, page_size))
@@ -226,13 +270,10 @@ int __meminit vmemmap_populate(struct pa
 		if (!p)
 			return -ENOMEM;
 
-		pr_debug("vmemmap %08lx allocated at %p, physical %08lx.\n",
-			start, p, __pa(p));
+		pr_debug("      * %016lx..%016lx allocated at %p\n",
+			 start, start + page_size, p);
 
-		mapped = htab_bolt_mapping(start, start + page_size, __pa(p),
-					   pgprot_val(PAGE_KERNEL),
-					   mmu_vmemmap_psize, mmu_kernel_ssize);
-		BUG_ON(mapped < 0);
+		vmemmap_create_mapping(start, page_size, __pa(p));
 	}
 
 	return 0;
Index: linux-work/arch/powerpc/mm/tlb_nohash.c
===================================================================
--- linux-work.orig/arch/powerpc/mm/tlb_nohash.c	2009-07-24 18:21:24.000000000 +1000
+++ linux-work/arch/powerpc/mm/tlb_nohash.c	2009-07-24 18:23:51.000000000 +1000
@@ -93,6 +93,7 @@ static inline int mmu_get_tsize(int psiz
 
 int mmu_linear_psize;		/* Page size used for the linear mapping */
 int mmu_pte_psize;		/* Page size used for PTE pages */
+int mmu_vmemmap_psize;		/* Page size used for the virtual mem map */
 int book3e_htw_enabled;		/* Is HW tablewalk enabled ? */
 unsigned long linear_map_top;	/* Top of linear mapping */
 
@@ -356,10 +357,18 @@ static void __early_init_mmu(int boot_cp
 	unsigned int mas4;
 
 	/* XXX This will have to be decided at runtime, but right
-	 * now our boot and TLB miss code hard wires it
+	 * now our boot and TLB miss code hard wires it. Ideally
+	 * we should find out a suitable page size and patch the
+	 * TLB miss code (either that or use the PACA to store
+	 * the value we want)
 	 */
 	mmu_linear_psize = MMU_PAGE_1G;
 
+	/* XXX This should be decided at runtime based on supported
+	 * page sizes in the TLB, but for now let's assume 16M is
+	 * always there and a good fit (which it probably is)
+	 */
+	mmu_vmemmap_psize = MMU_PAGE_16M;
 
 	/* Check if HW tablewalk is present, and if yes, enable it by:
 	 *
Index: linux-work/arch/powerpc/mm/mmu_decl.h
===================================================================
--- linux-work.orig/arch/powerpc/mm/mmu_decl.h	2009-07-24 18:21:24.000000000 +1000
+++ linux-work/arch/powerpc/mm/mmu_decl.h	2009-07-24 18:23:51.000000000 +1000
@@ -121,7 +121,12 @@ extern unsigned int rtas_data, rtas_size
 struct hash_pte;
 extern struct hash_pte *Hash, *Hash_end;
 extern unsigned long Hash_size, Hash_mask;
-#endif
+
+#endif /* CONFIG_PPC32 */
+
+#ifdef CONFIG_PPC64
+extern int map_kernel_page(unsigned long ea, unsigned long pa, int flags);
+#endif /* CONFIG_PPC64 */
 
 extern unsigned long ioremap_bot;
 extern unsigned long __max_low_memory;
Index: linux-work/arch/powerpc/mm/pgtable_64.c
===================================================================
--- linux-work.orig/arch/powerpc/mm/pgtable_64.c	2009-07-24 18:15:35.000000000 +1000
+++ linux-work/arch/powerpc/mm/pgtable_64.c	2009-07-24 18:23:51.000000000 +1000
@@ -79,7 +79,7 @@ static void *early_alloc_pgtable(unsigne
  * map_kernel_page adds an entry to the ioremap page table
  * and adds an entry to the HPT, possibly bolting it
  */
-static int map_kernel_page(unsigned long ea, unsigned long pa, int flags)
+int map_kernel_page(unsigned long ea, unsigned long pa, int flags)
 {
 	pgd_t *pgdp;
 	pud_t *pudp;

^ permalink raw reply

* [PATCH 20/20] powerpc: Remaining 64-bit Book3E support (v2)
From: Benjamin Herrenschmidt @ 2009-07-24  9:15 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <1248426902.401617.944220131651.qpush@grosgo>

This contains all the bits that didn't fit in previous patches :-) This
includes the actual exception handlers assembly, the changes to the
kernel entry, other misc bits and wiring it all up in Kconfig.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---

v2. Rebased due to changes in previous patches

 arch/powerpc/Kconfig                   |    2 
 arch/powerpc/include/asm/hw_irq.h      |    5 
 arch/powerpc/include/asm/smp.h         |    1 
 arch/powerpc/kernel/Makefile           |   10 
 arch/powerpc/kernel/cputable.c         |   27 +
 arch/powerpc/kernel/entry_64.S         |   60 ++
 arch/powerpc/kernel/exceptions-64e.S   |  784 +++++++++++++++++++++++++++++++++
 arch/powerpc/kernel/head_64.S          |   68 ++
 arch/powerpc/kernel/setup_64.c         |   19 
 arch/powerpc/mm/Makefile               |    1 
 arch/powerpc/platforms/Kconfig.cputype |   38 +
 arch/powerpc/xmon/xmon.c               |    2 
 12 files changed, 993 insertions(+), 24 deletions(-)

--- linux-work.orig/arch/powerpc/kernel/Makefile	2009-07-24 15:06:38.000000000 +1000
+++ linux-work/arch/powerpc/kernel/Makefile	2009-07-24 16:05:29.000000000 +1000
@@ -33,10 +33,10 @@ obj-y				:= cputable.o ptrace.o syscalls
 obj-y				+= vdso32/
 obj-$(CONFIG_PPC64)		+= setup_64.o sys_ppc32.o \
 				   signal_64.o ptrace32.o \
-				   paca.o cpu_setup_ppc970.o \
-				   cpu_setup_pa6t.o \
-				   firmware.o nvram_64.o
+				   paca.o nvram_64.o firmware.o
+obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_ppc970.o cpu_setup_pa6t.o
 obj64-$(CONFIG_RELOCATABLE)	+= reloc_64.o
+obj-$(CONFIG_PPC_BOOK3E_64)	+= exceptions-64e.o
 obj-$(CONFIG_PPC64)		+= vdso64/
 obj-$(CONFIG_ALTIVEC)		+= vecemu.o
 obj-$(CONFIG_PPC_970_NAP)	+= idle_power4.o
@@ -63,8 +63,8 @@ obj-$(CONFIG_MODULES)		+= module.o modul
 obj-$(CONFIG_44x)		+= cpu_setup_44x.o
 obj-$(CONFIG_FSL_BOOKE)		+= cpu_setup_fsl_booke.o dbell.o
 
-extra-$(CONFIG_PPC_STD_MMU)	:= head_32.o
-extra-$(CONFIG_PPC64)		:= head_64.o
+extra-y				:= head_$(CONFIG_WORD_SIZE).o
+extra-$(CONFIG_PPC_BOOK3E_32)	:= head_new_booke.o
 extra-$(CONFIG_40x)		:= head_40x.o
 extra-$(CONFIG_44x)		:= head_44x.o
 extra-$(CONFIG_FSL_BOOKE)	:= head_fsl_booke.o
Index: linux-work/arch/powerpc/kernel/exceptions-64e.S
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-work/arch/powerpc/kernel/exceptions-64e.S	2009-07-24 16:05:29.000000000 +1000
@@ -0,0 +1,784 @@
+/*
+ *  Boot code and exception vectors for Book3E processors
+ *
+ *  Copyright (C) 2007 Ben. Herrenschmidt (benh@kernel.crashing.org), IBM Corp.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/threads.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/cputable.h>
+#include <asm/setup.h>
+#include <asm/thread_info.h>
+#include <asm/reg.h>
+#include <asm/exception-64e.h>
+#include <asm/bug.h>
+#include <asm/irqflags.h>
+#include <asm/ptrace.h>
+#include <asm/ppc-opcode.h>
+#include <asm/mmu.h>
+
+/* XXX This will ultimately add space for a special exception save
+ *     structure used to save things like SRR0/SRR1, SPRGs, MAS, etc...
+ *     when taking special interrupts. For now we don't support that,
+ *     special interrupts from within a non-standard level will probably
+ *     blow you up
+ */
+#define	SPECIAL_EXC_FRAME_SIZE	INT_FRAME_SIZE
+
+/* Exception prolog code for all exceptions */
+#define EXCEPTION_PROLOG(n, type, addition)				    \
+	mtspr	SPRN_SPRG_##type##_SCRATCH,r13;	/* get spare registers */   \
+	mfspr	r13,SPRN_SPRG_PACA;	/* get PACA */			    \
+	std	r10,PACA_EX##type+EX_R10(r13);				    \
+	std	r11,PACA_EX##type+EX_R11(r13);				    \
+	mfcr	r10;			/* save CR */			    \
+	addition;			/* additional code for that exc. */ \
+	std	r1,PACA_EX##type+EX_R1(r13); /* save old r1 in the PACA */  \
+	stw	r10,PACA_EX##type+EX_CR(r13); /* save old CR in the PACA */ \
+	mfspr	r11,SPRN_##type##_SRR1;/* what are we coming from */	    \
+	type##_SET_KSTACK;		/* get special stack if necessary */\
+	andi.	r10,r11,MSR_PR;		/* save stack pointer */	    \
+	beq	1f;			/* branch around if supervisor */   \
+	ld	r1,PACAKSAVE(r13);	/* get kernel stack coming from usr */\
+1:	cmpdi	cr1,r1,0;		/* check if SP makes sense */	    \
+	bge-	cr1,exc_##n##_bad_stack;/* bad stack (TODO: out of line) */ \
+	mfspr	r10,SPRN_##type##_SRR0;	/* read SRR0 before touching stack */
+
+/* Exception type-specific macros */
+#define	GEN_SET_KSTACK							    \
+	subi	r1,r1,INT_FRAME_SIZE;	/* alloc frame on kernel stack */
+#define SPRN_GEN_SRR0	SPRN_SRR0
+#define SPRN_GEN_SRR1	SPRN_SRR1
+
+#define CRIT_SET_KSTACK						            \
+	ld	r1,PACA_CRIT_STACK(r13);				    \
+	subi	r1,r1,SPECIAL_EXC_FRAME_SIZE;
+#define SPRN_CRIT_SRR0	SPRN_CSRR0
+#define SPRN_CRIT_SRR1	SPRN_CSRR1
+
+#define DBG_SET_KSTACK						            \
+	ld	r1,PACA_DBG_STACK(r13);					    \
+	subi	r1,r1,SPECIAL_EXC_FRAME_SIZE;
+#define SPRN_DBG_SRR0	SPRN_DSRR0
+#define SPRN_DBG_SRR1	SPRN_DSRR1
+
+#define MC_SET_KSTACK						            \
+	ld	r1,PACA_MC_STACK(r13);					    \
+	subi	r1,r1,SPECIAL_EXC_FRAME_SIZE;
+#define SPRN_MC_SRR0	SPRN_MCSRR0
+#define SPRN_MC_SRR1	SPRN_MCSRR1
+
+#define NORMAL_EXCEPTION_PROLOG(n, addition)				    \
+	EXCEPTION_PROLOG(n, GEN, addition##_GEN)
+
+#define CRIT_EXCEPTION_PROLOG(n, addition)				    \
+	EXCEPTION_PROLOG(n, CRIT, addition##_CRIT)
+
+#define DBG_EXCEPTION_PROLOG(n, addition)				    \
+	EXCEPTION_PROLOG(n, DBG, addition##_DBG)
+
+#define MC_EXCEPTION_PROLOG(n, addition)				    \
+	EXCEPTION_PROLOG(n, MC, addition##_MC)
+
+
+/* Variants of the "addition" argument for the prolog
+ */
+#define PROLOG_ADDITION_NONE_GEN
+#define PROLOG_ADDITION_NONE_CRIT
+#define PROLOG_ADDITION_NONE_DBG
+#define PROLOG_ADDITION_NONE_MC
+
+#define PROLOG_ADDITION_MASKABLE_GEN					    \
+	lbz	r11,PACASOFTIRQEN(r13); /* are irqs soft-disabled ? */	    \
+	cmpwi	cr0,r11,0;		/* yes -> go out of line */	    \
+	beq	masked_interrupt_book3e;
+
+#define PROLOG_ADDITION_2REGS_GEN					    \
+	std	r14,PACA_EXGEN+EX_R14(r13);				    \
+	std	r15,PACA_EXGEN+EX_R15(r13)
+
+#define PROLOG_ADDITION_1REG_GEN					    \
+	std	r14,PACA_EXGEN+EX_R14(r13);
+
+#define PROLOG_ADDITION_2REGS_CRIT					    \
+	std	r14,PACA_EXCRIT+EX_R14(r13);				    \
+	std	r15,PACA_EXCRIT+EX_R15(r13)
+
+#define PROLOG_ADDITION_2REGS_DBG					    \
+	std	r14,PACA_EXDBG+EX_R14(r13);				    \
+	std	r15,PACA_EXDBG+EX_R15(r13)
+
+#define PROLOG_ADDITION_2REGS_MC					    \
+	std	r14,PACA_EXMC+EX_R14(r13);				    \
+	std	r15,PACA_EXMC+EX_R15(r13)
+
+/* Core exception code for all exceptions except TLB misses.
+ * XXX: Needs to make SPRN_SPRG_GEN depend on exception type
+ */
+#define EXCEPTION_COMMON(n, excf, ints)					    \
+	std	r0,GPR0(r1);		/* save r0 in stackframe */	    \
+	std	r2,GPR2(r1);		/* save r2 in stackframe */	    \
+	SAVE_4GPRS(3, r1);		/* save r3 - r6 in stackframe */    \
+	SAVE_2GPRS(7, r1);		/* save r7, r8 in stackframe */	    \
+	std	r9,GPR9(r1);		/* save r9 in stackframe */	    \
+	std	r10,_NIP(r1);		/* save SRR0 to stackframe */	    \
+	std	r11,_MSR(r1);		/* save SRR1 to stackframe */	    \
+	ACCOUNT_CPU_USER_ENTRY(r10,r11);/* accounting (uses cr0+eq) */	    \
+	ld	r3,excf+EX_R10(r13);	/* get back r10 */		    \
+	ld	r4,excf+EX_R11(r13);	/* get back r11 */		    \
+	mfspr	r5,SPRN_SPRG_GEN_SCRATCH;/* get back r13 */		    \
+	std	r12,GPR12(r1);		/* save r12 in stackframe */	    \
+	ld	r2,PACATOC(r13);	/* get kernel TOC into r2 */	    \
+	mflr	r6;			/* save LR in stackframe */	    \
+	mfctr	r7;			/* save CTR in stackframe */	    \
+	mfspr	r8,SPRN_XER;		/* save XER in stackframe */	    \
+	ld	r9,excf+EX_R1(r13);	/* load orig r1 back from PACA */   \
+	lwz	r10,excf+EX_CR(r13);	/* load orig CR back from PACA	*/  \
+	lbz	r11,PACASOFTIRQEN(r13);	/* get current IRQ softe */	    \
+	ld	r12,exception_marker@toc(r2);				    \
+	li	r0,0;							    \
+	std	r3,GPR10(r1);		/* save r10 to stackframe */	    \
+	std	r4,GPR11(r1);		/* save r11 to stackframe */	    \
+	std	r5,GPR13(r1);		/* save it to stackframe */	    \
+	std	r6,_LINK(r1);						    \
+	std	r7,_CTR(r1);						    \
+	std	r8,_XER(r1);						    \
+	li	r3,(n)+1;		/* indicate partial regs in trap */ \
+	std	r9,0(r1);		/* store stack frame back link */   \
+	std	r10,_CCR(r1);		/* store orig CR in stackframe */   \
+	std	r9,GPR1(r1);		/* store stack frame back link */   \
+	std	r11,SOFTE(r1);		/* and save it to stackframe */     \
+	std	r12,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */	    \
+	std	r3,_TRAP(r1);		/* set trap number		*/  \
+	std	r0,RESULT(r1);		/* clear regs->result */	    \
+	ints;
+
+/* Variants for the "ints" argument */
+#define INTS_KEEP
+#define INTS_DISABLE_SOFT						    \
+	stb	r0,PACASOFTIRQEN(r13);	/* mark interrupts soft-disabled */ \
+	TRACE_DISABLE_INTS;
+#define INTS_DISABLE_HARD						    \
+	stb	r0,PACAHARDIRQEN(r13); /* and hard disabled */
+#define INTS_DISABLE_ALL						    \
+	INTS_DISABLE_SOFT						    \
+	INTS_DISABLE_HARD
+
+/* This is called by exceptions that used INTS_KEEP (that is did not clear
+ * neither soft nor hard IRQ indicators in the PACA. This will restore MSR:EE
+ * to it's previous value
+ *
+ * XXX In the long run, we may want to open-code it in order to separate the
+ *     load from the wrtee, thus limiting the latency caused by the dependency
+ *     but at this point, I'll favor code clarity until we have a near to final
+ *     implementation
+ */
+#define INTS_RESTORE_HARD						    \
+	ld	r11,_MSR(r1);						    \
+	wrtee	r11;
+
+/* XXX FIXME: Restore r14/r15 when necessary */
+#define BAD_STACK_TRAMPOLINE(n)						    \
+exc_##n##_bad_stack:							    \
+	li	r1,(n);			/* get exception number */	    \
+	sth	r1,PACA_TRAP_SAVE(r13);	/* store trap */		    \
+	b	bad_stack_book3e;	/* bad stack error */
+
+#define	EXCEPTION_STUB(loc, label)					\
+	. = interrupt_base_book3e + loc;				\
+	nop;	/* To make debug interrupts happy */			\
+	b	exc_##label##_book3e;
+
+#define ACK_NONE(r)
+#define ACK_DEC(r)							\
+	lis	r,TSR_DIS@h;						\
+	mtspr	SPRN_TSR,r
+#define ACK_FIT(r)							\
+	lis	r,TSR_FIS@h;						\
+	mtspr	SPRN_TSR,r
+
+#define MASKABLE_EXCEPTION(trapnum, label, hdlr, ack)			\
+	START_EXCEPTION(label);						\
+	NORMAL_EXCEPTION_PROLOG(trapnum, PROLOG_ADDITION_MASKABLE)	\
+	EXCEPTION_COMMON(trapnum, PACA_EXGEN, INTS_DISABLE_ALL)		\
+	ack(r8);							\
+	addi	r3,r1,STACK_FRAME_OVERHEAD;				\
+	bl	hdlr;							\
+	b	.ret_from_except_lite;
+
+/* This value is used to mark exception frames on the stack. */
+	.section	".toc","aw"
+exception_marker:
+	.tc	ID_EXC_MARKER[TC],STACK_FRAME_REGS_MARKER
+
+
+/*
+ * And here we have the exception vectors !
+ */
+
+	.text
+	.balign	0x1000
+	.globl interrupt_base_book3e
+interrupt_base_book3e:					/* fake trap */
+	/* Note: If real debug exceptions are supported by the HW, the vector
+	 * below will have to be patched up to point to an appropriate handler
+	 */
+	EXCEPTION_STUB(0x000, machine_check)		/* 0x0200 */
+	EXCEPTION_STUB(0x020, critical_input)		/* 0x0580 */
+	EXCEPTION_STUB(0x040, debug_crit)		/* 0x0d00 */
+	EXCEPTION_STUB(0x060, data_storage)		/* 0x0300 */
+	EXCEPTION_STUB(0x080, instruction_storage)	/* 0x0400 */
+	EXCEPTION_STUB(0x0a0, external_input)		/* 0x0500 */
+	EXCEPTION_STUB(0x0c0, alignment)		/* 0x0600 */
+	EXCEPTION_STUB(0x0e0, program)			/* 0x0700 */
+	EXCEPTION_STUB(0x100, fp_unavailable)		/* 0x0800 */
+	EXCEPTION_STUB(0x120, system_call)		/* 0x0c00 */
+	EXCEPTION_STUB(0x140, ap_unavailable)		/* 0x0f20 */
+	EXCEPTION_STUB(0x160, decrementer)		/* 0x0900 */
+	EXCEPTION_STUB(0x180, fixed_interval)		/* 0x0980 */
+	EXCEPTION_STUB(0x1a0, watchdog)			/* 0x09f0 */
+	EXCEPTION_STUB(0x1c0, data_tlb_miss)
+	EXCEPTION_STUB(0x1e0, instruction_tlb_miss)
+
+#if 0
+	EXCEPTION_STUB(0x280, processor_doorbell)
+	EXCEPTION_STUB(0x220, processor_doorbell_crit)
+#endif
+	.globl interrupt_end_book3e
+interrupt_end_book3e:
+
+/* Critical Input Interrupt */
+	START_EXCEPTION(critical_input);
+	CRIT_EXCEPTION_PROLOG(0x100, PROLOG_ADDITION_NONE)
+//	EXCEPTION_COMMON(0x100, PACA_EXCRIT, INTS_DISABLE_ALL)
+//	bl	special_reg_save_crit
+//	addi	r3,r1,STACK_FRAME_OVERHEAD
+//	bl	.critical_exception
+//	b	ret_from_crit_except
+	b	.
+
+/* Machine Check Interrupt */
+	START_EXCEPTION(machine_check);
+	CRIT_EXCEPTION_PROLOG(0x200, PROLOG_ADDITION_NONE)
+//	EXCEPTION_COMMON(0x200, PACA_EXMC, INTS_DISABLE_ALL)
+//	bl	special_reg_save_mc
+//	addi	r3,r1,STACK_FRAME_OVERHEAD
+//	bl	.machine_check_exception
+//	b	ret_from_mc_except
+	b	.
+
+/* Data Storage Interrupt */
+	START_EXCEPTION(data_storage)
+	NORMAL_EXCEPTION_PROLOG(0x300, PROLOG_ADDITION_2REGS)
+	mfspr	r14,SPRN_DEAR
+	mfspr	r15,SPRN_ESR
+	EXCEPTION_COMMON(0x300, PACA_EXGEN, INTS_KEEP)
+	b	storage_fault_common
+
+/* Instruction Storage Interrupt */
+	START_EXCEPTION(instruction_storage);
+	NORMAL_EXCEPTION_PROLOG(0x400, PROLOG_ADDITION_2REGS)
+	li	r15,0
+	mr	r14,r10
+	EXCEPTION_COMMON(0x400, PACA_EXGEN, INTS_KEEP)
+	b	storage_fault_common
+
+/* External Input Interrupt */
+	MASKABLE_EXCEPTION(0x500, external_input, .do_IRQ, ACK_NONE)
+
+/* Alignment */
+	START_EXCEPTION(alignment);
+	NORMAL_EXCEPTION_PROLOG(0x600, PROLOG_ADDITION_2REGS)
+	mfspr	r14,SPRN_DEAR
+	mfspr	r15,SPRN_ESR
+	EXCEPTION_COMMON(0x600, PACA_EXGEN, INTS_KEEP)
+	b	alignment_more	/* no room, go out of line */
+
+/* Program Interrupt */
+	START_EXCEPTION(program);
+	NORMAL_EXCEPTION_PROLOG(0x700, PROLOG_ADDITION_1REG)
+	mfspr	r14,SPRN_ESR
+	EXCEPTION_COMMON(0x700, PACA_EXGEN, INTS_DISABLE_SOFT)
+	std	r14,_DSISR(r1)
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	ld	r14,PACA_EXGEN+EX_R14(r13)
+	bl	.save_nvgprs
+	INTS_RESTORE_HARD
+	bl	.program_check_exception
+	b	.ret_from_except
+
+/* Floating Point Unavailable Interrupt */
+	START_EXCEPTION(fp_unavailable);
+	NORMAL_EXCEPTION_PROLOG(0x800, PROLOG_ADDITION_NONE)
+	/* we can probably do a shorter exception entry for that one... */
+	EXCEPTION_COMMON(0x800, PACA_EXGEN, INTS_KEEP)
+	bne	1f			/* if from user, just load it up */
+	bl	.save_nvgprs
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	INTS_RESTORE_HARD
+	bl	.kernel_fp_unavailable_exception
+	BUG_OPCODE
+1:	ld	r12,_MSR(r1)
+	bl	.load_up_fpu
+	b	fast_exception_return
+
+/* Decrementer Interrupt */
+	MASKABLE_EXCEPTION(0x900, decrementer, .timer_interrupt, ACK_DEC)
+
+/* Fixed Interval Timer Interrupt */
+	MASKABLE_EXCEPTION(0x980, fixed_interval, .unknown_exception, ACK_FIT)
+
+/* Watchdog Timer Interrupt */
+	START_EXCEPTION(watchdog);
+	CRIT_EXCEPTION_PROLOG(0x9f0, PROLOG_ADDITION_NONE)
+//	EXCEPTION_COMMON(0x9f0, PACA_EXCRIT, INTS_DISABLE_ALL)
+//	bl	special_reg_save_crit
+//	addi	r3,r1,STACK_FRAME_OVERHEAD
+//	bl	.unknown_exception
+//	b	ret_from_crit_except
+	b	.
+
+/* System Call Interrupt */
+	START_EXCEPTION(system_call)
+	mr	r9,r13			/* keep a copy of userland r13 */
+	mfspr	r11,SPRN_SRR0		/* get return address */
+	mfspr	r12,SPRN_SRR1		/* get previous MSR */
+	mfspr	r13,SPRN_SPRG_PACA	/* get our PACA */
+	b	system_call_common
+
+/* Auxillary Processor Unavailable Interrupt */
+	START_EXCEPTION(ap_unavailable);
+	NORMAL_EXCEPTION_PROLOG(0xf20, PROLOG_ADDITION_NONE)
+	EXCEPTION_COMMON(0xf20, PACA_EXGEN, INTS_KEEP)
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	bl	.save_nvgprs
+	INTS_RESTORE_HARD
+	bl	.unknown_exception
+	b	.ret_from_except
+
+/* Debug exception as a critical interrupt*/
+	START_EXCEPTION(debug_crit);
+	CRIT_EXCEPTION_PROLOG(0xd00, PROLOG_ADDITION_2REGS)
+
+	/*
+	 * If there is a single step or branch-taken exception in an
+	 * exception entry sequence, it was probably meant to apply to
+	 * the code where the exception occurred (since exception entry
+	 * doesn't turn off DE automatically).  We simulate the effect
+	 * of turning off DE on entry to an exception handler by turning
+	 * off DE in the CSRR1 value and clearing the debug status.
+	 */
+
+	mfspr	r14,SPRN_DBSR		/* check single-step/branch taken */
+	andis.	r15,r14,DBSR_IC@h
+	beq+	1f
+
+	LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
+	LOAD_REG_IMMEDIATE(r15,interrupt_end_book3e)
+	cmpld	cr0,r10,r14
+	cmpld	cr1,r10,r15
+	blt+	cr0,1f
+	bge+	cr1,1f
+
+	/* here it looks like we got an inappropriate debug exception. */
+	lis	r14,DBSR_IC@h		/* clear the IC event */
+	rlwinm	r11,r11,0,~MSR_DE	/* clear DE in the CSRR1 value */
+	mtspr	SPRN_DBSR,r14
+	mtspr	SPRN_CSRR1,r11
+	lwz	r10,PACA_EXCRIT+EX_CR(r13)	/* restore registers */
+	ld	r1,PACA_EXCRIT+EX_R1(r13)
+	ld	r14,PACA_EXCRIT+EX_R14(r13)
+	ld	r15,PACA_EXCRIT+EX_R15(r13)
+	mtcr	r10
+	ld	r10,PACA_EXCRIT+EX_R10(r13)	/* restore registers */
+	ld	r11,PACA_EXCRIT+EX_R11(r13)
+	mfspr	r13,SPRN_SPRG_CRIT_SCRATCH
+	rfci
+
+	/* Normal debug exception */
+	/* XXX We only handle coming from userspace for now since we can't
+	 *     quite save properly an interrupted kernel state yet
+	 */
+1:	andi.	r14,r11,MSR_PR;		/* check for userspace again */
+	beq	kernel_dbg_exc;		/* if from kernel mode */
+
+	/* Now we mash up things to make it look like we are coming on a
+	 * normal exception
+	 */
+	mfspr	r15,SPRN_SPRG_CRIT_SCRATCH
+	mtspr	SPRN_SPRG_GEN_SCRATCH,r15
+	mfspr	r14,SPRN_DBSR
+	EXCEPTION_COMMON(0xd00, PACA_EXCRIT, INTS_DISABLE_ALL)
+	std	r14,_DSISR(r1)
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	mr	r4,r14
+	ld	r14,PACA_EXCRIT+EX_R14(r13)
+	ld	r15,PACA_EXCRIT+EX_R15(r13)
+	bl	.save_nvgprs
+	bl	.DebugException
+	b	.ret_from_except
+
+kernel_dbg_exc:
+	b	.	/* NYI */
+
+
+/*
+ * An interrupt came in while soft-disabled; clear EE in SRR1,
+ * clear paca->hard_enabled and return.
+ */
+masked_interrupt_book3e:
+	mtcr	r10
+	stb	r11,PACAHARDIRQEN(r13)
+	mfspr	r10,SPRN_SRR1
+	rldicl	r11,r10,48,1		/* clear MSR_EE */
+	rotldi	r10,r11,16
+	mtspr	SPRN_SRR1,r10
+	ld	r10,PACA_EXGEN+EX_R10(r13);	/* restore registers */
+	ld	r11,PACA_EXGEN+EX_R11(r13);
+	mfspr	r13,SPRN_SPRG_GEN_SCRATCH;
+	rfi
+	b	.
+
+/*
+ * This is called from 0x300 and 0x400 handlers after the prologs with
+ * r14 and r15 containing the fault address and error code, with the
+ * original values stashed away in the PACA
+ */
+storage_fault_common:
+	std	r14,_DAR(r1)
+	std	r15,_DSISR(r1)
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	mr	r4,r14
+	mr	r5,r15
+	ld	r14,PACA_EXGEN+EX_R14(r13)
+	ld	r15,PACA_EXGEN+EX_R15(r13)
+	INTS_RESTORE_HARD
+	bl	.do_page_fault
+	cmpdi	r3,0
+	bne-	1f
+	b	.ret_from_except_lite
+1:	bl	.save_nvgprs
+	mr	r5,r3
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	ld	r4,_DAR(r1)
+	bl	.bad_page_fault
+	b	.ret_from_except
+
+/*
+ * Alignment exception doesn't fit entirely in the 0x100 bytes so it
+ * continues here.
+ */
+alignment_more:
+	std	r14,_DAR(r1)
+	std	r15,_DSISR(r1)
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	ld	r14,PACA_EXGEN+EX_R14(r13)
+	ld	r15,PACA_EXGEN+EX_R15(r13)
+	bl	.save_nvgprs
+	INTS_RESTORE_HARD
+	bl	.alignment_exception
+	b	.ret_from_except
+
+/*
+ * We branch here from entry_64.S for the last stage of the exception
+ * return code path. MSR:EE is expected to be off at that point
+ */
+_GLOBAL(exception_return_book3e)
+	b	1f
+
+/* This is the return from load_up_fpu fast path which could do with
+ * less GPR restores in fact, but for now we have a single return path
+ */
+	.globl fast_exception_return
+fast_exception_return:
+	wrteei	0
+1:	mr	r0,r13
+	ld	r10,_MSR(r1)
+	REST_4GPRS(2, r1)
+	andi.	r6,r10,MSR_PR
+	REST_2GPRS(6, r1)
+	beq	1f
+	ACCOUNT_CPU_USER_EXIT(r10, r11)
+	ld	r0,GPR13(r1)
+
+1:	stdcx.	r0,0,r1		/* to clear the reservation */
+
+	ld	r8,_CCR(r1)
+	ld	r9,_LINK(r1)
+	ld	r10,_CTR(r1)
+	ld	r11,_XER(r1)
+	mtcr	r8
+	mtlr	r9
+	mtctr	r10
+	mtxer	r11
+	REST_2GPRS(8, r1)
+	ld	r10,GPR10(r1)
+	ld	r11,GPR11(r1)
+	ld	r12,GPR12(r1)
+	mtspr	SPRN_SPRG_GEN_SCRATCH,r0
+
+	std	r10,PACA_EXGEN+EX_R10(r13);
+	std	r11,PACA_EXGEN+EX_R11(r13);
+	ld	r10,_NIP(r1)
+	ld	r11,_MSR(r1)
+	ld	r0,GPR0(r1)
+	ld	r1,GPR1(r1)
+	mtspr	SPRN_SRR0,r10
+	mtspr	SPRN_SRR1,r11
+	ld	r10,PACA_EXGEN+EX_R10(r13)
+	ld	r11,PACA_EXGEN+EX_R11(r13)
+	mfspr	r13,SPRN_SPRG_GEN_SCRATCH
+	rfi
+
+/*
+ * Trampolines used when spotting a bad kernel stack pointer in
+ * the exception entry code.
+ *
+ * TODO: move some bits like SRR0 read to trampoline, pass PACA
+ * index around, etc... to handle crit & mcheck
+ */
+BAD_STACK_TRAMPOLINE(0x000)
+BAD_STACK_TRAMPOLINE(0x100)
+BAD_STACK_TRAMPOLINE(0x200)
+BAD_STACK_TRAMPOLINE(0x300)
+BAD_STACK_TRAMPOLINE(0x400)
+BAD_STACK_TRAMPOLINE(0x500)
+BAD_STACK_TRAMPOLINE(0x600)
+BAD_STACK_TRAMPOLINE(0x700)
+BAD_STACK_TRAMPOLINE(0x800)
+BAD_STACK_TRAMPOLINE(0x900)
+BAD_STACK_TRAMPOLINE(0x980)
+BAD_STACK_TRAMPOLINE(0x9f0)
+BAD_STACK_TRAMPOLINE(0xa00)
+BAD_STACK_TRAMPOLINE(0xb00)
+BAD_STACK_TRAMPOLINE(0xc00)
+BAD_STACK_TRAMPOLINE(0xd00)
+BAD_STACK_TRAMPOLINE(0xe00)
+BAD_STACK_TRAMPOLINE(0xf00)
+BAD_STACK_TRAMPOLINE(0xf20)
+
+	.globl	bad_stack_book3e
+bad_stack_book3e:
+	/* XXX: Needs to make SPRN_SPRG_GEN depend on exception type */
+	mfspr	r10,SPRN_SRR0;		  /* read SRR0 before touching stack */
+	ld	r1,PACAEMERGSP(r13)
+	subi	r1,r1,64+INT_FRAME_SIZE
+	std	r10,_NIP(r1)
+	std	r11,_MSR(r1)
+	ld	r10,PACA_EXGEN+EX_R1(r13) /* FIXME for crit & mcheck */
+	lwz	r11,PACA_EXGEN+EX_CR(r13) /* FIXME for crit & mcheck */
+	std	r10,GPR1(r1)
+	std	r11,_CCR(r1)
+	mfspr	r10,SPRN_DEAR
+	mfspr	r11,SPRN_ESR
+	std	r10,_DAR(r1)
+	std	r11,_DSISR(r1)
+	std	r0,GPR0(r1);		/* save r0 in stackframe */	    \
+	std	r2,GPR2(r1);		/* save r2 in stackframe */	    \
+	SAVE_4GPRS(3, r1);		/* save r3 - r6 in stackframe */    \
+	SAVE_2GPRS(7, r1);		/* save r7, r8 in stackframe */	    \
+	std	r9,GPR9(r1);		/* save r9 in stackframe */	    \
+	ld	r3,PACA_EXGEN+EX_R10(r13);/* get back r10 */		    \
+	ld	r4,PACA_EXGEN+EX_R11(r13);/* get back r11 */		    \
+	mfspr	r5,SPRN_SPRG_GEN_SCRATCH;/* get back r13 XXX can be wrong */ \
+	std	r3,GPR10(r1);		/* save r10 to stackframe */	    \
+	std	r4,GPR11(r1);		/* save r11 to stackframe */	    \
+	std	r12,GPR12(r1);		/* save r12 in stackframe */	    \
+	std	r5,GPR13(r1);		/* save it to stackframe */	    \
+	mflr	r10
+	mfctr	r11
+	mfxer	r12
+	std	r10,_LINK(r1)
+	std	r11,_CTR(r1)
+	std	r12,_XER(r1)
+	SAVE_10GPRS(14,r1)
+	SAVE_8GPRS(24,r1)
+	lhz	r12,PACA_TRAP_SAVE(r13)
+	std	r12,_TRAP(r1)
+	addi	r11,r1,INT_FRAME_SIZE
+	std	r11,0(r1)
+	li	r12,0
+	std	r12,0(r11)
+	ld	r2,PACATOC(r13)
+1:	addi	r3,r1,STACK_FRAME_OVERHEAD
+	bl	.kernel_bad_stack
+	b	1b
+
+/*
+ * Setup the initial TLB for a core. This current implementation
+ * assume that whatever we are running off will not conflict with
+ * the new mapping at PAGE_OFFSET.
+ * We also make various assumptions about the processor we run on,
+ * this might have to be made more flexible based on the content
+ * of MMUCFG and friends.
+ */
+_GLOBAL(initial_tlb_book3e)
+
+	/* Setup MAS 0,1,2,3 and 7 for tlbwe of a 1G entry that maps the
+	 * kernel linear mapping. We also set MAS8 once for all here though
+	 * that will have to be made dependent on whether we are running under
+	 * a hypervisor I suppose.
+	 */
+	li	r3,MAS0_HES | MAS0_WQ_ALLWAYS
+	mtspr	SPRN_MAS0,r3
+	lis	r3,(MAS1_VALID | MAS1_IPROT)@h
+	ori	r3,r3,BOOK3E_PAGESZ_1GB << MAS1_TSIZE_SHIFT
+	mtspr	SPRN_MAS1,r3
+	LOAD_REG_IMMEDIATE(r3, PAGE_OFFSET | MAS2_M)
+	mtspr	SPRN_MAS2,r3
+	li	r3,MAS3_SR | MAS3_SW | MAS3_SX
+	mtspr	SPRN_MAS7_MAS3,r3
+	li	r3,0
+	mtspr	SPRN_MAS8,r3
+
+	/* Write the TLB entry */
+	tlbwe
+
+	/* Now we branch the new virtual address mapped by this entry */
+	LOAD_REG_IMMEDIATE(r3,1f)
+	mtctr	r3
+	bctr
+
+1:	/* We are now running at PAGE_OFFSET, clean the TLB of everything
+	 * else (XXX we should scan for bolted crap from the firmware too)
+	 */
+	PPC_TLBILX(0,0,0)
+	sync
+	isync
+
+	/* We translate LR and return */
+	mflr	r3
+	tovirt(r3,r3)
+	mtlr	r3
+	blr
+
+/*
+ * Main entry (boot CPU, thread 0)
+ *
+ * We enter here from head_64.S, possibly after the prom_init trampoline
+ * with r3 and r4 already saved to r31 and 30 respectively and in 64 bits
+ * mode. Anything else is as it was left by the bootloader
+ *
+ * Initial requirements of this port:
+ *
+ * - Kernel loaded at 0 physical
+ * - A good lump of memory mapped 0:0 by UTLB entry 0
+ * - MSR:IS & MSR:DS set to 0
+ *
+ * Note that some of the above requirements will be relaxed in the future
+ * as the kernel becomes smarter at dealing with different initial conditions
+ * but for now you have to be careful
+ */
+_GLOBAL(start_initialization_book3e)
+	mflr	r28
+
+	/* First, we need to setup some initial TLBs to map the kernel
+	 * text, data and bss at PAGE_OFFSET. We don't have a real mode
+	 * and always use AS 0, so we just set it up to match our link
+	 * address and never use 0 based addresses.
+	 */
+	bl	.initial_tlb_book3e
+
+	/* Init global core bits */
+	bl	.init_core_book3e
+
+	/* Init per-thread bits */
+	bl	.init_thread_book3e
+
+	/* Return to common init code */
+	tovirt(r28,r28)
+	mtlr	r28
+	blr
+
+
+/*
+ * Secondary core/processor entry
+ *
+ * This is entered for thread 0 of a secondary core, all other threads
+ * are expected to be stopped. It's similar to start_initialization_book3e
+ * except that it's generally entered from the holding loop in head_64.S
+ * after CPUs have been gathered by Open Firmware.
+ *
+ * We assume we are in 32 bits mode running with whatever TLB entry was
+ * set for us by the firmware or POR engine.
+ */
+_GLOBAL(book3e_secondary_core_init_tlb_set)
+	li	r4,1
+	b	.generic_secondary_smp_init
+
+_GLOBAL(book3e_secondary_core_init)
+	mflr	r28
+
+	/* Do we need to setup initial TLB entry ? */
+	cmplwi	r4,0
+	bne	2f
+
+	/* Setup TLB for this core */
+	bl	.initial_tlb_book3e
+
+	/* We can return from the above running at a different
+	 * address, so recalculate r2 (TOC)
+	 */
+	bl	.relative_toc
+
+	/* Init global core bits */
+2:	bl	.init_core_book3e
+
+	/* Init per-thread bits */
+3:	bl	.init_thread_book3e
+
+	/* Return to common init code at proper virtual address.
+	 *
+	 * Due to various previous assumptions, we know we entered this
+	 * function at either the final PAGE_OFFSET mapping or using a
+	 * 1:1 mapping at 0, so we don't bother doing a complicated check
+	 * here, we just ensure the return address has the right top bits.
+	 *
+	 * Note that if we ever want to be smarter about where we can be
+	 * started from, we have to be careful that by the time we reach
+	 * the code below we may already be running at a different location
+	 * than the one we were called from since initial_tlb_book3e can
+	 * have moved us already.
+	 */
+	cmpdi	cr0,r28,0
+	blt	1f
+	lis	r3,PAGE_OFFSET@highest
+	sldi	r3,r3,32
+	or	r28,r28,r3
+1:	mtlr	r28
+	blr
+
+_GLOBAL(book3e_secondary_thread_init)
+	mflr	r28
+	b	3b
+
+_STATIC(init_core_book3e)
+	/* Establish the interrupt vector base */
+	LOAD_REG_IMMEDIATE(r3, interrupt_base_book3e)
+	mtspr	SPRN_IVPR,r3
+	sync
+	blr
+
+_STATIC(init_thread_book3e)
+	lis	r3,(SPRN_EPCR_ICM | SPRN_EPCR_GICM)@h
+	mtspr	SPRN_EPCR,r3
+
+	/* Make sure interrupts are off */
+	wrteei	0
+
+	/* disable watchdog and FIT and enable DEC interrupts */
+	lis	r3,TCR_DIE@h
+	mtspr	SPRN_TCR,r3
+
+	blr
+
+
+
Index: linux-work/arch/powerpc/kernel/head_64.S
===================================================================
--- linux-work.orig/arch/powerpc/kernel/head_64.S	2009-07-24 15:06:38.000000000 +1000
+++ linux-work/arch/powerpc/kernel/head_64.S	2009-07-24 16:05:29.000000000 +1000
@@ -121,10 +121,11 @@ __run_at_load:
  */
 	.globl	__secondary_hold
 __secondary_hold:
+#ifndef CONFIG_PPC_BOOK3E
 	mfmsr	r24
 	ori	r24,r24,MSR_RI
 	mtmsrd	r24			/* RI on */
-
+#endif
 	/* Grab our physical cpu number */
 	mr	r24,r3
 
@@ -143,6 +144,7 @@ __secondary_hold:
 	ld	r4,0(r4)		/* deref function descriptor */
 	mtctr	r4
 	mr	r3,r24
+	li	r4,0
 	bctr
 #else
 	BUG_OPCODE
@@ -163,21 +165,49 @@ exception_marker:
 #include "exceptions-64s.S"
 #endif
 
+_GLOBAL(generic_secondary_thread_init)
+	mr	r24,r3
+
+	/* turn on 64-bit mode */
+	bl	.enable_64b_mode
+
+	/* get a valid TOC pointer, wherever we're mapped at */
+	bl	.relative_toc
+
+#ifdef CONFIG_PPC_BOOK3E
+	/* Book3E initialization */
+	mr	r3,r24
+	bl	.book3e_secondary_thread_init
+#endif
+	b	generic_secondary_common_init
 
 /*
  * On pSeries and most other platforms, secondary processors spin
  * in the following code.
  * At entry, r3 = this processor's number (physical cpu id)
+ *
+ * On Book3E, r4 = 1 to indicate that the initial TLB entry for
+ * this core already exists (setup via some other mechanism such
+ * as SCOM before entry).
  */
 _GLOBAL(generic_secondary_smp_init)
 	mr	r24,r3
-	
+	mr	r25,r4
+
 	/* turn on 64-bit mode */
 	bl	.enable_64b_mode
 
-	/* get the TOC pointer (real address) */
+	/* get a valid TOC pointer, wherever we're mapped at */
 	bl	.relative_toc
 
+#ifdef CONFIG_PPC_BOOK3E
+	/* Book3E initialization */
+	mr	r3,r24
+	mr	r4,r25
+	bl	.book3e_secondary_core_init
+#endif
+
+generic_secondary_common_init:
 	/* Set up a paca value for this processor. Since we have the
 	 * physical cpu id in r24, we need to search the pacas to find
 	 * which logical id maps to our physical one.
@@ -196,6 +226,11 @@ _GLOBAL(generic_secondary_smp_init)
 	b	.kexec_wait		/* next kernel might do better	 */
 
 2:	mtspr	SPRN_SPRG_PACA,r13	/* Save vaddr of paca in an SPRG */
+#ifdef CONFIG_PPC_BOOK3E
+	addi	r12,r13,PACA_EXTLB	/* and TLB exc frame in another  */
+	mtspr	SPRN_SPRG_TLB_EXFRAME,r12
+#endif
+
 	/* From now on, r24 is expected to be logical cpuid */
 	mr	r24,r5
 3:	HMT_LOW
@@ -231,6 +266,7 @@ _GLOBAL(generic_secondary_smp_init)
  * Turn the MMU off.
  * Assumes we're mapped EA == RA if the MMU is on.
  */
+#ifdef CONFIG_PPC_BOOK3S
 _STATIC(__mmu_off)
 	mfmsr	r3
 	andi.	r0,r3,MSR_IR|MSR_DR
@@ -242,6 +278,7 @@ _STATIC(__mmu_off)
 	sync
 	rfid
 	b	.	/* prevent speculative execution */
+#endif
 
 
 /*
@@ -279,6 +316,10 @@ _GLOBAL(__start_initialization_multiplat
 	mr	r31,r3
 	mr	r30,r4
 
+#ifdef CONFIG_PPC_BOOK3E
+	bl	.start_initialization_book3e
+	b	.__after_prom_start
+#else
 	/* Setup some critical 970 SPRs before switching MMU off */
 	mfspr	r0,SPRN_PVR
 	srwi	r0,r0,16
@@ -296,6 +337,7 @@ _GLOBAL(__start_initialization_multiplat
 	/* Switch off MMU if not already off */
 	bl	.__mmu_off
 	b	.__after_prom_start
+#endif /* CONFIG_PPC_BOOK3E */
 
 _INIT_STATIC(__boot_from_prom)
 #ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE
@@ -358,10 +400,16 @@ _STATIC(__after_prom_start)
  * Note: This process overwrites the OF exception vectors.
  */
 	li	r3,0			/* target addr */
+#ifdef CONFIG_PPC_BOOK3E
+	tovirt(r3,r3)			/* on booke, we already run at PAGE_OFFSET */
+#endif
 	mr.	r4,r26			/* In some cases the loader may  */
 	beq	9f			/* have already put us at zero */
 	li	r6,0x100		/* Start offset, the first 0x100 */
 					/* bytes were copied earlier.	 */
+#ifdef CONFIG_PPC_BOOK3E
+	tovirt(r6,r6)			/* on booke, we already run at PAGE_OFFSET */
+#endif
 
 #ifdef CONFIG_CRASH_DUMP
 /*
@@ -507,6 +555,9 @@ _GLOBAL(pmac_secondary_start)
  *   r13       = paca virtual address
  *   SPRG_PACA = paca virtual address
  */
+	.section ".text";
+	.align 2 ;
+
 	.globl	__secondary_start
 __secondary_start:
 	/* Set thread priority to MEDIUM */
@@ -543,7 +594,7 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISER
 
 	mtspr	SPRN_SRR0,r3
 	mtspr	SPRN_SRR1,r4
-	rfid
+	RFI
 	b	.	/* prevent speculative execution */
 
 /* 
@@ -564,11 +615,16 @@ _GLOBAL(start_secondary_prolog)
  */
 _GLOBAL(enable_64b_mode)
 	mfmsr	r11			/* grab the current MSR */
+#ifdef CONFIG_PPC_BOOK3E
+	oris	r11,r11,0x8000		/* CM bit set, we'll set ICM later */
+	mtmsr	r11
+#else /* CONFIG_PPC_BOOK3E */
 	li	r12,(MSR_SF | MSR_ISF)@highest
 	sldi	r12,r12,48
 	or	r11,r11,r12
 	mtmsrd	r11
 	isync
+#endif
 	blr
 
 /*
@@ -612,9 +668,11 @@ _INIT_STATIC(start_here_multiplatform)
 	bdnz	3b
 4:
 
+#ifndef CONFIG_PPC_BOOK3E
 	mfmsr	r6
 	ori	r6,r6,MSR_RI
 	mtmsrd	r6			/* RI on */
+#endif
 
 #ifdef CONFIG_RELOCATABLE
 	/* Save the physical address we're running at in kernstart_addr */
@@ -647,7 +705,7 @@ _INIT_STATIC(start_here_multiplatform)
 	ld	r4,PACAKMSR(r13)
 	mtspr	SPRN_SRR0,r3
 	mtspr	SPRN_SRR1,r4
-	rfid
+	RFI
 	b	.	/* prevent speculative execution */
 	
 	/* This is where all platforms converge execution */
Index: linux-work/arch/powerpc/kernel/entry_64.S
===================================================================
--- linux-work.orig/arch/powerpc/kernel/entry_64.S	2009-07-24 15:06:38.000000000 +1000
+++ linux-work/arch/powerpc/kernel/entry_64.S	2009-07-24 16:05:29.000000000 +1000
@@ -120,9 +120,15 @@ BEGIN_FW_FTR_SECTION
 2:
 END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
 #endif /* CONFIG_PPC_ISERIES */
+
+	/* Hard enable interrupts */
+#ifdef CONFIG_PPC_BOOK3E
+	wrteei	1
+#else
 	mfmsr	r11
 	ori	r11,r11,MSR_EE
 	mtmsrd	r11,1
+#endif /* CONFIG_PPC_BOOK3E */
 
 #ifdef SHOW_SYSCALLS
 	bl	.do_show_syscall
@@ -168,15 +174,25 @@ syscall_exit:
 #endif
 	clrrdi	r12,r1,THREAD_SHIFT
 
-	/* disable interrupts so current_thread_info()->flags can't change,
-	   and so that we don't get interrupted after loading SRR0/1. */
 	ld	r8,_MSR(r1)
+#ifdef CONFIG_PPC_BOOK3S
+	/* No MSR:RI on BookE */
 	andi.	r10,r8,MSR_RI
 	beq-	unrecov_restore
+#endif
+
+	/* Disable interrupts so current_thread_info()->flags can't change,
+	 * and so that we don't get interrupted after loading SRR0/1.
+	 */
+#ifdef CONFIG_PPC_BOOK3E
+	wrteei	0
+#else
 	mfmsr	r10
 	rldicl	r10,r10,48,1
 	rotldi	r10,r10,16
 	mtmsrd	r10,1
+#endif /* CONFIG_PPC_BOOK3E */
+
 	ld	r9,TI_FLAGS(r12)
 	li	r11,-_LAST_ERRNO
 	andi.	r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
@@ -194,9 +210,13 @@ syscall_error_cont:
 	 * userspace and we take an exception after restoring r13,
 	 * we end up corrupting the userspace r13 value.
 	 */
+#ifdef CONFIG_PPC_BOOK3S
+	/* No MSR:RI on BookE */
 	li	r12,MSR_RI
 	andc	r11,r10,r12
 	mtmsrd	r11,1			/* clear MSR.RI */
+#endif /* CONFIG_PPC_BOOK3S */
+
 	beq-	1f
 	ACCOUNT_CPU_USER_EXIT(r11, r12)
 	ld	r13,GPR13(r1)	/* only restore r13 if returning to usermode */
@@ -206,7 +226,7 @@ syscall_error_cont:
 	mtcr	r5
 	mtspr	SPRN_SRR0,r7
 	mtspr	SPRN_SRR1,r8
-	rfid
+	RFI
 	b	.	/* prevent speculative execution */
 
 syscall_error:	
@@ -276,9 +296,13 @@ syscall_exit_work:
 	beq	.ret_from_except_lite
 
 	/* Re-enable interrupts */
+#ifdef CONFIG_PPC_BOOK3E
+	wrteei	1
+#else
 	mfmsr	r10
 	ori	r10,r10,MSR_EE
 	mtmsrd	r10,1
+#endif /* CONFIG_PPC_BOOK3E */
 
 	bl	.save_nvgprs
 	addi	r3,r1,STACK_FRAME_OVERHEAD
@@ -380,7 +404,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 	and.	r0,r0,r22
 	beq+	1f
 	andc	r22,r22,r0
-	mtmsrd	r22
+	MTMSRD(r22)
 	isync
 1:	std	r20,_NIP(r1)
 	mfcr	r23
@@ -399,6 +423,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 	std	r6,PACACURRENT(r13)	/* Set new 'current' */
 
 	ld	r8,KSP(r4)	/* new stack pointer */
+#ifdef CONFIG_PPC_BOOK3S
 BEGIN_FTR_SECTION
   BEGIN_FTR_SECTION_NESTED(95)
 	clrrdi	r6,r8,28	/* get its ESID */
@@ -445,8 +470,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT
 	slbie	r6		/* Workaround POWER5 < DD2.1 issue */
 	slbmte	r7,r0
 	isync
-
 2:
+#endif /* !CONFIG_PPC_BOOK3S */
+
 	clrrdi	r7,r8,THREAD_SHIFT	/* base of new stack */
 	/* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE
 	   because we don't need to leave the 288-byte ABI gap at the
@@ -490,10 +516,14 @@ _GLOBAL(ret_from_except_lite)
 	 * can't change between when we test it and when we return
 	 * from the interrupt.
 	 */
+#ifdef CONFIG_PPC_BOOK3E
+	wrteei	0
+#else
 	mfmsr	r10		/* Get current interrupt state */
 	rldicl	r9,r10,48,1	/* clear MSR_EE */
 	rotldi	r9,r9,16
 	mtmsrd	r9,1		/* Update machine state */
+#endif /* CONFIG_PPC_BOOK3E */
 
 #ifdef CONFIG_PREEMPT
 	clrrdi	r9,r1,THREAD_SHIFT	/* current_thread_info() */
@@ -540,6 +570,9 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_
 	rldicl	r4,r3,49,63		/* r0 = (r3 >> 15) & 1 */
 	stb	r4,PACAHARDIRQEN(r13)
 
+#ifdef CONFIG_PPC_BOOK3E
+	b	.exception_return_book3e
+#else
 	ld	r4,_CTR(r1)
 	ld	r0,_LINK(r1)
 	mtctr	r4
@@ -588,6 +621,8 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_
 	rfid
 	b	.	/* prevent speculative execution */
 
+#endif /* CONFIG_PPC_BOOK3E */
+
 iseries_check_pending_irqs:
 #ifdef CONFIG_PPC_ISERIES
 	ld	r5,SOFTE(r1)
@@ -638,6 +673,11 @@ do_work:
 	li	r0,1
 	stb	r0,PACASOFTIRQEN(r13)
 	stb	r0,PACAHARDIRQEN(r13)
+#ifdef CONFIG_PPC_BOOK3E
+	wrteei	1
+	bl	.preempt_schedule
+	wrteei	0
+#else
 	ori	r10,r10,MSR_EE
 	mtmsrd	r10,1		/* reenable interrupts */
 	bl	.preempt_schedule
@@ -646,6 +686,7 @@ do_work:
 	rldicl	r10,r10,48,1	/* disable interrupts again */
 	rotldi	r10,r10,16
 	mtmsrd	r10,1
+#endif /* CONFIG_PPC_BOOK3E */
 	ld	r4,TI_FLAGS(r9)
 	andi.	r0,r4,_TIF_NEED_RESCHED
 	bne	1b
@@ -654,8 +695,12 @@ do_work:
 user_work:
 #endif
 	/* Enable interrupts */
+#ifdef CONFIG_PPC_BOOK3E
+	wrteei	1
+#else
 	ori	r10,r10,MSR_EE
 	mtmsrd	r10,1
+#endif /* CONFIG_PPC_BOOK3E */
 
 	andi.	r0,r4,_TIF_NEED_RESCHED
 	beq	1f
@@ -837,6 +882,10 @@ _GLOBAL(enter_prom)
 
 	/* Switch MSR to 32 bits mode
 	 */
+#ifdef CONFIG_PPC_BOOK3E
+	rlwinm	r11,r11,0,1,31
+	mtmsr	r11
+#else /* CONFIG_PPC_BOOK3E */
         mfmsr   r11
         li      r12,1
         rldicr  r12,r12,MSR_SF_LG,(63-MSR_SF_LG)
@@ -845,6 +894,7 @@ _GLOBAL(enter_prom)
         rldicr  r12,r12,MSR_ISF_LG,(63-MSR_ISF_LG)
         andc    r11,r11,r12
         mtmsrd  r11
+#endif /* CONFIG_PPC_BOOK3E */
         isync
 
 	/* Enter PROM here... */
Index: linux-work/arch/powerpc/kernel/cputable.c
===================================================================
--- linux-work.orig/arch/powerpc/kernel/cputable.c	2009-07-24 15:06:38.000000000 +1000
+++ linux-work/arch/powerpc/kernel/cputable.c	2009-07-24 16:05:29.000000000 +1000
@@ -93,7 +93,7 @@ extern void __restore_cpu_power7(void);
 				 PPC_FEATURE_BOOKE)
 
 static struct cpu_spec __initdata cpu_specs[] = {
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_BOOK3S_64
 	{	/* Power3 */
 		.pvr_mask		= 0xffff0000,
 		.pvr_value		= 0x00400000,
@@ -508,7 +508,30 @@ static struct cpu_spec __initdata cpu_sp
 		.machine_check		= machine_check_generic,
 		.platform		= "power4",
 	}
-#endif	/* CONFIG_PPC64 */
+#endif	/* CONFIG_PPC_BOOK3S_64 */
+#ifdef CONFIG_PPC_BOOK3E_64
+	{	/* This is a default entry to get going, to be replaced by
+		 * a real one at some stage
+		 */
+#define CPU_FTRS_BASE_BOOK3E	(CPU_FTR_USE_TB | \
+	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_SMT | \
+	    CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
+		.pvr_mask		= 0x00000000,
+		.pvr_value		= 0x00000000,
+		.cpu_name		= "Book3E",
+		.cpu_features		= CPU_FTRS_BASE_BOOK3E,
+		.cpu_user_features	= COMMON_USER_PPC64,
+		.mmu_features		= MMU_FTR_TYPE_3E | MMU_FTR_USE_TLBILX |
+					  MMU_FTR_USE_TLBIVAX_BCAST |
+					  MMU_FTR_LOCK_BCAST_INVAL,
+		.icache_bsize		= 64,
+		.dcache_bsize		= 64,
+		.num_pmcs		= 0,
+		.machine_check		= machine_check_generic,
+		.platform		= "power6",
+	},
+#endif
+
 #ifdef CONFIG_PPC32
 #if CLASSIC_PPC
 	{	/* 601 */
Index: linux-work/arch/powerpc/mm/Makefile
===================================================================
--- linux-work.orig/arch/powerpc/mm/Makefile	2009-07-24 15:06:38.000000000 +1000
+++ linux-work/arch/powerpc/mm/Makefile	2009-07-24 16:05:29.000000000 +1000
@@ -13,6 +13,7 @@ obj-y				:= fault.o mem.o pgtable.o gup.
 				   pgtable_$(CONFIG_WORD_SIZE).o
 obj-$(CONFIG_PPC_MMU_NOHASH)	+= mmu_context_nohash.o tlb_nohash.o \
 				   tlb_nohash_low.o
+obj-$(CONFIG_PPC_BOOK3E)	+= tlb_low_$(CONFIG_WORD_SIZE)e.o
 obj-$(CONFIG_PPC64)		+= mmap_64.o
 hash64-$(CONFIG_PPC_NATIVE)	:= hash_native_64.o
 obj-$(CONFIG_PPC_STD_MMU_64)	+= hash_utils_64.o \
Index: linux-work/arch/powerpc/kernel/setup_64.c
===================================================================
--- linux-work.orig/arch/powerpc/kernel/setup_64.c	2009-07-24 16:05:25.000000000 +1000
+++ linux-work/arch/powerpc/kernel/setup_64.c	2009-07-24 16:05:29.000000000 +1000
@@ -454,6 +454,24 @@ static void __init irqstack_early_init(v
 #define irqstack_early_init()
 #endif
 
+#ifdef CONFIG_PPC_BOOK3E
+static void __init exc_lvl_early_init(void)
+{
+	unsigned int i;
+
+	for_each_possible_cpu(i) {
+		critirq_ctx[i] = (struct thread_info *)
+			__va(lmb_alloc(THREAD_SIZE, THREAD_SIZE));
+		dbgirq_ctx[i] = (struct thread_info *)
+			__va(lmb_alloc(THREAD_SIZE, THREAD_SIZE));
+		mcheckirq_ctx[i] = (struct thread_info *)
+			__va(lmb_alloc(THREAD_SIZE, THREAD_SIZE));
+	}
+}
+#else
+#define exc_lvl_early_init()
+#endif
+
 /*
  * Stack space used when we detect a bad kernel stack pointer, and
  * early in SMP boots before relocation is enabled.
@@ -513,6 +531,7 @@ void __init setup_arch(char **cmdline_p)
 	init_mm.brk = klimit;
 	
 	irqstack_early_init();
+	exc_lvl_early_init();
 	emergency_stack_init();
 
 #ifdef CONFIG_PPC_STD_MMU_64
Index: linux-work/arch/powerpc/include/asm/hw_irq.h
===================================================================
--- linux-work.orig/arch/powerpc/include/asm/hw_irq.h	2009-07-24 15:06:38.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/hw_irq.h	2009-07-24 16:05:29.000000000 +1000
@@ -49,8 +49,13 @@ extern void iseries_handle_interrupts(vo
 #define raw_irqs_disabled()		(local_get_flags() == 0)
 #define raw_irqs_disabled_flags(flags)	((flags) == 0)
 
+#ifdef CONFIG_PPC_BOOK3E
+#define __hard_irq_enable()	__asm__ __volatile__("wrteei 1": : :"memory");
+#define __hard_irq_disable()	__asm__ __volatile__("wrteei 0": : :"memory");
+#else
 #define __hard_irq_enable()	__mtmsrd(mfmsr() | MSR_EE, 1)
 #define __hard_irq_disable()	__mtmsrd(mfmsr() & ~MSR_EE, 1)
+#endif
 
 #define  hard_irq_disable()			\
 	do {					\
Index: linux-work/arch/powerpc/xmon/xmon.c
===================================================================
--- linux-work.orig/arch/powerpc/xmon/xmon.c	2009-07-24 15:06:38.000000000 +1000
+++ linux-work/arch/powerpc/xmon/xmon.c	2009-07-24 16:05:29.000000000 +1000
@@ -2570,7 +2570,7 @@ static void xmon_print_symbol(unsigned l
 	printf("%s", after);
 }
 
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_BOOK3S_64
 static void dump_slb(void)
 {
 	int i;
Index: linux-work/arch/powerpc/platforms/Kconfig.cputype
===================================================================
--- linux-work.orig/arch/powerpc/platforms/Kconfig.cputype	2009-07-24 15:06:38.000000000 +1000
+++ linux-work/arch/powerpc/platforms/Kconfig.cputype	2009-07-24 16:05:29.000000000 +1000
@@ -57,15 +57,35 @@ config E200
 
 endchoice
 
-config PPC_BOOK3S_64
-	def_bool y
+choice
+	prompt "Processor Type"
 	depends on PPC64
+	help
+	  There are two families of 64 bit PowerPC chips supported.
+	  The most common ones are the desktop and server CPUs
+	  (POWER3, RS64, POWER4, POWER5, POWER5+, POWER6, ...)
+
+	  The other are the "embedded" processors compliant with the
+	  "Book 3E" variant of the architecture
+
+config PPC_BOOK3S_64
+	bool "Server processors"
 	select PPC_FPU
 
+config PPC_BOOK3E_64
+	bool "Embedded processors"
+	select PPC_FPU # Make it a choice ?
+
+endchoice
+
 config PPC_BOOK3S
 	def_bool y
 	depends on PPC_BOOK3S_32 || PPC_BOOK3S_64
 
+config PPC_BOOK3E
+	def_bool y
+	depends on PPC_BOOK3E_64
+
 config POWER4_ONLY
 	bool "Optimize for POWER4"
 	depends on PPC64 && PPC_BOOK3S
@@ -125,7 +145,7 @@ config 4xx
 
 config BOOKE
 	bool
-	depends on E200 || E500 || 44x
+	depends on E200 || E500 || 44x || PPC_BOOK3E
 	default y
 
 config FSL_BOOKE
@@ -223,9 +243,17 @@ config PPC_MMU_NOHASH
 	def_bool y
 	depends on !PPC_STD_MMU
 
+config PPC_MMU_NOHASH_32
+	def_bool y
+	depends on PPC_MMU_NOHASH && PPC32
+
+config PPC_MMU_NOHASH_64
+	def_bool y
+	depends on PPC_MMU_NOHASH && PPC64
+
 config PPC_BOOK3E_MMU
 	def_bool y
-	depends on FSL_BOOKE
+	depends on FSL_BOOKE || PPC_BOOK3E
 
 config PPC_MM_SLICES
 	bool
@@ -257,7 +285,7 @@ config PPC_PERF_CTRS
          This enables the powerpc-specific perf_counter back-end.
 
 config SMP
-	depends on PPC_STD_MMU || FSL_BOOKE
+	depends on PPC_BOOK3S || PPC_BOOK3E || FSL_BOOKE
 	bool "Symmetric multi-processing support"
 	---help---
 	  This enables support for systems with more than one CPU. If you have
Index: linux-work/arch/powerpc/include/asm/smp.h
===================================================================
--- linux-work.orig/arch/powerpc/include/asm/smp.h	2009-07-24 16:03:50.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/smp.h	2009-07-24 16:05:46.000000000 +1000
@@ -153,6 +153,7 @@ extern void arch_send_call_function_ipi(
  * 64-bit but defining them all here doesn't harm
  */
 extern void generic_secondary_smp_init(void);
+extern void generic_secondary_thread_init(void);
 extern unsigned long __secondary_hold_spinloop;
 extern unsigned long __secondary_hold_acknowledge;
 extern char __secondary_hold;
Index: linux-work/arch/powerpc/Kconfig
===================================================================
--- linux-work.orig/arch/powerpc/Kconfig	2009-07-24 15:06:38.000000000 +1000
+++ linux-work/arch/powerpc/Kconfig	2009-07-24 16:05:29.000000000 +1000
@@ -472,7 +472,7 @@ config PPC_16K_PAGES
 	bool "16k page size" if 44x
 
 config PPC_64K_PAGES
-	bool "64k page size" if 44x || PPC_STD_MMU_64
+	bool "64k page size" if 44x || PPC_STD_MMU_64 || PPC_BOOK3E_64
 	select PPC_HAS_HASH_64K if PPC_STD_MMU_64
 
 config PPC_256K_PAGES

^ permalink raw reply

* Re: [PATCH 0/5] Generic NVRAM support for large MMIO devices
From: Martyn Welch @ 2009-07-24  9:25 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, akpm
In-Reply-To: <1248335077.3367.79.camel@pasglop>

Benjamin Herrenschmidt wrote:
> On Thu, 2009-07-02 at 17:12 +0100, Martyn Welch wrote:
>   
>> The following series allows the generic NVRAM driver to access MMIO
>> based NVRAMs. In addition it enables support for NVRAMs of sizes
>> differing from those found on PowerPC Macs (providing a safe
>> fallback). Patches are also included to enable support for the NVRAM
>> found on the GE Fanuc PPC9A, SBC310 and SBC610.
>>
>> If this patch series is unsuitable this late in the day for 2.6.31,
>> please concider it for 2.6.32.
>>     
>
> No major issue with the series other than the change to the generic
> nvram code which needs to not break other architectures :-) Also,
> it will probably need to go through Andrew Morton, unless there's
> a maintainer for that driver, is there ?
>   
I'm not aware of a maintainer for the "generic nvram" driver. As 
mentioned in my other email, I'm fairly confident that this driver 
(/drivers/char/generic_nvram.c) is only used on PowerPC. I'm beginning 
to enter what is for me uncharted territory :-) How should I proceed? 
Are you happy to take the entire patch series, do I send the entire 
patch series to Andrew and LKML or just patch 2/5?

Martyn
> Once you sort out that aspect of the patch series, I'm happy to take
> the rest in powerpc.git
>
> Cheers,
> Ben.
>
>   


-- 
Martyn Welch MEng MPhil MIET (Principal Software Engineer)   T:+44(0)1327322748
GE Fanuc Intelligent Platforms Ltd,        |Registered in England and Wales
Tove Valley Business Park, Towcester,      |(3828642) at 100 Barbirolli Square,
Northants, NN12 6PF, UK T:+44(0)1327359444 |Manchester,M2 3AB  VAT:GB 927559189

^ permalink raw reply

* Re: [PATCH 0/5] Generic NVRAM support for large MMIO devices
From: Benjamin Herrenschmidt @ 2009-07-24 10:35 UTC (permalink / raw)
  To: Martyn Welch; +Cc: linuxppc-dev, akpm
In-Reply-To: <4A697E22.2080609@gefanuc.com>


> I'm not aware of a maintainer for the "generic nvram" driver. As 
> mentioned in my other email, I'm fairly confident that this driver 
> (/drivers/char/generic_nvram.c) is only used on PowerPC. I'm beginning 
> to enter what is for me uncharted territory :-) How should I proceed? 
> Are you happy to take the entire patch series, do I send the entire 
> patch series to Andrew and LKML or just patch 2/5?

I'll dbl check, if it's only ppc then I'll take the whole series,

As usual, if you don't see things happening by end of next week, ping
me :-) But it's on patchwork so hopefully it won't fall through the
cracks.

Cheers,
Ben.

^ permalink raw reply

* Re: BUG in dma-mapping.h:218 // MESH SCSI driver not working
From: Stef Simoens @ 2009-07-24 11:55 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev
In-Reply-To: <1248425553.25467.20.camel@pasglop>

Hello Ben,

Thank you for your reply.

> On Fri, 2009-07-24 at 00:18 +0200, Stef Simoens wrote:
>> I tried the latest 2.6.31-rc3-git3 (without any other patch).
>> However, I have the same behaviour as the patched 2.6.30 (so: no BUG,
>> but the mesh_abort messages).
> Would it be possible for you to roughly find out at what kernel version
> it stopped working ? (Some kernels may need my patch to avoid crashing)

I am currently running 2.6.29-gentoo-r5 (that's somewhere at the end of
2.6.29, probably 2.6.29.5).

I compiled 2.6.30 as soon as it came 'stable'.
In any version of 2.6.30,  I encounter the BUG (dma-mapping.h:218).

I didn't react immediately, I actually guessed that the problem would have
been reported and solved in another 2.6.30.x.
Because it didn't, I started browsing the mailing-list (and found your
patch).
2.6.30-gentoo-r3 with your patch applied doesn't give the bug,
but gives the mesh_abort.

Before asking the question, I wanted to build the latest 2.6.31-rc
available to make sure my problem didn't get solved in the meantime.
2.6.31-rc3 gives the same mesh_abort.

Would you like me to try all the linux-2.6.30-rc?
Could you give me your best guess starting-point?

I know that there exists something as git-disect ... but I have never used
git (there always needs to be the first time, of course).

Kind regards,

Stef

^ permalink raw reply

* [PATCH] Add support for the ESTeem 195E (PPC405EP) SBC
From: Solomon Peachy @ 2009-07-24 15:21 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Solomon Peachy

This patch adds support for the ESTeem 195E Hotfoot SBC.
I've been maintaining this out-of-tree for some time now for
older kernels, but recently I ported it to the new unified powerpc 
tree with the intent of pushing it upstream.

The 195E boards use ancient versions of u-boot and a slightly mangled
verison of the oft-abused ppcboot header. 

There are several variants of the SBC deployed, single/dual
ethernet+serial, and also 4MB/8MB flash variations.  In the interest of
having a single kernel image boot on all boards, the cuboot shim detects
the differences and mangles the DTS tree appropriately.

With the exception of the CF interface that was never populated on
production boards, this code/DTS supports all boardpop options.

Signed-off-by:  Solomon Peachy <solomon@linux-wlan.com>

diff -Naur linux-2.6.30/arch/powerpc/boot/Makefile linux-2.6.30.hotfoot/arch/powerpc/boot/Makefile
--- linux-2.6.30/arch/powerpc/boot/Makefile	2009-06-09 23:05:27.000000000 -0400
+++ linux-2.6.30.hotfoot/arch/powerpc/boot/Makefile	2009-07-07 12:55:18.000000000 -0400
@@ -39,6 +39,7 @@
 
 $(obj)/4xx.o: BOOTCFLAGS += -mcpu=405
 $(obj)/ebony.o: BOOTCFLAGS += -mcpu=405
+$(obj)/cuboot-hotfoot.o: BOOTCFLAGS += -mcpu=405
 $(obj)/cuboot-taishan.o: BOOTCFLAGS += -mcpu=405
 $(obj)/cuboot-katmai.o: BOOTCFLAGS += -mcpu=405
 $(obj)/cuboot-acadia.o: BOOTCFLAGS += -mcpu=405
@@ -67,7 +68,7 @@
 		cpm-serial.c stdlib.c mpc52xx-psc.c planetcore.c uartlite.c \
 		fsl-soc.c mpc8xx.c pq2.c
 src-plat := of.c cuboot-52xx.c cuboot-824x.c cuboot-83xx.c cuboot-85xx.c holly.c \
-		cuboot-ebony.c treeboot-ebony.c prpmc2800.c \
+		cuboot-ebony.c cuboot-hotfoot.c treeboot-ebony.c prpmc2800.c \
 		ps3-head.S ps3-hvcall.S ps3.c treeboot-bamboo.c cuboot-8xx.c \
 		cuboot-pq2.c cuboot-sequoia.c treeboot-walnut.c \
 		cuboot-bamboo.c cuboot-mpc7448hpc2.c cuboot-taishan.c \
@@ -190,6 +191,7 @@
 
 # Board ports in arch/powerpc/platform/40x/Kconfig
 image-$(CONFIG_EP405)			+= dtbImage.ep405
+image-$(CONFIG_HOTFOOT)			+= cuImage.hotfoot
 image-$(CONFIG_WALNUT)			+= treeImage.walnut
 image-$(CONFIG_ACADIA)			+= cuImage.acadia
 
diff -Naur linux-2.6.30/arch/powerpc/boot/cuboot-hotfoot.c linux-2.6.30.hotfoot/arch/powerpc/boot/cuboot-hotfoot.c
--- linux-2.6.30/arch/powerpc/boot/cuboot-hotfoot.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.30.hotfoot/arch/powerpc/boot/cuboot-hotfoot.c	2009-07-07 12:55:23.000000000 -0400
@@ -0,0 +1,142 @@
+/*
+ * Old U-boot compatibility for Esteem 195E Hotfoot CPU Board
+ *
+ * Author: Solomon Peachy <solomon@linux-wlan.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include "ops.h"
+#include "stdio.h"
+#include "reg.h"
+#include "dcr.h"
+#include "4xx.h"
+#include "cuboot.h"
+
+#define TARGET_4xx
+#define TARGET_HOTFOOT
+
+#include "ppcboot.h"
+
+static bd_t bd;
+
+#define NUM_REGS 3
+
+static void hotfoot_fixups(void)
+{
+	u32 uart = mfdcr(DCRN_CPC0_UCR) & 0x7f;
+
+	dt_fixup_memory(bd.bi_memstart, bd.bi_memsize); 
+
+	dt_fixup_cpu_clocks(bd.bi_procfreq, bd.bi_procfreq, 0);
+	dt_fixup_clock("/plb", bd.bi_plb_busfreq);
+	dt_fixup_clock("/plb/opb", bd.bi_opbfreq);
+	dt_fixup_clock("/plb/ebc", bd.bi_pci_busfreq);
+	dt_fixup_clock("/plb/opb/serial@ef600300", bd.bi_procfreq / uart); 
+	dt_fixup_clock("/plb/opb/serial@ef600400", bd.bi_procfreq / uart); 
+	
+	dt_fixup_mac_address_by_alias("ethernet0", bd.bi_enetaddr);
+	dt_fixup_mac_address_by_alias("ethernet1", bd.bi_enet1addr);
+
+	/* Is this a single eth/serial board? */
+	if ((bd.bi_enet1addr[0] == 0) && 
+	    (bd.bi_enet1addr[1] == 0) &&
+	    (bd.bi_enet1addr[2] == 0) &&
+	    (bd.bi_enet1addr[3] == 0) &&
+	    (bd.bi_enet1addr[4] == 0) &&
+	    (bd.bi_enet1addr[5] == 0)) {
+		void *devp;
+
+		printf("Trimming devtree for single eth board\n");
+
+		devp = finddevice("/plb/opb/serial@ef600300");
+		if (!devp)
+			fatal("Can't find node for /plb/opb/serial@ef600300");
+		del_node(devp);
+
+		devp = finddevice("/plb/opb/ethernet@ef600900");
+		if (!devp)
+			fatal("Can't find node for /plb/opb/ethernet@ef600900");
+		del_node(devp);
+	}
+
+	ibm4xx_quiesce_eth((u32 *)0xef600800, (u32 *)0xef600900);
+
+	/* Fix up flash size in fdt for 4M boards. */
+	if (bd.bi_flashsize < 0x800000) {
+		u32 regs[NUM_REGS];
+		void *devp = finddevice("/plb/ebc/nor_flash@0");
+		if (!devp)
+			fatal("Can't find FDT node for nor_flash!??");
+
+		printf("Fixing devtree for 4M Flash\n");
+		
+		/* First fix up the base addresse */
+		getprop(devp, "reg", regs, sizeof(regs));
+		regs[0] = 0;
+		regs[1] = 0xffc00000;
+		regs[2] = 0x00400000;
+		setprop(devp, "reg", regs, sizeof(regs));
+		
+		/* Then the offsets */
+		devp = finddevice("/plb/ebc/nor_flash@0/partition@0");
+		if (!devp)
+			fatal("Can't find FDT node for partition@0");
+		getprop(devp, "reg", regs, 2*sizeof(u32));
+		regs[0] -= 0x400000;
+		setprop(devp, "reg", regs,  2*sizeof(u32));
+
+		devp = finddevice("/plb/ebc/nor_flash@0/partition@1");
+		if (!devp)
+			fatal("Can't find FDT node for partition@1");
+		getprop(devp, "reg", regs, 2*sizeof(u32));
+		regs[0] -= 0x400000;
+		setprop(devp, "reg", regs,  2*sizeof(u32));
+
+		devp = finddevice("/plb/ebc/nor_flash@0/partition@2");
+		if (!devp)
+			fatal("Can't find FDT node for partition@2");
+		getprop(devp, "reg", regs, 2*sizeof(u32));
+		regs[0] -= 0x400000;
+		setprop(devp, "reg", regs,  2*sizeof(u32));
+
+		devp = finddevice("/plb/ebc/nor_flash@0/partition@3");
+		if (!devp)
+			fatal("Can't find FDT node for partition@3");
+		getprop(devp, "reg", regs, 2*sizeof(u32));
+		regs[0] -= 0x400000;
+		setprop(devp, "reg", regs,  2*sizeof(u32));
+
+		devp = finddevice("/plb/ebc/nor_flash@0/partition@4");
+		if (!devp)
+			fatal("Can't find FDT node for partition@4");
+		getprop(devp, "reg", regs, 2*sizeof(u32));
+		regs[0] -= 0x400000;
+		setprop(devp, "reg", regs,  2*sizeof(u32));
+
+		devp = finddevice("/plb/ebc/nor_flash@0/partition@6");
+		if (!devp)
+			fatal("Can't find FDT node for partition@6");
+		getprop(devp, "reg", regs, 2*sizeof(u32));
+		regs[0] -= 0x400000;
+		setprop(devp, "reg", regs,  2*sizeof(u32));
+
+		/* Delete the FeatFS node */
+		devp = finddevice("/plb/ebc/nor_flash@0/partition@5");
+		if (!devp)
+			fatal("Can't find FDT node for partition@5");
+		del_node(devp);
+	}
+}
+
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+		   unsigned long r6, unsigned long r7)
+{
+	CUBOOT_INIT();
+	platform_ops.fixups = hotfoot_fixups;
+        platform_ops.exit = ibm40x_dbcr_reset;
+	fdt_init(_dtb_start);
+	serial_console_init();
+}
diff -Naur linux-2.6.30/arch/powerpc/boot/dts/hotfoot.dts linux-2.6.30.hotfoot/arch/powerpc/boot/dts/hotfoot.dts
--- linux-2.6.30/arch/powerpc/boot/dts/hotfoot.dts	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.30.hotfoot/arch/powerpc/boot/dts/hotfoot.dts	2009-07-07 12:55:23.000000000 -0400
@@ -0,0 +1,299 @@
+/*
+ * Device Tree Source for ESTeem 195E Hotfoot
+ *
+ * Copyright 2009 AbsoluteValue Systems <solomon@linux-wlan.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without
+ * any warranty of any kind, whether express or implied.
+ */
+
+/dts-v1/;
+
+/ {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	model = "est,hotfoot";
+	compatible = "est,hotfoot";
+	dcr-parent = <&{/cpus/cpu@0}>;
+
+	aliases {
+		ethernet0 = &EMAC0;
+		ethernet1 = &EMAC1;
+		serial0 = &UART0;
+		serial1 = &UART1;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			model = "PowerPC,405EP";
+			reg = <0x00000000>;
+			clock-frequency = <0>; /* Filled in by zImage */
+			timebase-frequency = <0>; /* Filled in by zImage */
+			i-cache-line-size = <0x20>;
+			d-cache-line-size = <0x20>;
+			i-cache-size = <0x4000>;
+			d-cache-size = <0x4000>;
+			dcr-controller;
+			dcr-access-method = "native";
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x00000000>; /* Filled in by zImage */
+	};
+
+	UIC0: interrupt-controller {
+		compatible = "ibm,uic";
+		interrupt-controller;
+		cell-index = <0>;
+		dcr-reg = <0x0c0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+	};
+
+	plb {
+		compatible = "ibm,plb3";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		ranges;
+		clock-frequency = <0>; /* Filled in by zImage */
+
+		SDRAM0: memory-controller {
+			compatible = "ibm,sdram-405ep";
+			dcr-reg = <0x010 0x002>;
+		};
+
+		MAL: mcmal {
+			compatible = "ibm,mcmal-405ep", "ibm,mcmal";
+			dcr-reg = <0x180 0x062>;
+			num-tx-chans = <4>;
+			num-rx-chans = <2>;
+			interrupt-parent = <&UIC0>;
+			interrupts = <
+				0xb 0x4 /* TXEOB */
+				0xc 0x4 /* RXEOB */
+				0xa 0x4 /* SERR */
+				0xd 0x4 /* TXDE */
+				0xe 0x4 /* RXDE */>;
+		};
+
+		POB0: opb {
+			compatible = "ibm,opb-405ep", "ibm,opb";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			ranges = <0xef600000 0xef600000 0x00a00000>;
+			dcr-reg = <0x0a0 0x005>;
+			clock-frequency = <0>; /* Filled in by zImage */
+
+			/* Hotfoot has UART0/UART1 swapped */
+
+			UART0: serial@ef600400 {
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0xef600400 0x00000008>;
+				virtual-reg = <0xef600400>;
+				clock-frequency = <0>; /* Filled in by zImage */
+				current-speed = <0x9600>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x1 0x4>;
+			};
+
+			UART1: serial@ef600300 {
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0xef600300 0x00000008>;
+				virtual-reg = <0xef600300>;
+				clock-frequency = <0>; /* Filled in by zImage */
+				current-speed = <0x9600>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x0 0x4>;
+			};
+
+
+			IIC: i2c@ef600500 {
+				compatible = "ibm,iic-405ep", "ibm,iic";
+				reg = <0xef600500 0x00000011>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x2 0x4>;
+
+				rtc@68 {
+				      /* Actually a DS1339 */
+				      compatible = "dallas,ds1307"; 
+				      reg = <0x68>;
+				};
+
+				temp@4a { 
+				      /* Not present on all boards */
+				      compatible = "national,lm75";
+				      reg = <0x4a>;
+				};				
+			};
+
+			GPIO: gpio@ef600700 {
+			        #gpio-cells = <2>;
+				compatible = "ibm,ppc4xx-gpio";
+				reg = <0xef600700 0x00000020>;
+				gpio-controller;
+			};
+
+			gpio-leds {
+			     compatible = "gpio-leds";
+			     status {
+			     	    label = "Status";
+			     	    gpios = <&GPIO 1 0>;
+				    /* linux,default=trigger = ".."; */
+			     };
+			     radiorx {
+			     	    label = "Rx";
+			     	    gpios = <&GPIO 0xe 0>;
+				    /* linux,default=trigger = ".."; */
+			     };
+			};
+
+
+			EMAC0: ethernet@ef600800 {
+				linux,network-index = <0x0>;
+				device_type = "network";
+				compatible = "ibm,emac-405ep", "ibm,emac";
+				interrupt-parent = <&UIC0>;
+				interrupts = <
+					0xf 0x4 /* Ethernet */
+					0x9 0x4 /* Ethernet Wake Up */>;
+				local-mac-address = [000000000000]; /* Filled in by zImage */
+				reg = <0xef600800 0x00000070>;
+				mal-device = <&MAL>;
+				mal-tx-channel = <0>;
+				mal-rx-channel = <0>;
+				cell-index = <0>;
+				max-frame-size = <0x5dc>;
+				rx-fifo-size = <0x1000>;
+				tx-fifo-size = <0x800>;
+				phy-mode = "mii";
+				phy-map = <0x00000000>;
+			};
+
+			EMAC1: ethernet@ef600900 {
+				linux,network-index = <0x1>;
+				device_type = "network";
+				compatible = "ibm,emac-405ep", "ibm,emac";
+				interrupt-parent = <&UIC0>;
+				interrupts = <
+					0x11 0x4 /* Ethernet */
+					0x9 0x4 /* Ethernet Wake Up */>;
+				local-mac-address = [000000000000]; /* Filled in by zImage */
+				reg = <0xef600900 0x00000070>;
+				mal-device = <&MAL>;
+				mal-tx-channel = <2>;
+				mal-rx-channel = <1>;
+				cell-index = <1>;
+				max-frame-size = <0x5dc>;
+				rx-fifo-size = <0x1000>;
+				tx-fifo-size = <0x800>;
+                                mdio-device = <&EMAC0>;
+				phy-mode = "mii";
+				phy-map = <0x0000001>;
+			};
+		};
+
+		EBC0: ebc {
+			compatible = "ibm,ebc-405ep", "ibm,ebc";
+			dcr-reg = <0x012 0x002>;
+			#address-cells = <2>;
+			#size-cells = <1>;
+
+			/* The ranges property is supplied by the bootwrapper
+			 * and is based on the firmware's configuration of the
+			 * EBC bridge
+			 */
+			clock-frequency = <0>; /* Filled in by zImage */
+
+                        nor_flash@0 {
+                                compatible = "cfi-flash";
+                                bank-width = <2>;
+                                reg = <0x0 0xff800000 0x00800000>;
+                                #address-cells = <1>;
+                                #size-cells = <1>;
+
+				/* This mapping is for the 8M flash
+				   4M flash has all ofssets -= 4M,
+				   and FeatFS partition is not present */
+				
+                                partition@0 {
+                                        label = "Bootloader";
+                                        reg = <0x7c0000 0x40000>;
+                                        /* read-only; */
+                                };
+                                partition@1 {
+                                        label = "Env_and_Config_Primary";
+                                        reg = <0x400000 0x10000>;
+                                };
+                                partition@2 {
+                                        label = "Kernel";
+                                        reg = <0x420000 0x100000>;
+                                };
+                                partition@3 {
+                                        label = "Filesystem";
+                                        reg = <0x520000 0x2a0000>;
+                                };
+                                partition@4 {
+                                        label = "Env_and_Config_Secondary";
+                                        reg = <0x410000 0x10000>;
+                                };
+                                partition@5 {
+                                        label = "FeatFS";
+                                        reg = <0x000000 0x400000>;
+                                };
+                                partition@6 {
+                                        label = "Bootloader_Env";
+                                        reg = <0x7d0000 0x10000>;
+                                };
+			};
+		};
+
+                PCI0: pci@ec000000 {
+                        device_type = "pci";
+                        #interrupt-cells = <1>;
+                        #size-cells = <2>;
+                        #address-cells = <3>;
+                        compatible = "ibm,plb405ep-pci", "ibm,plb-pci";
+                        primary;
+                        reg = <0xeec00000 0x00000008    /* Config space access */
+                               0xeed80000 0x00000004    /* IACK */
+                               0xeed80000 0x00000004    /* Special cycle */
+                               0xef480000 0x00000040>;  /* Internal registers */
+
+                        /* Outbound ranges, one memory and one IO,
+                         * later cannot be changed. Chip supports a second
+                         * IO range but we don't use it for now
+                         */
+                        ranges = <0x02000000 0x00000000 0x80000000 0x80000000 0x00000000 0x20000000
+                                  0x01000000 0x00000000 0x00000000 0xe8000000 0x00000000 0x00010000>;
+
+                        /* Inbound 2GB range starting at 0 */
+                        dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x80000000>;
+
+			interrupt-parent = <&UIC0>;
+                        interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+                        interrupt-map = <
+                                /* IDSEL 3 -- slot1 (optional) 27/29 A/B IRQ2/4 */
+                                0x1800 0x0 0x0 0x1 &UIC0 0x1b 0x8
+                                0x1800 0x0 0x0 0x2 &UIC0 0x1d 0x8
+
+                                /* IDSEL 4 -- slot0, 26/28 A/B IRQ1/3 */
+                                0x2000 0x0 0x0 0x1 &UIC0 0x1a 0x8
+                                0x2000 0x0 0x0 0x2 &UIC0 0x1c 0x8
+                        >;
+                };
+	};
+
+	chosen {
+		linux,stdout-path = &UART0;
+	};
+};
diff -Naur linux-2.6.30/arch/powerpc/boot/ppcboot.h linux-2.6.30.hotfoot/arch/powerpc/boot/ppcboot.h
--- linux-2.6.30/arch/powerpc/boot/ppcboot.h	2009-06-09 23:05:27.000000000 -0400
+++ linux-2.6.30.hotfoot/arch/powerpc/boot/ppcboot.h	2009-07-07 12:55:18.000000000 -0400
@@ -52,6 +52,11 @@
 	unsigned long	bi_bootflags;	/* boot / reboot flag (for LynxOS) */
 	unsigned long	bi_ip_addr;	/* IP Address */
 	unsigned char	bi_enetaddr[6];	/* Ethernet address */
+#if defined(TARGET_HOTFOOT)
+	/* second onboard ethernet port */
+	unsigned char	bi_enet1addr[6];
+#define HAVE_ENET1ADDR
+#endif /* TARGET_HOOTFOOT */
 	unsigned short	bi_ethspeed;	/* Ethernet speed in Mbps */
 	unsigned long	bi_intfreq;	/* Internal Freq, in MHz */
 	unsigned long	bi_busfreq;	/* Bus Freq, in MHz */
@@ -74,6 +79,9 @@
 	unsigned int	bi_pci_busfreq;	/* PCI Bus speed, in Hz */
 	unsigned char	bi_pci_enetaddr[6];	/* PCI Ethernet MAC address */
 #endif
+#if defined(TARGET_HOTFOOT)
+	unsigned int     bi_pllouta_freq;       /* PLL OUTA speed, in Hz */
+#endif
 #if defined(TARGET_HYMOD)
 	hymod_conf_t	bi_hymod_conf;	/* hymod configuration information */
 #endif
@@ -94,6 +102,10 @@
 	unsigned char	bi_enet3addr[6];
 #define HAVE_ENET3ADDR
 #endif
+#if defined(TARGET_HOTFOOT)
+        int             bi_phynum[2];           /* Determines phy mapping */
+        int             bi_phymode[2];          /* Determines phy mode */
+#endif
 #if defined(TARGET_4xx)
 	unsigned int	bi_opbfreq;		/* OB clock in Hz */
 	int		bi_iic_fast[2];		/* Use fast i2c mode */
diff -Naur linux-2.6.30/arch/powerpc/platforms/40x/Kconfig linux-2.6.30.hotfoot/arch/powerpc/platforms/40x/Kconfig
--- linux-2.6.30/arch/powerpc/platforms/40x/Kconfig	2009-06-09 23:05:27.000000000 -0400
+++ linux-2.6.30.hotfoot/arch/powerpc/platforms/40x/Kconfig	2009-07-07 12:55:18.000000000 -0400
@@ -40,6 +40,16 @@
 	help
 	  This option enables support for the Nestal Maschinen HCU4 board.
 
+config HOTFOOT
+        bool "Hotfoot"
+	depends on 40x
+	default n
+	select 405EP
+	select PPC40x_SIMPLE
+	select PCI
+        help
+	 This option enables support for the ESTEEM 195E Hotfoot board.
+
 config KILAUEA
 	bool "Kilauea"
 	depends on 40x
diff -Naur linux-2.6.30/arch/powerpc/platforms/40x/ppc40x_simple.c linux-2.6.30.hotfoot/arch/powerpc/platforms/40x/ppc40x_simple.c
--- linux-2.6.30/arch/powerpc/platforms/40x/ppc40x_simple.c	2009-06-09 23:05:27.000000000 -0400
+++ linux-2.6.30.hotfoot/arch/powerpc/platforms/40x/ppc40x_simple.c	2009-07-07 12:55:18.000000000 -0400
@@ -51,7 +51,8 @@
  * board.c file for it rather than adding it to this list.
  */
 static char *board[] __initdata = {
-	"amcc,acadia"
+	"amcc,acadia",
+	"est,hotfoot"
 };
 
 static int __init ppc40x_probe(void)

^ permalink raw reply

* Re: [PATCH] Add support for the ESTeem 195E (PPC405EP) SBC
From: Josh Boyer @ 2009-07-24 15:59 UTC (permalink / raw)
  To: Solomon Peachy; +Cc: linuxppc-dev
In-Reply-To: <1248448916-29473-1-git-send-email-solomon@linux-wlan.com>

On Fri, Jul 24, 2009 at 11:21:56AM -0400, Solomon Peachy wrote:
>This patch adds support for the ESTeem 195E Hotfoot SBC.

Is this one different from the one you sent out yesterday?  I plan to review
this support soon, but now I'm slightly confused :)

josh

^ permalink raw reply

* Re: [PATCH] Add support for the ESTeem 195E (PPC405EP) SBC
From: Solomon Peachy @ 2009-07-24 16:10 UTC (permalink / raw)
  To: Josh Boyer; +Cc: linuxppc-dev
In-Reply-To: <20090724155948.GC2822@hansolo.jdub.homelinux.org>

[-- Attachment #1: Type: text/plain, Size: 686 bytes --]

On Fri, Jul 24, 2009 at 11:59:48AM -0400, Josh Boyer wrote:
> Is this one different from the one you sent out yesterday?  I plan to review
> this support soon, but now I'm slightly confused :)

Crap, sorry -- I didn't see it show up in the mailing list archives so I 
thought it got dropped into the spam bitbucket of the sky.  The latter 
mailing was sent just after I subscribed to the mailing list.

 - Solomon
-- 
Solomon Peachy                        solomon@linux-wlan.com
AbsoluteValue Systems                 http://www.linux-wlan.com
721-D North Drive                     +1 (321) 259-0737  (office)
Melbourne, FL 32934                   +1 (321) 259-0286  (fax)

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply

* Re: [PATCH] Add support for the ESTeem 195E (PPC405EP) SBC
From: Josh Boyer @ 2009-07-24 16:45 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <20090724161029.GA29263@linux-wlan.com>

On Fri, Jul 24, 2009 at 12:10:30PM -0400, Solomon Peachy wrote:
>On Fri, Jul 24, 2009 at 11:59:48AM -0400, Josh Boyer wrote:
>> Is this one different from the one you sent out yesterday?  I plan to review
>> this support soon, but now I'm slightly confused :)
>
>Crap, sorry -- I didn't see it show up in the mailing list archives so I 
>thought it got dropped into the spam bitbucket of the sky.  The latter 
>mailing was sent just after I subscribed to the mailing list.

No worries.  If they aren't different, I'll just pick the latest one to review.

Thanks for sending the patch!  More board support is always welcome.

josh

^ permalink raw reply

* Re: [PATCH] Support for PCI Express reset type in EEH
From: Richard Lary @ 2009-07-24 21:36 UTC (permalink / raw)
  To: linasvepstas; +Cc: linuxppc-dev, mmlnx, Paul Mackerras, linux-pci
In-Reply-To: <3ae3aa420907230744ya3a9342w4f29e150b3b5658f@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 2225 bytes --]



Linas Vepstas <linasvepstas@gmail.com> wrote on 07/23/2009 07:44:33 AM:

> 2009/7/15 Mike Mason <mmlnx@us.ibm.com>:
> > By default, EEH does what's known as a "hot reset" during error
recovery of
> > a PCI Express device.  We've found a case where the device needs a
> > "fundamental reset" to recover properly.  The current PCI error
recovery and
> > EEH frameworks do not support this distinction.
> >
> > The attached patch (courtesy of Richard Lary) adds a bit field to
pci_dev
> > that indicates whether the device requires a fundamental reset during
error
> > recovery.  This bit can be checked by EEH to determine which reset type
is
> > required.
> >
> > This patch supersedes the previously submitted patch that implemented a
> > reset type callback.
> >
> > Please review and let me know of any concerns.
>
> I like this patch a *lot* better .. it is vastly simpler, more direct.
>
>
> > diff -uNrp a/include/linux/pci.h b/include/linux/pci.h
> > --- a/include/linux/pci.h       2009-07-13 14:25:37.000000000 -0700
> > +++ b/include/linux/pci.h       2009-07-15 10:25:37.000000000 -0700
> > @@ -273,6 +273,7 @@ struct pci_dev {
> >        unsigned int    ari_enabled:1;  /* ARI forwarding */
> >        unsigned int    is_managed:1;
> >        unsigned int    is_pcie:1;
> > +       unsigned int    fndmntl_rst_rqd:1; /* Dev requires fundamental
reset
> > */
> >        unsigned int    state_saved:1;
> >        unsigned int    is_physfn:1;
> >        unsigned int    is_virtfn:1;
>
> As Ben points out, the name is awkward.  How about needs_freset ?

I am OK with name change.

> Since this affects the entire pci subsystem, it should be documented
> properly.  The "pci error recovery" subsystem was designed to be
> usable in other architectures, and so the error recovery docs should
> take at least a paragraph to describe what this flag means, and when
> its supposed to be used.

I will update the documentation, are you referring to
Documentation/powerpc/eeh-pci-error-recovery.txt
or some other documentation?

> Providing the docs patch together with the pci.h patch *only* would
> probably simplify acceptance by the PCI community.
>
> --linas

[-- Attachment #2: Type: text/html, Size: 2792 bytes --]

^ permalink raw reply

* [PATCH 1/2] powerpc/83xx: Add support for MPC8377E-WLAN boards
From: Anton Vorontsov @ 2009-07-24 21:42 UTC (permalink / raw)
  To: Kumar Gala; +Cc: linuxppc-dev

MPC8377E-WLAN are basically RDB boards except:

- RAM extended to 512 MB;
- NAND flash removed, NOR flash extended to 64 MB;
- Vitesse VSC7385 5-port switch removed, RTL8211B PHY added;
- Power management MCU removed;
- PCI slot removed, another mini-PCI slot added (IRQ routing changed);
- USB3300 PHY's ID pin grounded, thus USB port is host-only.

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
---
 arch/powerpc/boot/dts/mpc8377_wlan.dts    |  464 +++++++++++++++++++++++++++++
 arch/powerpc/platforms/83xx/Kconfig       |    4 +-
 arch/powerpc/platforms/83xx/mpc837x_rdb.c |    5 +-
 3 files changed, 469 insertions(+), 4 deletions(-)
 create mode 100644 arch/powerpc/boot/dts/mpc8377_wlan.dts

diff --git a/arch/powerpc/boot/dts/mpc8377_wlan.dts b/arch/powerpc/boot/dts/mpc8377_wlan.dts
new file mode 100644
index 0000000..3febc4e
--- /dev/null
+++ b/arch/powerpc/boot/dts/mpc8377_wlan.dts
@@ -0,0 +1,464 @@
+/*
+ * MPC8377E WLAN Device Tree Source
+ *
+ * Copyright 2007-2009 Freescale Semiconductor Inc.
+ * Copyright 2009 MontaVista Software, Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+/dts-v1/;
+
+/ {
+	compatible = "fsl,mpc8377wlan";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	aliases {
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		serial0 = &serial0;
+		serial1 = &serial1;
+		pci0 = &pci0;
+		pci1 = &pci1;
+		pci2 = &pci2;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,8377@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			d-cache-line-size = <32>;
+			i-cache-line-size = <32>;
+			d-cache-size = <32768>;
+			i-cache-size = <32768>;
+			timebase-frequency = <0>;
+			bus-frequency = <0>;
+			clock-frequency = <0>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x20000000>;	// 512MB at 0
+	};
+
+	localbus@e0005000 {
+		#address-cells = <2>;
+		#size-cells = <1>;
+		compatible = "fsl,mpc8377-elbc", "fsl,elbc", "simple-bus";
+		reg = <0xe0005000 0x1000>;
+		interrupts = <77 0x8>;
+		interrupt-parent = <&ipic>;
+		ranges = <0x0 0x0 0xfc000000 0x04000000>;
+
+		flash@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <0x0 0x0 0x4000000>;
+			bank-width = <2>;
+			device-width = <1>;
+
+			partition@0 {
+				reg = <0 0x8000>;
+				label = "u-boot";
+				read-only;
+			};
+
+			partition@a0000 {
+				reg = <0xa0000 0x300000>;
+				label = "kernel";
+			};
+
+			partition@3a0000 {
+				reg = <0x3a0000 0x3c60000>;
+				label = "rootfs";
+			};
+		};
+	};
+
+	immr@e0000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		compatible = "simple-bus";
+		ranges = <0x0 0xe0000000 0x00100000>;
+		reg = <0xe0000000 0x00000200>;
+		bus-frequency = <0>;
+
+		wdt@200 {
+			device_type = "watchdog";
+			compatible = "mpc83xx_wdt";
+			reg = <0x200 0x100>;
+		};
+
+		gpio1: gpio-controller@c00 {
+			#gpio-cells = <2>;
+			compatible = "fsl,mpc8377-gpio", "fsl,mpc8349-gpio";
+			reg = <0xc00 0x100>;
+			interrupts = <74 0x8>;
+			interrupt-parent = <&ipic>;
+			gpio-controller;
+		};
+
+		gpio2: gpio-controller@d00 {
+			#gpio-cells = <2>;
+			compatible = "fsl,mpc8377-gpio", "fsl,mpc8349-gpio";
+			reg = <0xd00 0x100>;
+			interrupts = <75 0x8>;
+			interrupt-parent = <&ipic>;
+			gpio-controller;
+		};
+
+		sleep-nexus {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "simple-bus";
+			sleep = <&pmc 0x0c000000>;
+			ranges;
+
+			i2c@3000 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				cell-index = <0>;
+				compatible = "fsl-i2c";
+				reg = <0x3000 0x100>;
+				interrupts = <14 0x8>;
+				interrupt-parent = <&ipic>;
+				dfsrr;
+
+				at24@50 {
+					compatible = "at24,24c256";
+					reg = <0x50>;
+				};
+
+				rtc@68 {
+					compatible = "dallas,ds1339";
+					reg = <0x68>;
+				};
+			};
+
+			sdhci@2e000 {
+				compatible = "fsl,mpc8377-esdhc", "fsl,esdhc";
+				reg = <0x2e000 0x1000>;
+				interrupts = <42 0x8>;
+				interrupt-parent = <&ipic>;
+				clock-frequency = <133333333>;
+			};
+		};
+
+		i2c@3100 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <1>;
+			compatible = "fsl-i2c";
+			reg = <0x3100 0x100>;
+			interrupts = <15 0x8>;
+			interrupt-parent = <&ipic>;
+			dfsrr;
+		};
+
+		spi@7000 {
+			cell-index = <0>;
+			compatible = "fsl,spi";
+			reg = <0x7000 0x1000>;
+			interrupts = <16 0x8>;
+			interrupt-parent = <&ipic>;
+			mode = "cpu";
+		};
+
+		dma@82a8 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8377-dma", "fsl,elo-dma";
+			reg = <0x82a8 4>;
+			ranges = <0 0x8100 0x1a8>;
+			interrupt-parent = <&ipic>;
+			interrupts = <71 8>;
+			cell-index = <0>;
+			dma-channel@0 {
+				compatible = "fsl,mpc8377-dma-channel", "fsl,elo-dma-channel";
+				reg = <0 0x80>;
+				cell-index = <0>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,mpc8377-dma-channel", "fsl,elo-dma-channel";
+				reg = <0x80 0x80>;
+				cell-index = <1>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,mpc8377-dma-channel", "fsl,elo-dma-channel";
+				reg = <0x100 0x80>;
+				cell-index = <2>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,mpc8377-dma-channel", "fsl,elo-dma-channel";
+				reg = <0x180 0x28>;
+				cell-index = <3>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+		};
+
+		usb@23000 {
+			compatible = "fsl-usb2-dr";
+			reg = <0x23000 0x1000>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			interrupt-parent = <&ipic>;
+			interrupts = <38 0x8>;
+			phy_type = "ulpi";
+			sleep = <&pmc 0x00c00000>;
+		};
+
+		enet0: ethernet@24000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <0>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x24000 0x1000>;
+			ranges = <0x0 0x24000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <32 0x8 33 0x8 34 0x8>;
+			phy-connection-type = "mii";
+			interrupt-parent = <&ipic>;
+			tbi-handle = <&tbi0>;
+			phy-handle = <&phy2>;
+			sleep = <&pmc 0xc0000000>;
+			fsl,magic-packet;
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-mdio";
+				reg = <0x520 0x20>;
+
+				phy2: ethernet-phy@2 {
+					interrupt-parent = <&ipic>;
+					interrupts = <17 0x8>;
+					reg = <0x2>;
+					device_type = "ethernet-phy";
+				};
+
+				phy3: ethernet-phy@3 {
+					interrupt-parent = <&ipic>;
+					interrupts = <18 0x8>;
+					reg = <0x3>;
+					device_type = "ethernet-phy";
+				};
+
+				tbi0: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		enet1: ethernet@25000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <1>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x25000 0x1000>;
+			ranges = <0x0 0x25000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <35 0x8 36 0x8 37 0x8>;
+			phy-connection-type = "mii";
+			interrupt-parent = <&ipic>;
+			phy-handle = <&phy3>;
+			tbi-handle = <&tbi1>;
+			sleep = <&pmc 0x30000000>;
+			fsl,magic-packet;
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-tbi";
+				reg = <0x520 0x20>;
+
+				tbi1: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		serial0: serial@4500 {
+			cell-index = <0>;
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <0x4500 0x100>;
+			clock-frequency = <0>;
+			interrupts = <9 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+
+		serial1: serial@4600 {
+			cell-index = <1>;
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <0x4600 0x100>;
+			clock-frequency = <0>;
+			interrupts = <10 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+
+		crypto@30000 {
+			compatible = "fsl,sec3.0", "fsl,sec2.4", "fsl,sec2.2",
+				     "fsl,sec2.1", "fsl,sec2.0";
+			reg = <0x30000 0x10000>;
+			interrupts = <11 0x8>;
+			interrupt-parent = <&ipic>;
+			fsl,num-channels = <4>;
+			fsl,channel-fifo-len = <24>;
+			fsl,exec-units-mask = <0x9fe>;
+			fsl,descriptor-types-mask = <0x3ab0ebf>;
+			sleep = <&pmc 0x03000000>;
+		};
+
+		sata@18000 {
+			compatible = "fsl,mpc8377-sata", "fsl,pq-sata";
+			reg = <0x18000 0x1000>;
+			interrupts = <44 0x8>;
+			interrupt-parent = <&ipic>;
+			sleep = <&pmc 0x000000c0>;
+		};
+
+		sata@19000 {
+			compatible = "fsl,mpc8377-sata", "fsl,pq-sata";
+			reg = <0x19000 0x1000>;
+			interrupts = <45 0x8>;
+			interrupt-parent = <&ipic>;
+			sleep = <&pmc 0x00000030>;
+		};
+
+		/* IPIC
+		 * interrupts cell = <intr #, sense>
+		 * sense values match linux IORESOURCE_IRQ_* defines:
+		 * sense == 8: Level, low assertion
+		 * sense == 2: Edge, high-to-low change
+		 */
+		ipic: interrupt-controller@700 {
+			compatible = "fsl,ipic";
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;
+			reg = <0x700 0x100>;
+		};
+
+		pmc: power@b00 {
+			compatible = "fsl,mpc8377-pmc", "fsl,mpc8349-pmc";
+			reg = <0xb00 0x100 0xa00 0x100>;
+			interrupts = <80 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+	};
+
+	pci0: pci@e0008500 {
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <
+				/* IRQ5 = 21 = 0x15, IRQ6 = 0x16, IRQ7 = 23 = 0x17 */
+
+				/* IDSEL AD14 IRQ6 inta */
+				 0x7000 0x0 0x0 0x1 &ipic 22 0x8
+
+				/* IDSEL AD15 IRQ5 inta */
+				 0x7800 0x0 0x0 0x1 &ipic 21 0x8>;
+		interrupt-parent = <&ipic>;
+		interrupts = <66 0x8>;
+		bus-range = <0 0>;
+		ranges = <0x02000000 0x0 0x90000000 0x90000000 0x0 0x10000000
+		          0x42000000 0x0 0x80000000 0x80000000 0x0 0x10000000
+		          0x01000000 0x0 0x00000000 0xe0300000 0x0 0x00100000>;
+		sleep = <&pmc 0x00010000>;
+		clock-frequency = <66666666>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0xe0008500 0x100		/* internal registers */
+		       0xe0008300 0x8>;		/* config space access registers */
+		compatible = "fsl,mpc8349-pci";
+		device_type = "pci";
+	};
+
+	pci1: pcie@e0009000 {
+		#address-cells = <3>;
+		#size-cells = <2>;
+		#interrupt-cells = <1>;
+		device_type = "pci";
+		compatible = "fsl,mpc8377-pcie", "fsl,mpc8314-pcie";
+		reg = <0xe0009000 0x00001000>;
+		ranges = <0x02000000 0 0xa8000000 0xa8000000 0 0x10000000
+		          0x01000000 0 0x00000000 0xb8000000 0 0x00800000>;
+		bus-range = <0 255>;
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <0 0 0 1 &ipic 1 8
+				 0 0 0 2 &ipic 1 8
+				 0 0 0 3 &ipic 1 8
+				 0 0 0 4 &ipic 1 8>;
+		sleep = <&pmc 0x00300000>;
+		clock-frequency = <0>;
+
+		pcie@0 {
+			#address-cells = <3>;
+			#size-cells = <2>;
+			device_type = "pci";
+			reg = <0 0 0 0 0>;
+			ranges = <0x02000000 0 0xa8000000
+				  0x02000000 0 0xa8000000
+				  0 0x10000000
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00800000>;
+		};
+	};
+
+	pci2: pcie@e000a000 {
+		#address-cells = <3>;
+		#size-cells = <2>;
+		#interrupt-cells = <1>;
+		device_type = "pci";
+		compatible = "fsl,mpc8377-pcie", "fsl,mpc8314-pcie";
+		reg = <0xe000a000 0x00001000>;
+		ranges = <0x02000000 0 0xc8000000 0xc8000000 0 0x10000000
+			  0x01000000 0 0x00000000 0xd8000000 0 0x00800000>;
+		bus-range = <0 255>;
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <0 0 0 1 &ipic 2 8
+				 0 0 0 2 &ipic 2 8
+				 0 0 0 3 &ipic 2 8
+				 0 0 0 4 &ipic 2 8>;
+		sleep = <&pmc 0x000c0000>;
+		clock-frequency = <0>;
+
+		pcie@0 {
+			#address-cells = <3>;
+			#size-cells = <2>;
+			device_type = "pci";
+			reg = <0 0 0 0 0>;
+			ranges = <0x02000000 0 0xc8000000
+				  0x02000000 0 0xc8000000
+				  0 0x10000000
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00800000>;
+		};
+	};
+};
diff --git a/arch/powerpc/platforms/83xx/Kconfig b/arch/powerpc/platforms/83xx/Kconfig
index 083ebee..f49a254 100644
--- a/arch/powerpc/platforms/83xx/Kconfig
+++ b/arch/powerpc/platforms/83xx/Kconfig
@@ -75,11 +75,11 @@ config MPC837x_MDS
 	  This option enables support for the MPC837x MDS Processor Board.
 
 config MPC837x_RDB
-	bool "Freescale MPC837x RDB"
+	bool "Freescale MPC837x RDB/WLAN"
 	select DEFAULT_UIMAGE
 	select PPC_MPC837x
 	help
-	  This option enables support for the MPC837x RDB Board.
+	  This option enables support for the MPC837x RDB and WLAN Boards.
 
 config SBC834x
 	bool "Wind River SBC834x"
diff --git a/arch/powerpc/platforms/83xx/mpc837x_rdb.c b/arch/powerpc/platforms/83xx/mpc837x_rdb.c
index 76f3b32..91d19ab 100644
--- a/arch/powerpc/platforms/83xx/mpc837x_rdb.c
+++ b/arch/powerpc/platforms/83xx/mpc837x_rdb.c
@@ -86,11 +86,12 @@ static int __init mpc837x_rdb_probe(void)
 
 	return of_flat_dt_is_compatible(root, "fsl,mpc8377rdb") ||
 	       of_flat_dt_is_compatible(root, "fsl,mpc8378rdb") ||
-	       of_flat_dt_is_compatible(root, "fsl,mpc8379rdb");
+	       of_flat_dt_is_compatible(root, "fsl,mpc8379rdb") ||
+	       of_flat_dt_is_compatible(root, "fsl,mpc8377wlan");
 }
 
 define_machine(mpc837x_rdb) {
-	.name			= "MPC837x RDB",
+	.name			= "MPC837x RDB/WLAN",
 	.probe			= mpc837x_rdb_probe,
 	.setup_arch		= mpc837x_rdb_setup_arch,
 	.init_IRQ		= mpc837x_rdb_init_IRQ,
-- 
1.6.3.3

^ permalink raw reply related

* [PATCH 2/2] powerpc/83xx: Add eSDHC support for MPC837xE-RDB/WLAN boards
From: Anton Vorontsov @ 2009-07-24 21:42 UTC (permalink / raw)
  To: Kumar Gala; +Cc: linuxppc-dev

Actually, the support is already there, but it requires newer U-Boots
(to fill-in clock-frequency, and setup pin multiplexing).

Though, it appears that on RDB boards USBB pins aren't multiplexed
between USB and eSDHC (unlike MDS boards, where USB and eSDHC share
pctl and pwrfault pins).

So, for RDB boards we can safely setup pinmux and manually fill-in
clock-frequency, thus making eSDHC work even with older u-boots.

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
---
 arch/powerpc/boot/dts/mpc8377_rdb.dts     |    2 +-
 arch/powerpc/boot/dts/mpc8378_rdb.dts     |    2 +-
 arch/powerpc/boot/dts/mpc8379_rdb.dts     |    2 +-
 arch/powerpc/platforms/83xx/mpc837x_rdb.c |   23 +++++++++++++++++++++++
 arch/powerpc/platforms/83xx/mpc83xx.h     |    4 ++++
 5 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/boot/dts/mpc8377_rdb.dts b/arch/powerpc/boot/dts/mpc8377_rdb.dts
index 224b4f0..1913974 100644
--- a/arch/powerpc/boot/dts/mpc8377_rdb.dts
+++ b/arch/powerpc/boot/dts/mpc8377_rdb.dts
@@ -174,7 +174,7 @@
 				interrupts = <42 0x8>;
 				interrupt-parent = <&ipic>;
 				/* Filled in by U-Boot */
-				clock-frequency = <0>;
+				clock-frequency = <111111111>;
 			};
 		};
 
diff --git a/arch/powerpc/boot/dts/mpc8378_rdb.dts b/arch/powerpc/boot/dts/mpc8378_rdb.dts
index 474ea2f..4306901 100644
--- a/arch/powerpc/boot/dts/mpc8378_rdb.dts
+++ b/arch/powerpc/boot/dts/mpc8378_rdb.dts
@@ -174,7 +174,7 @@
 				interrupts = <42 0x8>;
 				interrupt-parent = <&ipic>;
 				/* Filled in by U-Boot */
-				clock-frequency = <0>;
+				clock-frequency = <111111111>;
 			};
 		};
 
diff --git a/arch/powerpc/boot/dts/mpc8379_rdb.dts b/arch/powerpc/boot/dts/mpc8379_rdb.dts
index d4838af..897b70c 100644
--- a/arch/powerpc/boot/dts/mpc8379_rdb.dts
+++ b/arch/powerpc/boot/dts/mpc8379_rdb.dts
@@ -172,7 +172,7 @@
 				interrupts = <42 0x8>;
 				interrupt-parent = <&ipic>;
 				/* Filled in by U-Boot */
-				clock-frequency = <0>;
+				clock-frequency = <111111111>;
 			};
 		};
 
diff --git a/arch/powerpc/platforms/83xx/mpc837x_rdb.c b/arch/powerpc/platforms/83xx/mpc837x_rdb.c
index 91d19ab..a1908d2 100644
--- a/arch/powerpc/platforms/83xx/mpc837x_rdb.c
+++ b/arch/powerpc/platforms/83xx/mpc837x_rdb.c
@@ -17,10 +17,32 @@
 #include <asm/time.h>
 #include <asm/ipic.h>
 #include <asm/udbg.h>
+#include <sysdev/fsl_soc.h>
 #include <sysdev/fsl_pci.h>
 
 #include "mpc83xx.h"
 
+static void mpc837x_rdb_sd_cfg(void)
+{
+	void __iomem *im;
+
+	im = ioremap(get_immrbase(), 0x1000);
+	if (!im) {
+		WARN_ON(1);
+		return;
+	}
+
+	/*
+	 * On RDB boards (in contrast to MDS) USBB pins are used for SD only,
+	 * so we can safely mux them away from the USB block.
+	 */
+	clrsetbits_be32(im + MPC83XX_SICRL_OFFS, MPC837X_SICRL_USBB_MASK,
+						 MPC837X_SICRL_SD);
+	clrsetbits_be32(im + MPC83XX_SICRH_OFFS, MPC837X_SICRH_SPI_MASK,
+						 MPC837X_SICRH_SD);
+	iounmap(im);
+}
+
 /* ************************************************************************
  *
  * Setup the architecture
@@ -42,6 +64,7 @@ static void __init mpc837x_rdb_setup_arch(void)
 		mpc83xx_add_bridge(np);
 #endif
 	mpc837x_usb_cfg();
+	mpc837x_rdb_sd_cfg();
 }
 
 static struct of_device_id mpc837x_ids[] = {
diff --git a/arch/powerpc/platforms/83xx/mpc83xx.h b/arch/powerpc/platforms/83xx/mpc83xx.h
index d1dc5b0..0fea881 100644
--- a/arch/powerpc/platforms/83xx/mpc83xx.h
+++ b/arch/powerpc/platforms/83xx/mpc83xx.h
@@ -30,6 +30,8 @@
 #define MPC8315_SICRL_USB_ULPI     0x00000054
 #define MPC837X_SICRL_USB_MASK     0xf0000000
 #define MPC837X_SICRL_USB_ULPI     0x50000000
+#define MPC837X_SICRL_USBB_MASK    0x30000000
+#define MPC837X_SICRL_SD           0x20000000
 
 /* system i/o configuration register high */
 #define MPC83XX_SICRH_OFFS         0x118
@@ -38,6 +40,8 @@
 #define MPC831X_SICRH_USB_ULPI     0x000000a0
 #define MPC8315_SICRH_USB_MASK     0x0000ff00
 #define MPC8315_SICRH_USB_ULPI     0x00000000
+#define MPC837X_SICRH_SPI_MASK     0x00000003
+#define MPC837X_SICRH_SD           0x00000001
 
 /* USB Control Register */
 #define FSL_USB2_CONTROL_OFFS      0x500
-- 
1.6.3.3

^ permalink raw reply related

* [PATCH] sdhci: Get rid of "frequency too high" flood when using eSDHC
From: Anton Vorontsov @ 2009-07-24 21:55 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Ben Dooks, sdhci-devel, linux-kernel, linuxppc-dev, Pierre Ossman

Since commit 8dfd0374be84793360db7fff2e635d2cd3bbcb21 ("MMC core: limit
minimum initialization frequency to 400kHz") MMC core checks for minimum
frequency, and that causes following messages flood when using eSDHC
controllers:

  ...
  mmc0: Minimum clock frequency too high for identification mode
  mmc0: Minimum clock frequency too high for identification mode
  ...

The warnings are legitimate, since if we'd use 133 MHz clocks for
standard SDHCI controllers, we'd not able to scale frequency down
to 400 kHz.

But eSDHC controllers have a non-standard SD clock management, so
we can divide clock by 256 * 16, not just 256.

This patch introduces get_min_clock() callback for sdhci core and
implements it for sdhci-of driver, and thus fixes the issue.

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
---
 drivers/mmc/host/sdhci-of.c |    8 ++++++++
 drivers/mmc/host/sdhci.c    |    5 ++++-
 drivers/mmc/host/sdhci.h    |    1 +
 3 files changed, 13 insertions(+), 1 deletions(-)

diff --git a/drivers/mmc/host/sdhci-of.c b/drivers/mmc/host/sdhci-of.c
index d79fa55..9088443 100644
--- a/drivers/mmc/host/sdhci-of.c
+++ b/drivers/mmc/host/sdhci-of.c
@@ -158,6 +158,13 @@ static unsigned int esdhc_get_max_clock(struct sdhci_host *host)
 	return of_host->clock;
 }
 
+static unsigned int esdhc_get_min_clock(struct sdhci_host *host)
+{
+	struct sdhci_of_host *of_host = sdhci_priv(host);
+
+	return of_host->clock / 256 / 16;
+}
+
 static unsigned int esdhc_get_timeout_clock(struct sdhci_host *host)
 {
 	struct sdhci_of_host *of_host = sdhci_priv(host);
@@ -184,6 +191,7 @@ static struct sdhci_of_data sdhci_esdhc = {
 		.set_clock = esdhc_set_clock,
 		.enable_dma = esdhc_enable_dma,
 		.get_max_clock = esdhc_get_max_clock,
+		.get_min_clock = esdhc_get_min_clock,
 		.get_timeout_clock = esdhc_get_timeout_clock,
 	},
 };
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 6779b4e..62041c7 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -1766,7 +1766,10 @@ int sdhci_add_host(struct sdhci_host *host)
 	 * Set host parameters.
 	 */
 	mmc->ops = &sdhci_ops;
-	mmc->f_min = host->max_clk / 256;
+	if (host->ops->get_min_clock)
+		mmc->f_min = host->ops->get_min_clock(host);
+	else
+		mmc->f_min = host->max_clk / 256;
 	mmc->f_max = host->max_clk;
 	mmc->caps = MMC_CAP_SDIO_IRQ;
 
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index 831ddf7..c77e9ff 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -302,6 +302,7 @@ struct sdhci_ops {
 
 	int		(*enable_dma)(struct sdhci_host *host);
 	unsigned int	(*get_max_clock)(struct sdhci_host *host);
+	unsigned int	(*get_min_clock)(struct sdhci_host *host);
 	unsigned int	(*get_timeout_clock)(struct sdhci_host *host);
 };
 
-- 
1.6.3.3

^ permalink raw reply related

* Re: [PATCH] Support for PCI Express reset type in EEH
From: Linas Vepstas @ 2009-07-25  0:30 UTC (permalink / raw)
  To: Richard Lary; +Cc: linuxppc-dev, mmlnx, Paul Mackerras, linux-pci
In-Reply-To: <OF71B46F7B.49A6C08D-ON882575FD.00768DAA-882575FD.0076AF2C@us.ibm.com>

2009/7/24 Richard Lary <rlary@us.ibm.com>:
> Linas Vepstas <linasvepstas@gmail.com> wrote on 07/23/2009 07:44:33 AM:
>
>> 2009/7/15 Mike Mason <mmlnx@us.ibm.com>:
>> > By default, EEH does what's known as a "hot reset" during error recove=
ry
>> > of
>> > a PCI Express device. =C2=A0We've found a case where the device needs =
a
>> > "fundamental reset" to recover properly. =C2=A0The current PCI error r=
ecovery
>> > and
>> > EEH frameworks do not support this distinction.
>> >
>> > The attached patch (courtesy of Richard Lary) adds a bit field to
>> > pci_dev
>> > that indicates whether the device requires a fundamental reset during
>> > error
>> > recovery. =C2=A0This bit can be checked by EEH to determine which rese=
t type
>> > is
>> > required.
>> >
>> > This patch supersedes the previously submitted patch that implemented =
a
>> > reset type callback.
>> >
>> > Please review and let me know of any concerns.
>>
>> I like this patch a *lot* better .. it is vastly simpler, more direct.
>>
>>
>> > diff -uNrp a/include/linux/pci.h b/include/linux/pci.h
>> > --- a/include/linux/pci.h =C2=A0 =C2=A0 =C2=A0 2009-07-13 14:25:37.000=
000000 -0700
>> > +++ b/include/linux/pci.h =C2=A0 =C2=A0 =C2=A0 2009-07-15 10:25:37.000=
000000 -0700
>> > @@ -273,6 +273,7 @@ struct pci_dev {
>> > =C2=A0 =C2=A0 =C2=A0 =C2=A0unsigned int =C2=A0 =C2=A0ari_enabled:1; =
=C2=A0/* ARI forwarding */
>> > =C2=A0 =C2=A0 =C2=A0 =C2=A0unsigned int =C2=A0 =C2=A0is_managed:1;
>> > =C2=A0 =C2=A0 =C2=A0 =C2=A0unsigned int =C2=A0 =C2=A0is_pcie:1;
>> > + =C2=A0 =C2=A0 =C2=A0 unsigned int =C2=A0 =C2=A0fndmntl_rst_rqd:1; /*=
 Dev requires fundamental
>> > reset
>> > */
>> > =C2=A0 =C2=A0 =C2=A0 =C2=A0unsigned int =C2=A0 =C2=A0state_saved:1;
>> > =C2=A0 =C2=A0 =C2=A0 =C2=A0unsigned int =C2=A0 =C2=A0is_physfn:1;
>> > =C2=A0 =C2=A0 =C2=A0 =C2=A0unsigned int =C2=A0 =C2=A0is_virtfn:1;
>>
>> As Ben points out, the name is awkward. =C2=A0How about needs_freset ?
>
> I am OK with name change.
>
>
>> Since this affects the entire pci subsystem, it should be documented
>> properly. =C2=A0The "pci error recovery" subsystem was designed to be
>> usable in other architectures, and so the error recovery docs should
>> take at least a paragraph to describe what this flag means, and when
>> its supposed to be used.
>
> I will update the documentation, are you referring to
> Documentation/powerpc/eeh-pci-error-recovery.txt
> or some other documentation?

No, I'm thinking
Documentation/PCI/pci-error-recovery.txt

because the flag is not powerpc-specific.

--linas

>
>> Providing the docs patch together with the pci.h patch *only* would
>> probably simplify acceptance by the PCI community.
>>
>> --linas
>

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox