LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 3/4] powerpc: Remove use of a second scratch SPRG in STAB code
From: Benjamin Herrenschmidt @ 2009-07-15  6:52 UTC (permalink / raw)
  To: linuxppc-dev

The STAB code used on Power3 and RS/64 uses a second scratch SPRG to
save a GPR in order to decide whether to go to do_stab_bolted_* or
to handle a normal data access exception.

This gets in the way of our plan to free SPRG3, which is user visible,
for userspace use, since we cannot use SPRG0 instead: on RS/64 it seems
to be read-only in supervisor mode (like on POWER4).

This reworks the STAB exception entry to use the PACA as temporary
storage instead.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---

 arch/powerpc/include/asm/exception-64s.h   |    7 ++++-
 arch/powerpc/include/asm/reg.h             |    3 --
 arch/powerpc/kernel/exceptions-64s.S       |   36 ++++++++++++++++++-----------
 arch/powerpc/platforms/iseries/exception.S |   34 ++++++++++++++++-----------
 4 files changed, 50 insertions(+), 30 deletions(-)

--- linux-work.orig/arch/powerpc/include/asm/exception-64s.h	2009-07-15 15:07:50.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/exception-64s.h	2009-07-15 15:15:36.000000000 +1000
@@ -66,8 +66,7 @@
 	std	r9,area+EX_R13(r13);					\
 	mfcr	r9
 
-#define EXCEPTION_PROLOG_PSERIES(area, label)				\
-	EXCEPTION_PROLOG_1(area);					\
+#define EXCEPTION_PROLOG_PSERIES_1(label)				\
 	ld	r12,PACAKBASE(r13);	/* get high part of &label */	\
 	ld	r10,PACAKMSR(r13);	/* get MSR value for kernel */	\
 	mfspr	r11,SPRN_SRR0;		/* save SRR0 */			\
@@ -78,6 +77,10 @@
 	rfid;								\
 	b	.	/* prevent speculative execution */
 
+#define EXCEPTION_PROLOG_PSERIES(area, label)				\
+	EXCEPTION_PROLOG_1(area);					\
+	EXCEPTION_PROLOG_PSERIES_1(label);
+
 /*
  * The common exception prolog is used for all except a few exceptions
  * such as a segment miss on a kernel address.  We have to be prepared
Index: linux-work/arch/powerpc/kernel/exceptions-64s.S
===================================================================
--- linux-work.orig/arch/powerpc/kernel/exceptions-64s.S	2009-07-15 15:07:50.000000000 +1000
+++ linux-work/arch/powerpc/kernel/exceptions-64s.S	2009-07-15 15:39:12.000000000 +1000
@@ -50,18 +50,26 @@ data_access_pSeries:
 	HMT_MEDIUM
 	mtspr	SPRN_SPRG_SCRATCH0,r13
 BEGIN_FTR_SECTION
-	mtspr	SPRN_SPRG_SCRATCH1,r12
-	mfspr	r13,SPRN_DAR
-	mfspr	r12,SPRN_DSISR
-	srdi	r13,r13,60
-	rlwimi	r13,r12,16,0x20
-	mfcr	r12
-	cmpwi	r13,0x2c
+	mfspr	r13,SPRN_SPRG_PACA
+	std	r9,PACA_EXSLB+EX_R9(r13)
+	std	r10,PACA_EXSLB+EX_R10(r13)
+	mfspr	r10,SPRN_DAR
+	mfspr	r9,SPRN_DSISR
+	srdi	r10,r10,60
+	rlwimi	r10,r9,16,0x20
+	mfcr	r9
+	cmpwi	r10,0x2c
 	beq	do_stab_bolted_pSeries
-	mtcrf	0x80,r12
-	mfspr	r12,SPRN_SPRG_SCRATCH1
+	ld	r10,PACA_EXSLB+EX_R10(r13)
+	std	r11,PACA_EXGEN+EX_R11(r13)
+	ld	r11,PACA_EXSLB+EX_R9(r13)
+	std	r12,PACA_EXGEN+EX_R12(r13)
+	mfspr	r12,SPRN_SPRG_SCRATCH0
+	std	r10,PACA_EXGEN+EX_R10(r13)
+	std	r11,PACA_EXGEN+EX_R9(r13)
+	std	r12,PACA_EXGEN+EX_R13(r13)
 END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
-	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common)
+	EXCEPTION_PROLOG_PSERIES_1(data_access_common)
 
 	. = 0x380
 	.globl data_access_slb_pSeries
@@ -224,9 +232,11 @@ masked_interrupt:
 
 	.align	7
 do_stab_bolted_pSeries:
-	mtcrf	0x80,r12
-	mfspr	r12,SPRN_SPRG_SCRATCH1
-	EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted)
+	std	r11,PACA_EXSLB+EX_R11(r13)
+	std	r12,PACA_EXSLB+EX_R12(r13)
+	mfspr	r10,SPRN_SPRG_SCRATCH0
+	std	r10,PACA_EXSLB+EX_R13(r13)
+	EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted)
 
 #ifdef CONFIG_PPC_PSERIES
 /*
Index: linux-work/arch/powerpc/platforms/iseries/exception.S
===================================================================
--- linux-work.orig/arch/powerpc/platforms/iseries/exception.S	2009-07-15 15:07:50.000000000 +1000
+++ linux-work/arch/powerpc/platforms/iseries/exception.S	2009-07-15 15:39:14.000000000 +1000
@@ -128,25 +128,33 @@ iSeries_secondary_smp_loop:
 data_access_iSeries:
 	mtspr	SPRN_SPRG_SCRATCH0,r13
 BEGIN_FTR_SECTION
-	mtspr	SPRN_SPRG_SCRATCH1,r12
-	mfspr	r13,SPRN_DAR
-	mfspr	r12,SPRN_DSISR
-	srdi	r13,r13,60
-	rlwimi	r13,r12,16,0x20
-	mfcr	r12
-	cmpwi	r13,0x2c
+	mfspr	r13,SPRN_SPRG_PACA
+	std	r9,PACA_EXSLB+EX_R9(r13)
+	std	r10,PACA_EXSLB+EX_R10(r13)
+	mfspr	r10,SPRN_DAR
+	mfspr	r9,SPRN_DSISR
+	srdi	r10,r10,60
+	rlwimi	r10,r9,16,0x20
+	mfcr	r9
+	cmpwi	r10,0x2c
 	beq	.do_stab_bolted_iSeries
-	mtcrf	0x80,r12
-	mfspr	r12,SPRN_SPRG_SCRATCH1
+	ld	r10,PACA_EXSLB+EX_R10(r13)
+	std	r11,PACA_EXGEN+EX_R11(r13)
+	ld	r11,PACA_EXSLB+EX_R9(r13)
+	std	r12,PACA_EXGEN+EX_R12(r13)
+	mfspr	r12,SPRN_SPRG_SCRATCH0
+	std	r10,PACA_EXGEN+EX_R10(r13)
+	std	r11,PACA_EXGEN+EX_R9(r13)
+	std	r12,PACA_EXGEN+EX_R13(r13)
 END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
-	EXCEPTION_PROLOG_1(PACA_EXGEN)
 	EXCEPTION_PROLOG_ISERIES_1
 	b	data_access_common
 
 .do_stab_bolted_iSeries:
-	mtcrf	0x80,r12
-	mfspr	r12,SPRN_SPRG_SCRATCH1
-	EXCEPTION_PROLOG_1(PACA_EXSLB)
+	std	r11,PACA_EXSLB+EX_R11(r13)
+	std	r12,PACA_EXSLB+EX_R12(r13)
+	mfspr	r10,SPRN_SPRG_SCRATCH0
+	std	r10,PACA_EXSLB+EX_R13(r13)
 	EXCEPTION_PROLOG_ISERIES_1
 	b	.do_stab_bolted
 
Index: linux-work/arch/powerpc/include/asm/reg.h
===================================================================
--- linux-work.orig/arch/powerpc/include/asm/reg.h	2009-07-15 15:23:52.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/reg.h	2009-07-15 15:24:07.000000000 +1000
@@ -654,7 +654,7 @@
  * 64-bit server:
  *	- SPRG0 unused (reserved for HV on Power4)
  *	- SPRG1 scratch for exception vectors
- *	- SPRG2 scratch for exception vectors
+ *	- SPRG2 unused
  *
  * All 32-bit:
  *	- SPRG3 current thread_info pointer
@@ -707,7 +707,6 @@
 
 #ifdef CONFIG_PPC_BOOK3S_64
 #define SPRN_SPRG_SCRATCH0	SPRN_SPRG1
-#define SPRN_SPRG_SCRATCH1	SPRN_SPRG2
 #endif
 
 #ifdef CONFIG_PPC_BOOK3S_32

^ permalink raw reply

* [PATCH 4/4] powerpc: Change PACA from SPRG3 to SPRG1
From: Benjamin Herrenschmidt @ 2009-07-15  6:52 UTC (permalink / raw)
  To: linuxppc-dev

This changes the SPRG used to store the PACA on ppc64 from
SPRG3 to SPRG1. SPRG3 is user readable on most processors
and we want to use it for other things. We change the scratch
SPRG used by exception vectors from SPRG1 to SPRG2.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---

 arch/powerpc/include/asm/reg.h |   10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

--- linux-work.orig/arch/powerpc/include/asm/reg.h	2009-07-15 15:24:07.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/reg.h	2009-07-15 15:56:42.000000000 +1000
@@ -649,12 +649,12 @@
  * SPRG usage:
  *
  * All 64-bit:
- *	- SPRG3 stores PACA pointer
+ *	- SPRG1 stores PACA pointer
  *
  * 64-bit server:
  *	- SPRG0 unused (reserved for HV on Power4)
- *	- SPRG1 scratch for exception vectors
- *	- SPRG2 unused
+ *	- SPRG2 scratch for exception vectors
+ *	- SPRG3 unused (user visible)
  *
  * All 32-bit:
  *	- SPRG3 current thread_info pointer
@@ -700,13 +700,13 @@
  *
  */
 #ifdef CONFIG_PPC64
-#define SPRN_SPRG_PACA 		SPRN_SPRG3
+#define SPRN_SPRG_PACA 		SPRN_SPRG1
 #else
 #define SPRN_SPRG_THREAD 	SPRN_SPRG3
 #endif
 
 #ifdef CONFIG_PPC_BOOK3S_64
-#define SPRN_SPRG_SCRATCH0	SPRN_SPRG1
+#define SPRN_SPRG_SCRATCH0	SPRN_SPRG2
 #endif
 
 #ifdef CONFIG_PPC_BOOK3S_32

^ permalink raw reply

* [PATCH] powerpc/pmac: Fix PowerSurge SMP IPI allocation
From: Benjamin Herrenschmidt @ 2009-07-15  6:56 UTC (permalink / raw)
  To: linuxppc-dev

The code for setting up the IPIs on SMP PowerSurge machines has bitrotted;
it needs to properly map the HW interrupt number.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---

 arch/powerpc/platforms/powermac/smp.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- linux-work.orig/arch/powerpc/platforms/powermac/smp.c	2009-07-15 16:55:28.000000000 +1000
+++ linux-work/arch/powerpc/platforms/powermac/smp.c	2009-07-15 16:55:36.000000000 +1000
@@ -408,7 +408,7 @@ static void __init smp_psurge_setup_cpu(
 	/* reset the entry point so if we get another intr we won't
 	 * try to startup again */
 	out_be32(psurge_start, 0x100);
-	if (setup_irq(30, &psurge_irqaction))
+	if (setup_irq(irq_create_mapping(NULL, 30), &psurge_irqaction))
 		printk(KERN_ERR "Couldn't get primary IPI interrupt");
 }
 

^ permalink raw reply

* [RFC/PATCH] mm: Pass virtual address to [__]p{te,ud,md}_free_tlb()
From: Benjamin Herrenschmidt @ 2009-07-15  7:49 UTC (permalink / raw)
  To: Linux Memory Management, Linux-Arch, linux-kernel, linuxppc-dev
  Cc: Nick Piggin, Hugh Dickins

Upcoming patches to support the new 64-bit "BookE" powerpc architecture
will need to have the virtual address corresponding to a PTE page when
freeing it, due to the way the HW table walker works.

Basically, the TLB can be loaded with "large" pages that cover the whole
virtual space (well, sort-of, half of it actually) represented by a PTE
page, and which contain an "indirect" bit indicating that this TLB entry
RPN points to an array of PTEs from which the TLB can then create direct
entries. Thus, in order to invalidate those when PTE pages are deleted,
we need the virtual address to pass to tlbilx or tlbivax instructions.

The old trick of sticking it somewhere in the PTE page's struct page sucks
too much. The address is almost readily available at all call sites and
almost everybody implements these as macros, so we may as well add the
argument everywhere. I added it to the pmd and pud variants for consistency.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---

I would like to merge the new support that depends on this in 2.6.32,
so unless there are major objections, I'd like this to go in early during
the merge window. We can sort out separately how to carry the patch
around in -next until then since the powerpc tree will have a dependency
on it.

I haven't had a chance to test or even build on most architectures. The
patch is reasonably trivial but I may have screwed up regardless; I
apologize in advance. Let me know if something is wrong.

 arch/alpha/include/asm/tlb.h             |    4 ++--
 arch/arm/include/asm/tlb.h               |    4 ++--
 arch/avr32/include/asm/pgalloc.h         |    2 +-
 arch/cris/include/asm/pgalloc.h          |    2 +-
 arch/frv/include/asm/pgalloc.h           |    4 ++--
 arch/frv/include/asm/pgtable.h           |    2 +-
 arch/ia64/include/asm/pgalloc.h          |    6 +++---
 arch/ia64/include/asm/tlb.h              |   12 ++++++------
 arch/m32r/include/asm/pgalloc.h          |    4 ++--
 arch/m68k/include/asm/motorola_pgalloc.h |    6 ++++--
 arch/m68k/include/asm/sun3_pgalloc.h     |    4 ++--
 arch/microblaze/include/asm/pgalloc.h    |    4 ++--
 arch/mips/include/asm/pgalloc.h          |    6 +++---
 arch/mn10300/include/asm/pgalloc.h       |    2 +-
 arch/parisc/include/asm/tlb.h            |    4 ++--
 arch/powerpc/include/asm/pgalloc-32.h    |    2 +-
 arch/powerpc/include/asm/pgalloc-64.h    |    4 ++--
 arch/powerpc/include/asm/pgalloc.h       |    6 +++---
 arch/powerpc/mm/hugetlbpage.c            |    4 ++--
 arch/s390/include/asm/tlb.h              |    9 ++++++---
 arch/sh/include/asm/pgalloc.h            |    4 ++--
 arch/sh/include/asm/tlb.h                |    6 +++---
 arch/sparc/include/asm/pgalloc_32.h      |    8 ++++----
 arch/sparc/include/asm/tlb_64.h          |    6 +++---
 arch/um/include/asm/pgalloc.h            |    4 ++--
 arch/um/include/asm/tlb.h                |    6 +++---
 arch/x86/include/asm/pgalloc.h           |   25 ++++++++++++++++++++++---
 arch/x86/mm/pgtable.c                    |    6 +++---
 arch/xtensa/include/asm/tlb.h            |    2 +-
 include/asm-generic/4level-fixup.h       |    4 ++--
 include/asm-generic/pgtable-nopmd.h      |    2 +-
 include/asm-generic/pgtable-nopud.h      |    2 +-
 include/asm-generic/tlb.h                |   12 ++++++------
 mm/memory.c                              |   11 ++++++-----
 34 files changed, 107 insertions(+), 82 deletions(-)

--- linux-work.orig/arch/powerpc/include/asm/pgalloc-32.h	2009-02-05 16:22:24.000000000 +1100
+++ linux-work/arch/powerpc/include/asm/pgalloc-32.h	2009-07-15 17:42:43.000000000 +1000
@@ -16,7 +16,7 @@ extern void pgd_free(struct mm_struct *m
  */
 /* #define pmd_alloc_one(mm,address)       ({ BUG(); ((pmd_t *)2); }) */
 #define pmd_free(mm, x) 		do { } while (0)
-#define __pmd_free_tlb(tlb,x)		do { } while (0)
+#define __pmd_free_tlb(tlb,x,a)		do { } while (0)
 /* #define pgd_populate(mm, pmd, pte)      BUG() */
 
 #ifndef CONFIG_BOOKE
Index: linux-work/arch/powerpc/include/asm/pgalloc-64.h
===================================================================
--- linux-work.orig/arch/powerpc/include/asm/pgalloc-64.h	2009-02-05 16:22:24.000000000 +1100
+++ linux-work/arch/powerpc/include/asm/pgalloc-64.h	2009-07-15 17:42:43.000000000 +1000
@@ -118,11 +118,11 @@ static inline void pgtable_free(pgtable_
 		kmem_cache_free(pgtable_cache[cachenum], p);
 }
 
-#define __pmd_free_tlb(tlb, pmd) 	\
+#define __pmd_free_tlb(tlb, pmd,addr)		      \
 	pgtable_free_tlb(tlb, pgtable_free_cache(pmd, \
 		PMD_CACHE_NUM, PMD_TABLE_SIZE-1))
 #ifndef CONFIG_PPC_64K_PAGES
-#define __pud_free_tlb(tlb, pud)	\
+#define __pud_free_tlb(tlb, pud, addr)		      \
 	pgtable_free_tlb(tlb, pgtable_free_cache(pud, \
 		PUD_CACHE_NUM, PUD_TABLE_SIZE-1))
 #endif /* CONFIG_PPC_64K_PAGES */
Index: linux-work/arch/powerpc/include/asm/pgalloc.h
===================================================================
--- linux-work.orig/arch/powerpc/include/asm/pgalloc.h	2009-02-05 16:22:24.000000000 +1100
+++ linux-work/arch/powerpc/include/asm/pgalloc.h	2009-07-15 17:42:43.000000000 +1000
@@ -38,14 +38,14 @@ static inline pgtable_free_t pgtable_fre
 extern void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf);
 
 #ifdef CONFIG_SMP
-#define __pte_free_tlb(tlb,ptepage)	\
+#define __pte_free_tlb(tlb,ptepage,address)		\
 do { \
 	pgtable_page_dtor(ptepage); \
 	pgtable_free_tlb(tlb, pgtable_free_cache(page_address(ptepage), \
-		PTE_NONCACHE_NUM, PTE_TABLE_SIZE-1)); \
+					PTE_NONCACHE_NUM, PTE_TABLE_SIZE-1)); \
 } while (0)
 #else
-#define __pte_free_tlb(tlb, pte)	pte_free((tlb)->mm, (pte))
+#define __pte_free_tlb(tlb, pte, address)	pte_free((tlb)->mm, (pte))
 #endif
 
 
Index: linux-work/include/asm-generic/pgtable-nopmd.h
===================================================================
--- linux-work.orig/include/asm-generic/pgtable-nopmd.h	2009-02-05 16:23:01.000000000 +1100
+++ linux-work/include/asm-generic/pgtable-nopmd.h	2009-07-15 17:42:43.000000000 +1000
@@ -59,7 +59,7 @@ static inline pmd_t * pmd_offset(pud_t *
 static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 {
 }
-#define __pmd_free_tlb(tlb, x)			do { } while (0)
+#define __pmd_free_tlb(tlb, x, a)		do { } while (0)
 
 #undef  pmd_addr_end
 #define pmd_addr_end(addr, end)			(end)
Index: linux-work/include/asm-generic/pgtable-nopud.h
===================================================================
--- linux-work.orig/include/asm-generic/pgtable-nopud.h	2009-02-05 16:23:01.000000000 +1100
+++ linux-work/include/asm-generic/pgtable-nopud.h	2009-07-15 17:42:43.000000000 +1000
@@ -52,7 +52,7 @@ static inline pud_t * pud_offset(pgd_t *
  */
 #define pud_alloc_one(mm, address)		NULL
 #define pud_free(mm, x)				do { } while (0)
-#define __pud_free_tlb(tlb, x)			do { } while (0)
+#define __pud_free_tlb(tlb, x, a)		do { } while (0)
 
 #undef  pud_addr_end
 #define pud_addr_end(addr, end)			(end)
Index: linux-work/include/asm-generic/tlb.h
===================================================================
--- linux-work.orig/include/asm-generic/tlb.h	2009-02-05 16:23:01.000000000 +1100
+++ linux-work/include/asm-generic/tlb.h	2009-07-15 17:42:43.000000000 +1000
@@ -123,24 +123,24 @@ static inline void tlb_remove_page(struc
 		__tlb_remove_tlb_entry(tlb, ptep, address);	\
 	} while (0)
 
-#define pte_free_tlb(tlb, ptep)					\
+#define pte_free_tlb(tlb, ptep, address)			\
 	do {							\
 		tlb->need_flush = 1;				\
-		__pte_free_tlb(tlb, ptep);			\
+		__pte_free_tlb(tlb, ptep, address);		\
 	} while (0)
 
 #ifndef __ARCH_HAS_4LEVEL_HACK
-#define pud_free_tlb(tlb, pudp)					\
+#define pud_free_tlb(tlb, pudp, address)			\
 	do {							\
 		tlb->need_flush = 1;				\
-		__pud_free_tlb(tlb, pudp);			\
+		__pud_free_tlb(tlb, pudp, address);		\
 	} while (0)
 #endif
 
-#define pmd_free_tlb(tlb, pmdp)					\
+#define pmd_free_tlb(tlb, pmdp, address)			\
 	do {							\
 		tlb->need_flush = 1;				\
-		__pmd_free_tlb(tlb, pmdp);			\
+		__pmd_free_tlb(tlb, pmdp, address);		\
 	} while (0)
 
 #define tlb_migrate_finish(mm) do {} while (0)
Index: linux-work/mm/memory.c
===================================================================
--- linux-work.orig/mm/memory.c	2009-07-08 15:53:55.000000000 +1000
+++ linux-work/mm/memory.c	2009-07-15 17:42:43.000000000 +1000
@@ -135,11 +135,12 @@ void pmd_clear_bad(pmd_t *pmd)
  * Note: this doesn't free the actual pages themselves. That
  * has been handled earlier when unmapping all the memory regions.
  */
-static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd)
+static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
+			   unsigned long addr)
 {
 	pgtable_t token = pmd_pgtable(*pmd);
 	pmd_clear(pmd);
-	pte_free_tlb(tlb, token);
+	pte_free_tlb(tlb, token, addr);
 	tlb->mm->nr_ptes--;
 }
 
@@ -157,7 +158,7 @@ static inline void free_pmd_range(struct
 		next = pmd_addr_end(addr, end);
 		if (pmd_none_or_clear_bad(pmd))
 			continue;
-		free_pte_range(tlb, pmd);
+		free_pte_range(tlb, pmd, addr);
 	} while (pmd++, addr = next, addr != end);
 
 	start &= PUD_MASK;
@@ -173,7 +174,7 @@ static inline void free_pmd_range(struct
 
 	pmd = pmd_offset(pud, start);
 	pud_clear(pud);
-	pmd_free_tlb(tlb, pmd);
+	pmd_free_tlb(tlb, pmd, start);
 }
 
 static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
@@ -206,7 +207,7 @@ static inline void free_pud_range(struct
 
 	pud = pud_offset(pgd, start);
 	pgd_clear(pgd);
-	pud_free_tlb(tlb, pud);
+	pud_free_tlb(tlb, pud, start);
 }
 
 /*
Index: linux-work/arch/powerpc/mm/hugetlbpage.c
===================================================================
--- linux-work.orig/arch/powerpc/mm/hugetlbpage.c	2009-02-05 16:22:25.000000000 +1100
+++ linux-work/arch/powerpc/mm/hugetlbpage.c	2009-07-15 17:42:43.000000000 +1000
@@ -305,7 +305,7 @@ static void hugetlb_free_pmd_range(struc
 
 	pmd = pmd_offset(pud, start);
 	pud_clear(pud);
-	pmd_free_tlb(tlb, pmd);
+	pmd_free_tlb(tlb, pmd, start);
 }
 
 static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
@@ -348,7 +348,7 @@ static void hugetlb_free_pud_range(struc
 
 	pud = pud_offset(pgd, start);
 	pgd_clear(pgd);
-	pud_free_tlb(tlb, pud);
+	pud_free_tlb(tlb, pud, start);
 }
 
 /*
Index: linux-work/arch/alpha/include/asm/tlb.h
===================================================================
--- linux-work.orig/arch/alpha/include/asm/tlb.h	2009-02-05 16:22:17.000000000 +1100
+++ linux-work/arch/alpha/include/asm/tlb.h	2009-07-15 17:42:43.000000000 +1000
@@ -9,7 +9,7 @@
 
 #include <asm-generic/tlb.h>
 
-#define __pte_free_tlb(tlb, pte)			pte_free((tlb)->mm, pte)
-#define __pmd_free_tlb(tlb, pmd)			pmd_free((tlb)->mm, pmd)
+#define __pte_free_tlb(tlb, pte, address)		pte_free((tlb)->mm, pte)
+#define __pmd_free_tlb(tlb, pmd, address)		pmd_free((tlb)->mm, pmd)
  
 #endif
Index: linux-work/arch/avr32/include/asm/pgalloc.h
===================================================================
--- linux-work.orig/arch/avr32/include/asm/pgalloc.h	2009-02-05 16:22:19.000000000 +1100
+++ linux-work/arch/avr32/include/asm/pgalloc.h	2009-07-15 17:42:43.000000000 +1000
@@ -83,7 +83,7 @@ static inline void pte_free(struct mm_st
 	quicklist_free_page(QUICK_PT, NULL, pte);
 }
 
-#define __pte_free_tlb(tlb,pte)				\
+#define __pte_free_tlb(tlb,pte,addr)			\
 do {							\
 	pgtable_page_dtor(pte);				\
 	tlb_remove_page((tlb), pte);			\
Index: linux-work/arch/cris/include/asm/pgalloc.h
===================================================================
--- linux-work.orig/arch/cris/include/asm/pgalloc.h	2009-02-05 16:22:19.000000000 +1100
+++ linux-work/arch/cris/include/asm/pgalloc.h	2009-07-15 17:42:43.000000000 +1000
@@ -47,7 +47,7 @@ static inline void pte_free(struct mm_st
 	__free_page(pte);
 }
 
-#define __pte_free_tlb(tlb,pte)				\
+#define __pte_free_tlb(tlb,pte,address)			\
 do {							\
 	pgtable_page_dtor(pte);				\
 	tlb_remove_page((tlb), pte);			\
Index: linux-work/arch/frv/include/asm/pgalloc.h
===================================================================
--- linux-work.orig/arch/frv/include/asm/pgalloc.h	2009-04-30 14:19:03.000000000 +1000
+++ linux-work/arch/frv/include/asm/pgalloc.h	2009-07-15 17:42:43.000000000 +1000
@@ -49,7 +49,7 @@ static inline void pte_free(struct mm_st
 	__free_page(pte);
 }
 
-#define __pte_free_tlb(tlb,pte)				\
+#define __pte_free_tlb(tlb,pte,address)			\
 do {							\
 	pgtable_page_dtor(pte);				\
 	tlb_remove_page((tlb),(pte));			\
@@ -62,7 +62,7 @@ do {							\
  */
 #define pmd_alloc_one(mm, addr)		({ BUG(); ((pmd_t *) 2); })
 #define pmd_free(mm, x)			do { } while (0)
-#define __pmd_free_tlb(tlb,x)		do { } while (0)
+#define __pmd_free_tlb(tlb,x,a)		do { } while (0)
 
 #endif /* CONFIG_MMU */
 
Index: linux-work/arch/ia64/include/asm/pgalloc.h
===================================================================
--- linux-work.orig/arch/ia64/include/asm/pgalloc.h	2009-02-05 16:22:19.000000000 +1100
+++ linux-work/arch/ia64/include/asm/pgalloc.h	2009-07-15 17:42:43.000000000 +1000
@@ -48,7 +48,7 @@ static inline void pud_free(struct mm_st
 {
 	quicklist_free(0, NULL, pud);
 }
-#define __pud_free_tlb(tlb, pud)	pud_free((tlb)->mm, pud)
+#define __pud_free_tlb(tlb, pud, address)	pud_free((tlb)->mm, pud)
 #endif /* CONFIG_PGTABLE_4 */
 
 static inline void
@@ -67,7 +67,7 @@ static inline void pmd_free(struct mm_st
 	quicklist_free(0, NULL, pmd);
 }
 
-#define __pmd_free_tlb(tlb, pmd)	pmd_free((tlb)->mm, pmd)
+#define __pmd_free_tlb(tlb, pmd, address)	pmd_free((tlb)->mm, pmd)
 
 static inline void
 pmd_populate(struct mm_struct *mm, pmd_t * pmd_entry, pgtable_t pte)
@@ -117,6 +117,6 @@ static inline void check_pgt_cache(void)
 	quicklist_trim(0, NULL, 25, 16);
 }
 
-#define __pte_free_tlb(tlb, pte)	pte_free((tlb)->mm, pte)
+#define __pte_free_tlb(tlb, pte, address)	pte_free((tlb)->mm, pte)
 
 #endif				/* _ASM_IA64_PGALLOC_H */
Index: linux-work/arch/ia64/include/asm/tlb.h
===================================================================
--- linux-work.orig/arch/ia64/include/asm/tlb.h	2009-02-05 16:22:19.000000000 +1100
+++ linux-work/arch/ia64/include/asm/tlb.h	2009-07-15 17:42:43.000000000 +1000
@@ -236,22 +236,22 @@ do {							\
 	__tlb_remove_tlb_entry(tlb, ptep, addr);	\
 } while (0)
 
-#define pte_free_tlb(tlb, ptep)				\
+#define pte_free_tlb(tlb, ptep, address)		\
 do {							\
 	tlb->need_flush = 1;				\
-	__pte_free_tlb(tlb, ptep);			\
+	__pte_free_tlb(tlb, ptep, address);		\
 } while (0)
 
-#define pmd_free_tlb(tlb, ptep)				\
+#define pmd_free_tlb(tlb, ptep, address)		\
 do {							\
 	tlb->need_flush = 1;				\
-	__pmd_free_tlb(tlb, ptep);			\
+	__pmd_free_tlb(tlb, ptep, address);		\
 } while (0)
 
-#define pud_free_tlb(tlb, pudp)				\
+#define pud_free_tlb(tlb, pudp, address)		\
 do {							\
 	tlb->need_flush = 1;				\
-	__pud_free_tlb(tlb, pudp);			\
+	__pud_free_tlb(tlb, pudp, address);		\
 } while (0)
 
 #endif /* _ASM_IA64_TLB_H */
Index: linux-work/arch/m32r/include/asm/pgalloc.h
===================================================================
--- linux-work.orig/arch/m32r/include/asm/pgalloc.h	2009-04-30 14:19:03.000000000 +1000
+++ linux-work/arch/m32r/include/asm/pgalloc.h	2009-07-15 17:42:43.000000000 +1000
@@ -58,7 +58,7 @@ static inline void pte_free(struct mm_st
 	__free_page(pte);
 }
 
-#define __pte_free_tlb(tlb, pte)	pte_free((tlb)->mm, (pte))
+#define __pte_free_tlb(tlb, pte, addr)	pte_free((tlb)->mm, (pte))
 
 /*
  * allocating and freeing a pmd is trivial: the 1-entry pmd is
@@ -68,7 +68,7 @@ static inline void pte_free(struct mm_st
 
 #define pmd_alloc_one(mm, addr)		({ BUG(); ((pmd_t *)2); })
 #define pmd_free(mm, x)			do { } while (0)
-#define __pmd_free_tlb(tlb, x)		do { } while (0)
+#define __pmd_free_tlb(tlb, x, addr)	do { } while (0)
 #define pgd_populate(mm, pmd, pte)	BUG()
 
 #define check_pgt_cache()	do { } while (0)
Index: linux-work/arch/m68k/include/asm/motorola_pgalloc.h
===================================================================
--- linux-work.orig/arch/m68k/include/asm/motorola_pgalloc.h	2009-02-05 16:22:20.000000000 +1100
+++ linux-work/arch/m68k/include/asm/motorola_pgalloc.h	2009-07-15 17:42:43.000000000 +1000
@@ -54,7 +54,8 @@ static inline void pte_free(struct mm_st
 	__free_page(page);
 }
 
-static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page)
+static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page,
+				  unsigned long address)
 {
 	pgtable_page_dtor(page);
 	cache_page(kmap(page));
@@ -73,7 +74,8 @@ static inline int pmd_free(struct mm_str
 	return free_pointer_table(pmd);
 }
 
-static inline int __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
+static inline int __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
+				 unsigned long address)
 {
 	return free_pointer_table(pmd);
 }
Index: linux-work/arch/m68k/include/asm/sun3_pgalloc.h
===================================================================
--- linux-work.orig/arch/m68k/include/asm/sun3_pgalloc.h	2009-02-05 16:22:20.000000000 +1100
+++ linux-work/arch/m68k/include/asm/sun3_pgalloc.h	2009-07-15 17:42:43.000000000 +1000
@@ -32,7 +32,7 @@ static inline void pte_free(struct mm_st
         __free_page(page);
 }
 
-#define __pte_free_tlb(tlb,pte)				\
+#define __pte_free_tlb(tlb,pte,addr)			\
 do {							\
 	pgtable_page_dtor(pte);				\
 	tlb_remove_page((tlb), pte);			\
@@ -80,7 +80,7 @@ static inline void pmd_populate(struct m
  * inside the pgd, so has no extra memory associated with it.
  */
 #define pmd_free(mm, x)			do { } while (0)
-#define __pmd_free_tlb(tlb, x)		do { } while (0)
+#define __pmd_free_tlb(tlb, x, addr)	do { } while (0)
 
 static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
Index: linux-work/arch/microblaze/include/asm/pgalloc.h
===================================================================
--- linux-work.orig/arch/microblaze/include/asm/pgalloc.h	2009-07-08 15:53:49.000000000 +1000
+++ linux-work/arch/microblaze/include/asm/pgalloc.h	2009-07-15 17:42:43.000000000 +1000
@@ -180,7 +180,7 @@ extern inline void pte_free(struct mm_st
 	__free_page(ptepage);
 }
 
-#define __pte_free_tlb(tlb, pte)	pte_free((tlb)->mm, (pte))
+#define __pte_free_tlb(tlb, pte, addr)	pte_free((tlb)->mm, (pte))
 
 #define pmd_populate(mm, pmd, pte)	(pmd_val(*(pmd)) = page_address(pte))
 
@@ -193,7 +193,7 @@ extern inline void pte_free(struct mm_st
  */
 #define pmd_alloc_one(mm, address)	({ BUG(); ((pmd_t *)2); })
 /*#define pmd_free(mm, x)			do { } while (0)*/
-#define __pmd_free_tlb(tlb, x)		do { } while (0)
+#define __pmd_free_tlb(tlb, x, addr)	do { } while (0)
 #define pgd_populate(mm, pmd, pte)	BUG()
 
 extern int do_check_pgt_cache(int, int);
Index: linux-work/arch/mips/include/asm/pgalloc.h
===================================================================
--- linux-work.orig/arch/mips/include/asm/pgalloc.h	2009-02-05 16:22:21.000000000 +1100
+++ linux-work/arch/mips/include/asm/pgalloc.h	2009-07-15 17:42:43.000000000 +1000
@@ -98,7 +98,7 @@ static inline void pte_free(struct mm_st
 	__free_pages(pte, PTE_ORDER);
 }
 
-#define __pte_free_tlb(tlb,pte)				\
+#define __pte_free_tlb(tlb,pte,address)			\
 do {							\
 	pgtable_page_dtor(pte);				\
 	tlb_remove_page((tlb), pte);			\
@@ -111,7 +111,7 @@ do {							\
  * inside the pgd, so has no extra memory associated with it.
  */
 #define pmd_free(mm, x)			do { } while (0)
-#define __pmd_free_tlb(tlb, x)		do { } while (0)
+#define __pmd_free_tlb(tlb, x, addr)	do { } while (0)
 
 #endif
 
@@ -132,7 +132,7 @@ static inline void pmd_free(struct mm_st
 	free_pages((unsigned long)pmd, PMD_ORDER);
 }
 
-#define __pmd_free_tlb(tlb, x)	pmd_free((tlb)->mm, x)
+#define __pmd_free_tlb(tlb, x, addr)	pmd_free((tlb)->mm, x)
 
 #endif
 
Index: linux-work/arch/mn10300/include/asm/pgalloc.h
===================================================================
--- linux-work.orig/arch/mn10300/include/asm/pgalloc.h	2009-04-30 14:19:03.000000000 +1000
+++ linux-work/arch/mn10300/include/asm/pgalloc.h	2009-07-15 17:42:43.000000000 +1000
@@ -51,6 +51,6 @@ static inline void pte_free(struct mm_st
 }
 
 
-#define __pte_free_tlb(tlb, pte) tlb_remove_page((tlb), (pte))
+#define __pte_free_tlb(tlb, pte, addr) tlb_remove_page((tlb), (pte))
 
 #endif /* _ASM_PGALLOC_H */
Index: linux-work/arch/parisc/include/asm/tlb.h
===================================================================
--- linux-work.orig/arch/parisc/include/asm/tlb.h	2009-02-05 16:22:21.000000000 +1100
+++ linux-work/arch/parisc/include/asm/tlb.h	2009-07-15 17:42:43.000000000 +1000
@@ -21,7 +21,7 @@ do {	if (!(tlb)->fullmm)	\
 
 #include <asm-generic/tlb.h>
 
-#define __pmd_free_tlb(tlb, pmd)	pmd_free((tlb)->mm, pmd)
-#define __pte_free_tlb(tlb, pte)	pte_free((tlb)->mm, pte)
+#define __pmd_free_tlb(tlb, pmd, addr)	pmd_free((tlb)->mm, pmd)
+#define __pte_free_tlb(tlb, pte, addr)	pte_free((tlb)->mm, pte)
 
 #endif
Index: linux-work/arch/s390/include/asm/tlb.h
===================================================================
--- linux-work.orig/arch/s390/include/asm/tlb.h	2009-02-05 16:22:25.000000000 +1100
+++ linux-work/arch/s390/include/asm/tlb.h	2009-07-15 17:42:43.000000000 +1000
@@ -96,7 +96,8 @@ static inline void tlb_remove_page(struc
  * pte_free_tlb frees a pte table and clears the CRSTE for the
  * page table from the tlb.
  */
-static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte)
+static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
+				unsigned long address)
 {
 	if (!tlb->fullmm) {
 		tlb->array[tlb->nr_ptes++] = pte;
@@ -113,7 +114,8 @@ static inline void pte_free_tlb(struct m
  * as the pgd. pmd_free_tlb checks the asce_limit against 2GB
  * to avoid the double free of the pmd in this case.
  */
-static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
+static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
+				unsigned long address)
 {
 #ifdef __s390x__
 	if (tlb->mm->context.asce_limit <= (1UL << 31))
@@ -134,7 +136,8 @@ static inline void pmd_free_tlb(struct m
  * as the pgd. pud_free_tlb checks the asce_limit against 4TB
  * to avoid the double free of the pud in this case.
  */
-static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
+static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
+				unsigned long address)
 {
 #ifdef __s390x__
 	if (tlb->mm->context.asce_limit <= (1UL << 42))
Index: linux-work/arch/sh/include/asm/pgalloc.h
===================================================================
--- linux-work.orig/arch/sh/include/asm/pgalloc.h	2009-02-05 16:22:26.000000000 +1100
+++ linux-work/arch/sh/include/asm/pgalloc.h	2009-07-15 17:42:43.000000000 +1000
@@ -73,7 +73,7 @@ static inline void pte_free(struct mm_st
 	quicklist_free_page(QUICK_PT, NULL, pte);
 }
 
-#define __pte_free_tlb(tlb,pte)				\
+#define __pte_free_tlb(tlb,pte,addr)			\
 do {							\
 	pgtable_page_dtor(pte);				\
 	tlb_remove_page((tlb), (pte));			\
@@ -85,7 +85,7 @@ do {							\
  */
 
 #define pmd_free(mm, x)			do { } while (0)
-#define __pmd_free_tlb(tlb,x)		do { } while (0)
+#define __pmd_free_tlb(tlb,x,addr)	do { } while (0)
 
 static inline void check_pgt_cache(void)
 {
Index: linux-work/arch/sh/include/asm/tlb.h
===================================================================
--- linux-work.orig/arch/sh/include/asm/tlb.h	2009-03-31 13:22:05.000000000 +1100
+++ linux-work/arch/sh/include/asm/tlb.h	2009-07-15 17:42:43.000000000 +1000
@@ -91,9 +91,9 @@ tlb_end_vma(struct mmu_gather *tlb, stru
 }
 
 #define tlb_remove_page(tlb,page)	free_page_and_swap_cache(page)
-#define pte_free_tlb(tlb, ptep)		pte_free((tlb)->mm, ptep)
-#define pmd_free_tlb(tlb, pmdp)		pmd_free((tlb)->mm, pmdp)
-#define pud_free_tlb(tlb, pudp)		pud_free((tlb)->mm, pudp)
+#define pte_free_tlb(tlb, ptep, addr)	pte_free((tlb)->mm, ptep)
+#define pmd_free_tlb(tlb, pmdp, addr)	pmd_free((tlb)->mm, pmdp)
+#define pud_free_tlb(tlb, pudp, addr)	pud_free((tlb)->mm, pudp)
 
 #define tlb_migrate_finish(mm)		do { } while (0)
 
Index: linux-work/arch/sparc/include/asm/pgalloc_32.h
===================================================================
--- linux-work.orig/arch/sparc/include/asm/pgalloc_32.h	2009-02-05 16:22:26.000000000 +1100
+++ linux-work/arch/sparc/include/asm/pgalloc_32.h	2009-07-15 17:42:43.000000000 +1000
@@ -44,8 +44,8 @@ BTFIXUPDEF_CALL(pmd_t *, pmd_alloc_one, 
 BTFIXUPDEF_CALL(void, free_pmd_fast, pmd_t *)
 #define free_pmd_fast(pmd)	BTFIXUP_CALL(free_pmd_fast)(pmd)
 
-#define pmd_free(mm, pmd)	free_pmd_fast(pmd)
-#define __pmd_free_tlb(tlb, pmd) pmd_free((tlb)->mm, pmd)
+#define pmd_free(mm, pmd)		free_pmd_fast(pmd)
+#define __pmd_free_tlb(tlb, pmd, addr)	pmd_free((tlb)->mm, pmd)
 
 BTFIXUPDEF_CALL(void, pmd_populate, pmd_t *, struct page *)
 #define pmd_populate(MM, PMD, PTE)        BTFIXUP_CALL(pmd_populate)(PMD, PTE)
@@ -62,7 +62,7 @@ BTFIXUPDEF_CALL(void, free_pte_fast, pte
 #define pte_free_kernel(mm, pte)	BTFIXUP_CALL(free_pte_fast)(pte)
 
 BTFIXUPDEF_CALL(void, pte_free, pgtable_t )
-#define pte_free(mm, pte)	BTFIXUP_CALL(pte_free)(pte)
-#define __pte_free_tlb(tlb, pte)	pte_free((tlb)->mm, pte)
+#define pte_free(mm, pte)		BTFIXUP_CALL(pte_free)(pte)
+#define __pte_free_tlb(tlb, pte, addr)	pte_free((tlb)->mm, pte)
 
 #endif /* _SPARC_PGALLOC_H */
Index: linux-work/arch/sparc/include/asm/tlb_64.h
===================================================================
--- linux-work.orig/arch/sparc/include/asm/tlb_64.h	2009-03-31 13:22:05.000000000 +1100
+++ linux-work/arch/sparc/include/asm/tlb_64.h	2009-07-15 17:42:43.000000000 +1000
@@ -100,9 +100,9 @@ static inline void tlb_remove_page(struc
 }
 
 #define tlb_remove_tlb_entry(mp,ptep,addr) do { } while (0)
-#define pte_free_tlb(mp, ptepage) pte_free((mp)->mm, ptepage)
-#define pmd_free_tlb(mp, pmdp) pmd_free((mp)->mm, pmdp)
-#define pud_free_tlb(tlb,pudp) __pud_free_tlb(tlb,pudp)
+#define pte_free_tlb(mp, ptepage, addr) pte_free((mp)->mm, ptepage)
+#define pmd_free_tlb(mp, pmdp, addr) pmd_free((mp)->mm, pmdp)
+#define pud_free_tlb(tlb,pudp, addr) __pud_free_tlb(tlb,pudp,addr)
 
 #define tlb_migrate_finish(mm)	do { } while (0)
 #define tlb_start_vma(tlb, vma) do { } while (0)
Index: linux-work/arch/um/include/asm/pgalloc.h
===================================================================
--- linux-work.orig/arch/um/include/asm/pgalloc.h	2009-04-30 14:19:04.000000000 +1000
+++ linux-work/arch/um/include/asm/pgalloc.h	2009-07-15 17:42:43.000000000 +1000
@@ -40,7 +40,7 @@ static inline void pte_free(struct mm_st
 	__free_page(pte);
 }
 
-#define __pte_free_tlb(tlb,pte)				\
+#define __pte_free_tlb(tlb,pte, address)		\
 do {							\
 	pgtable_page_dtor(pte);				\
 	tlb_remove_page((tlb),(pte));			\
@@ -53,7 +53,7 @@ static inline void pmd_free(struct mm_st
 	free_page((unsigned long)pmd);
 }
 
-#define __pmd_free_tlb(tlb,x)   tlb_remove_page((tlb),virt_to_page(x))
+#define __pmd_free_tlb(tlb,x, address)   tlb_remove_page((tlb),virt_to_page(x))
 #endif
 
 #define check_pgt_cache()	do { } while (0)
Index: linux-work/arch/um/include/asm/tlb.h
===================================================================
--- linux-work.orig/arch/um/include/asm/tlb.h	2009-02-05 16:22:28.000000000 +1100
+++ linux-work/arch/um/include/asm/tlb.h	2009-07-15 17:42:43.000000000 +1000
@@ -116,11 +116,11 @@ static inline void tlb_remove_page(struc
 		__tlb_remove_tlb_entry(tlb, ptep, address);	\
 	} while (0)
 
-#define pte_free_tlb(tlb, ptep) __pte_free_tlb(tlb, ptep)
+#define pte_free_tlb(tlb, ptep, addr) __pte_free_tlb(tlb, ptep, addr)
 
-#define pud_free_tlb(tlb, pudp) __pud_free_tlb(tlb, pudp)
+#define pud_free_tlb(tlb, pudp, addr) __pud_free_tlb(tlb, pudp, addr)
 
-#define pmd_free_tlb(tlb, pmdp) __pmd_free_tlb(tlb, pmdp)
+#define pmd_free_tlb(tlb, pmdp, addr) __pmd_free_tlb(tlb, pmdp, addr)
 
 #define tlb_migrate_finish(mm) do {} while (0)
 
Index: linux-work/arch/x86/include/asm/pgalloc.h
===================================================================
--- linux-work.orig/arch/x86/include/asm/pgalloc.h	2009-02-05 16:22:28.000000000 +1100
+++ linux-work/arch/x86/include/asm/pgalloc.h	2009-07-15 17:42:43.000000000 +1000
@@ -46,7 +46,13 @@ static inline void pte_free(struct mm_st
 	__free_page(pte);
 }
 
-extern void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte);
+extern void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte);
+
+static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte,
+				  unsigned long address)
+{
+	___pte_free_tlb(tlb, pte);
+}
 
 static inline void pmd_populate_kernel(struct mm_struct *mm,
 				       pmd_t *pmd, pte_t *pte)
@@ -78,7 +84,13 @@ static inline void pmd_free(struct mm_st
 	free_page((unsigned long)pmd);
 }
 
-extern void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd);
+extern void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd);
+
+static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
+				  unsigned long adddress)
+{
+	___pmd_free_tlb(tlb, pmd);
+}
 
 #ifdef CONFIG_X86_PAE
 extern void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd);
@@ -108,7 +120,14 @@ static inline void pud_free(struct mm_st
 	free_page((unsigned long)pud);
 }
 
-extern void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud);
+extern void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud);
+
+static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
+				  unsigned long address)
+{
+	___pud_free_tlb(tlb, pud);
+}
+
 #endif	/* PAGETABLE_LEVELS > 3 */
 #endif	/* PAGETABLE_LEVELS > 2 */
 
Index: linux-work/arch/x86/mm/pgtable.c
===================================================================
--- linux-work.orig/arch/x86/mm/pgtable.c	2009-07-08 15:53:51.000000000 +1000
+++ linux-work/arch/x86/mm/pgtable.c	2009-07-15 17:42:43.000000000 +1000
@@ -25,7 +25,7 @@ pgtable_t pte_alloc_one(struct mm_struct
 	return pte;
 }
 
-void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
+void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
 {
 	pgtable_page_dtor(pte);
 	paravirt_release_pte(page_to_pfn(pte));
@@ -33,14 +33,14 @@ void __pte_free_tlb(struct mmu_gather *t
 }
 
 #if PAGETABLE_LEVELS > 2
-void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
+void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
 {
 	paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT);
 	tlb_remove_page(tlb, virt_to_page(pmd));
 }
 
 #if PAGETABLE_LEVELS > 3
-void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
+void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
 {
 	paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
 	tlb_remove_page(tlb, virt_to_page(pud));
Index: linux-work/arch/xtensa/include/asm/tlb.h
===================================================================
--- linux-work.orig/arch/xtensa/include/asm/tlb.h	2009-02-05 16:22:29.000000000 +1100
+++ linux-work/arch/xtensa/include/asm/tlb.h	2009-07-15 17:42:43.000000000 +1000
@@ -42,6 +42,6 @@
 
 #include <asm-generic/tlb.h>
 
-#define __pte_free_tlb(tlb, pte)		pte_free((tlb)->mm, pte)
+#define __pte_free_tlb(tlb, pte, address)	pte_free((tlb)->mm, pte)
 
 #endif	/* _XTENSA_TLB_H */
Index: linux-work/arch/arm/include/asm/tlb.h
===================================================================
--- linux-work.orig/arch/arm/include/asm/tlb.h	2009-07-15 17:45:18.000000000 +1000
+++ linux-work/arch/arm/include/asm/tlb.h	2009-07-15 17:45:28.000000000 +1000
@@ -102,8 +102,8 @@ tlb_end_vma(struct mmu_gather *tlb, stru
 }
 
 #define tlb_remove_page(tlb,page)	free_page_and_swap_cache(page)
-#define pte_free_tlb(tlb, ptep)		pte_free((tlb)->mm, ptep)
-#define pmd_free_tlb(tlb, pmdp)		pmd_free((tlb)->mm, pmdp)
+#define pte_free_tlb(tlb, ptep, addr)	pte_free((tlb)->mm, ptep)
+#define pmd_free_tlb(tlb, pmdp, addr)	pmd_free((tlb)->mm, pmdp)
 
 #define tlb_migrate_finish(mm)		do { } while (0)
 
Index: linux-work/arch/frv/include/asm/pgtable.h
===================================================================
--- linux-work.orig/arch/frv/include/asm/pgtable.h	2009-07-15 17:45:52.000000000 +1000
+++ linux-work/arch/frv/include/asm/pgtable.h	2009-07-15 17:45:59.000000000 +1000
@@ -225,7 +225,7 @@ static inline pud_t *pud_offset(pgd_t *p
  */
 #define pud_alloc_one(mm, address)		NULL
 #define pud_free(mm, x)				do { } while (0)
-#define __pud_free_tlb(tlb, x)			do { } while (0)
+#define __pud_free_tlb(tlb, x, address)		do { } while (0)
 
 /*
  * The "pud_xxx()" functions here are trivial for a folded two-level
Index: linux-work/include/asm-generic/4level-fixup.h
===================================================================
--- linux-work.orig/include/asm-generic/4level-fixup.h	2009-07-15 17:44:05.000000000 +1000
+++ linux-work/include/asm-generic/4level-fixup.h	2009-07-15 17:44:25.000000000 +1000
@@ -27,9 +27,9 @@
 #define pud_page_vaddr(pud)		pgd_page_vaddr(pud)
 
 #undef pud_free_tlb
-#define pud_free_tlb(tlb, x)            do { } while (0)
+#define pud_free_tlb(tlb, x, addr)	do { } while (0)
 #define pud_free(mm, x)			do { } while (0)
-#define __pud_free_tlb(tlb, x)		do { } while (0)
+#define __pud_free_tlb(tlb, x, addr)	do { } while (0)
 
 #undef  pud_addr_end
 #define pud_addr_end(addr, end)		(end)

^ permalink raw reply

* ethernet driver - problem capturing own packet in promiscous mode
From: sudheer a @ 2009-07-15  8:36 UTC (permalink / raw)
  To: linuxppc-dev

[-- Attachment #1: Type: text/plain, Size: 729 bytes --]

Hi all

In the Ethernet driver I need to enable promiscuous mode and capture the
packet that is sent by the same Ethernet interface.

The board is connected to a packet generator and can send/receive packets
whenever I need.

In the board's Ethernet driver, I made sure that I am sending only broadcast
packets and that promiscuous mode is enabled, but the packets are not captured.
If I send a packet to the board from the packet generator, it is received.

Could any one please suggest me any clues.

With promiscuous mode enabled:
A packet sent by the packet generator is received by the board.
A packet sent by the board is received by the packet generator. The same packet
should also be captured by the board since promiscuous mode is enabled, but that
is not happening.

Thanks
Sudheer

[-- Attachment #2: Type: text/html, Size: 958 bytes --]

^ permalink raw reply

* RE: ethernet driver - problem capturing own packet in promiscous mode
From: Cote, Sylvain @ 2009-07-15 12:22 UTC (permalink / raw)
  To: sudheer a; +Cc: linuxppc-dev@lists.ozlabs.org
In-Reply-To: <4312c3250907150136y4c98c78ak1b4fe50534248442@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 1728 bytes --]

> Hi all

> In ethernet driver  i need to enable promiscous mode and have to capture the packet that is sent by the same ethernet.

> The board is connected to a packet generator and could send/receive packets  whenever i need .

> In the board ethernet driver , I made sure that am sending only broadcast packets and promisc mode is enabled but the packets are not captured. If i am sending a packet to the board from packetgenerator it is receiving.

> Could any one please suggest me any clues.

> Having the promisc enabled:
> Packet sent by packetgenerator is received by board.
> Packet sent by board is received by packetgenerator, The same packet should be captured by board as promiscuous is enabled. but not happening.


The Ethernet interface that sent the packet will never receive the packet it sent, even if you are in promiscuous mode.  To be able to do that you should put your interface in loopback mode.
In promiscuous mode, you will be able to receive any packets sent by other interfaces (broadcast, multicast and also unicast that is not directed to your interface's MAC address), but not
packets sent from your own interface.

Regards

Sylvain

This electronic message may contain proprietary and confidential information of Verint Systems Inc., its affiliates and/or subsidiaries.
The information is intended to be for the use of the individual(s) or
entity(ies) named above.  If you are not the intended recipient (or authorized to receive this e-mail for the intended recipient), you may not use, copy, disclose or distribute to anyone this message or any information contained in this message.  If you have received this electronic message in error, please notify us by replying to this e-mail.
\r

[-- Attachment #2: Type: text/html, Size: 6177 bytes --]

^ permalink raw reply

* Re: [RFC/PATCH] mm: Pass virtual address to [__]p{te, ud, md}_free_tlb()
From: Nick Piggin @ 2009-07-15 13:56 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: Linux-Arch, Linux Memory Management, Hugh Dickins, linux-kernel,
	linuxppc-dev
In-Reply-To: <20090715074952.A36C7DDDB2@ozlabs.org>

On Wed, Jul 15, 2009 at 05:49:47PM +1000, Benjamin Herrenschmidt wrote:
> Upcoming paches to support the new 64-bit "BookE" powerpc architecture
> will need to have the virtual address corresponding to PTE page when
> freeing it, due to the way the HW table walker works.
> 
> Basically, the TLB can be loaded with "large" pages that cover the whole
> virtual space (well, sort-of, half of it actually) represented by a PTE
> page, and which contain an "indirect" bit indicating that this TLB entry
> RPN points to an array of PTEs from which the TLB can then create direct
> entries.

RPN is PFN in ppc speak, right?


> Thus, in order to invalidate those when PTE pages are deleted,
> we need the virtual address to pass to tlbilx or tlbivax instructions.

Interesting arrangement. So are these last level ptes modifiable
from userspace or something? If not, I wonder if you could manage
them as another level of pointers with the existing pagetable
functions?
 

> The old trick of sticking it somewhere in the PTE page struct page sucks
> too much, the address is almost readily available in all call sites and
> almost everybody implemets these as macros, so we may as well add the
> argument everywhere. I added it to the pmd and pud variants for consistency.
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> ---
> 
> I would like to merge the new support that depends on this in 2.6.32,
> so unless there's major objections, I'd like this to go in early during
> the merge window. We can sort out separately how to carry the patch
> around in -next until then since the powerpc tree will have a dependency
> on it.

Can't see any problem with that.

^ permalink raw reply

* having access to interrupt specifier in map() function
From: Kumar Gala @ 2009-07-15 14:11 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev@ozlabs.org list

Ben,

Do you have any ideas on keeping access to the interrupt specifier  
around so when we call map() we have access to it.  Our HV guys are  
looking at using additional bits in the interrupt specifier to encode  
information beyond just level/sense of the IRQ and want to make  
decisions based on it during map().

Maybe we can keep it around in irq_map[].

thoughts?

- k

^ permalink raw reply

* booting MPC8313 based board with yaffs2 RFS
From: Rupesh Kumar @ 2009-07-15 14:58 UTC (permalink / raw)
  To: linuxppc-dev

Hi
I am using MPC8313 board which is currently booting with JFFS2 root file 
system. 
I am using linux kernel version 2.6.23 from FreeScale's LTIB for MPC8313. 

As I want it to boot with a YAFFS2 root file system, I compiled the kernel
with yaffs2 support, created a yaffs2 root file system and passed the yaffs2
partition of the NAND in bootargs. However, it did not work.


If anyone has done this successfully, could you please share the steps to be
followed?

Thanks
Rupesh

^ permalink raw reply

* [PATCH 1/2 v3] fs_enet/mii-fec.c: fix MII speed calculation
From: Wolfgang Denk @ 2009-07-15 15:18 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: netdev, Wolfgang Denk
In-Reply-To: <1247578966-9847-1-git-send-email-wd@denx.de>

The MII speed calculation was based on the CPU clock (ppc_proc_freq),
but for MPC512x we must use the bus clock instead.

This patch makes it use the correct clock and makes sure we don't
clobber reserved bits in the MII_SPEED register.

Signed-off-by: Wolfgang Denk <wd@denx.de>
Cc: Grant Likely <grant.likely@secretlab.ca>
Cc: Kumar Gala <galak@kernel.crashing.org>
Cc: <netdev@vger.kernel.org>
---
Please ignore patch v2, it's crap.
Hope this is a bit better.

 arch/powerpc/include/asm/mpc5xxx.h   |   10 +++++++++
 arch/powerpc/sysdev/mpc5xxx_clocks.c |   37 ++++++++++++++++++++++++++++++++++
 drivers/net/fs_enet/mii-fec.c        |   13 +++++++++--
 3 files changed, 57 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/mpc5xxx.h b/arch/powerpc/include/asm/mpc5xxx.h
index 5ce9c5f..86ab29f 100644
--- a/arch/powerpc/include/asm/mpc5xxx.h
+++ b/arch/powerpc/include/asm/mpc5xxx.h
@@ -15,8 +15,18 @@
 
 #ifndef __ASM_POWERPC_MPC5xxx_H__
 #define __ASM_POWERPC_MPC5xxx_H__
+#include <linux/of_platform.h>
 
 extern unsigned long mpc5xxx_get_bus_frequency(struct device_node *node);
 
+#if defined(CONFIG_PPC_MPC512x) || defined(CONFIG_PPC_MPC52xx)
+extern int mpc5xxx_get_mii_speed(struct of_device *ofdev);
+#else
+static inline int mpc5xxx_get_mii_speed(struct of_device *ofdev)
+{
+	return -1;
+}
+#endif
+
 #endif /* __ASM_POWERPC_MPC5xxx_H__ */
 
diff --git a/arch/powerpc/sysdev/mpc5xxx_clocks.c b/arch/powerpc/sysdev/mpc5xxx_clocks.c
index 34e12f9..e26d12b 100644
--- a/arch/powerpc/sysdev/mpc5xxx_clocks.c
+++ b/arch/powerpc/sysdev/mpc5xxx_clocks.c
@@ -31,3 +31,40 @@ mpc5xxx_get_bus_frequency(struct device_node *node)
 	return p_bus_freq ? *p_bus_freq : 0;
 }
 EXPORT_SYMBOL(mpc5xxx_get_bus_frequency);
+
+/**
+ *	mpc5xxx_get_mii_speed - Get the MII_SPEED value
+ *	@node:	device node
+ *
+ *	Returns the MII_SPEED value for MPC512x and MPC52xx systems.
+ *	The value gets computed such that the resulting MDC frequency
+ *	is 2.5 MHz or lower.
+ */
+
+int
+mpc5xxx_get_mii_speed(struct of_device *ofdev)
+{
+	unsigned int clock, speed;
+
+	clock = mpc5xxx_get_bus_frequency(ofdev->node);
+
+	if (!clock) {
+		dev_err(&ofdev->dev, "could not determine IPS/IPB clock\n");
+		return -ENODEV;
+	}
+
+	/* scale for a MII clock <= 2.5 MHz */
+	speed = (clock + 2499999) / 2500000;
+
+	/* only 6 bits available for MII speed */
+	if (speed > 0x3F) {
+		speed = 0x3F;
+		dev_err(&ofdev->dev,
+			"MII clock (%d MHz) exceeds max (2.5 MHz)\n",
+			clock / speed);
+	}
+
+	/* Field is in bits 25:30 of MII_SPEED register */
+	return speed << 1;
+}
+EXPORT_SYMBOL(mpc5xxx_get_mii_speed);
diff --git a/drivers/net/fs_enet/mii-fec.c b/drivers/net/fs_enet/mii-fec.c
index 75a0999..a28d39f 100644
--- a/drivers/net/fs_enet/mii-fec.c
+++ b/drivers/net/fs_enet/mii-fec.c
@@ -36,6 +36,7 @@
 #include <asm/pgtable.h>
 #include <asm/irq.h>
 #include <asm/uaccess.h>
+#include <asm/mpc5xxx.h>
 
 #include "fs_enet.h"
 #include "fec.h"
@@ -103,7 +104,6 @@ static int fs_enet_fec_mii_reset(struct mii_bus *bus)
 static int __devinit fs_enet_mdio_probe(struct of_device *ofdev,
                                         const struct of_device_id *match)
 {
-	struct device_node *np = NULL;
 	struct resource res;
 	struct mii_bus *new_bus;
 	struct fec_info *fec;
@@ -133,13 +133,20 @@ static int __devinit fs_enet_mdio_probe(struct of_device *ofdev,
 	if (!fec->fecp)
 		goto out_fec;
 
-	fec->mii_speed = ((ppc_proc_freq + 4999999) / 5000000) << 1;
+	if (of_device_is_compatible(ofdev->node, "fsl,mpc5121-fec-mdio")) {
+		i = mpc5xxx_get_mii_speed(ofdev);
+		if (i < 0)
+			goto out_unmap_regs;
+		fec->mii_speed = i;
+	} else {
+		fec->mii_speed = ((ppc_proc_freq + 4999999) / 5000000) << 1;
+	}
 
 	setbits32(&fec->fecp->fec_r_cntrl, FEC_RCNTRL_MII_MODE);
 	setbits32(&fec->fecp->fec_ecntrl, FEC_ECNTRL_PINMUX |
 	                                  FEC_ECNTRL_ETHER_EN);
 	out_be32(&fec->fecp->fec_ievent, FEC_ENET_MII);
-	out_be32(&fec->fecp->fec_mii_speed, fec->mii_speed);
+	clrsetbits_be32(&fec->fecp->fec_mii_speed, 0x7E, fec->mii_speed);
 
 	new_bus->phy_mask = ~0;
 	new_bus->irq = kmalloc(sizeof(int) * PHY_MAX_ADDR, GFP_KERNEL);
-- 
1.6.0.6

^ permalink raw reply related

* [PATCH 2/2] MPC52xx FEC: be more conservative when setting MII_SPEED register
From: Wolfgang Denk @ 2009-07-15 15:18 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: netdev, Wolfgang Denk
In-Reply-To: <1247578966-9847-1-git-send-email-wd@denx.de>

This patch adds error checking and prevents clobbering unrelated bits
(reserved bits or the DIS_PREAMBLE bit) when writing the MII_SPEED
register on MPC52xx systems.

Signed-off-by: Wolfgang Denk <wd@denx.de>
Cc: Grant Likely <grant.likely@secretlab.ca>
Cc: Kumar Gala <galak@kernel.crashing.org>
Cc: <netdev@vger.kernel.org>
---
 drivers/net/fec_mpc52xx.c     |    2 +-
 drivers/net/fec_mpc52xx_phy.c |    6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/fec_mpc52xx.c b/drivers/net/fec_mpc52xx.c
index cc78633..b69d440 100644
--- a/drivers/net/fec_mpc52xx.c
+++ b/drivers/net/fec_mpc52xx.c
@@ -639,7 +639,7 @@ static void mpc52xx_fec_hw_init(struct net_device *dev)
 	/* set phy speed.
 	 * this can't be done in phy driver, since it needs to be called
 	 * before fec stuff (even on resume) */
-	out_be32(&fec->mii_speed, priv->mdio_speed);
+	clrsetbits_be32(&fec->mii_speed, 0x7E, priv->mdio_speed);
 }
 
 /**
diff --git a/drivers/net/fec_mpc52xx_phy.c b/drivers/net/fec_mpc52xx_phy.c
index 31e6d62..f733d43 100644
--- a/drivers/net/fec_mpc52xx_phy.c
+++ b/drivers/net/fec_mpc52xx_phy.c
@@ -105,8 +105,10 @@ static int mpc52xx_fec_mdio_probe(struct of_device *of,
 	dev_set_drvdata(dev, bus);
 
 	/* set MII speed */
-	out_be32(&priv->regs->mii_speed,
-		((mpc5xxx_get_bus_frequency(of->node) >> 20) / 5) << 1);
+	i = mpc5xxx_get_mii_speed(of);
+	if (i<0)
+		goto out_unmap;
+	clrsetbits_be32(&priv->regs->mii_speed, 0x7E, i);
 
 	err = of_mdiobus_register(bus, np);
 	if (err)
-- 
1.6.0.6

^ permalink raw reply related

* Re: [PATCH 1/2 v3] fs_enet/mii-fec.c: fix MII speed calculation
From: Grant Likely @ 2009-07-15 17:17 UTC (permalink / raw)
  To: Wolfgang Denk; +Cc: linuxppc-dev, netdev
In-Reply-To: <1247671133-12148-1-git-send-email-wd@denx.de>

On Wed, Jul 15, 2009 at 9:18 AM, Wolfgang Denk<wd@denx.de> wrote:
> The MII speed calculation was based on the CPU clock (ppc_proc_freq),
> but for MPC512x we must use the bus clock instead.
>
> This patch makes it use the correct clock and makes sure we don't
> clobber reserved bits in the MII_SPEED register.
>
> Signed-off-by: Wolfgang Denk <wd@denx.de>
> Cc: Grant Likely <grant.likely@secretlab.ca>
> Cc: Kumar Gala <galak@kernel.crashing.org>
> Cc: <netdev@vger.kernel.org>
> ---
> Please ignore patch v2, it's crap.
> Hope this is a bit better.
>
>  arch/powerpc/include/asm/mpc5xxx.h   |   10 +++++++++
>  arch/powerpc/sysdev/mpc5xxx_clocks.c |   37 ++++++++++++++++++++++++++++++++++

Drop the common code bit.  The 5200 and 5121 are different devices and
it is a tiny bit of code.  I don't think there is any benefit to
having it as a common function.  Just roll the get_mii_speed function
in the mii-fec driver itself.

Also, this patch can be quite a bit simpler if you use the .data
pointer in the drivers match table to specify the function used to
return the bus clock speed.  Something like this:

static struct of_device_id fs_enet_mdio_fec_match[] = {
	{
		.compatible = "fsl,pq1-fec-mdio",
	},
#if defined(CONFIG_PPC_MPC512x)
	{
		.compatible = "fsl,mpc5121-fec-mdio",
		.data = mpc5xxx_get_bus_frequency,
	},
#endif
	{},
};

and

int *get_bus_freq(of_node *) = data;
if (get_bus_freq)
        bus_freq = get_bus_freq(np);
else
        bus_freq = ppc_proc_freq / 2;

... then do the regular calculation here and add in the additional
robustification you did in this patch.

Heck, you could even eliminate the if/else above if the normal case
you have a get_bus_speed function for the original ppc_proc_freq case,
but I'm not sure if it is worth it.


>  drivers/net/fs_enet/mii-fec.c        |   13 +++++++++--
>  3 files changed, 57 insertions(+), 3 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/mpc5xxx.h b/arch/powerpc/include/as=
m/mpc5xxx.h
> index 5ce9c5f..86ab29f 100644
> --- a/arch/powerpc/include/asm/mpc5xxx.h
> +++ b/arch/powerpc/include/asm/mpc5xxx.h
> @@ -15,8 +15,18 @@
>
> =A0#ifndef __ASM_POWERPC_MPC5xxx_H__
> =A0#define __ASM_POWERPC_MPC5xxx_H__
> +#include <linux/of_platform.h>
>
> =A0extern unsigned long mpc5xxx_get_bus_frequency(struct device_node *nod=
e);
>
> +#if defined(CONFIG_PPC_MPC512x) || defined(CONFIG_PPC_MPC52xx)
> +extern int mpc5xxx_get_mii_speed(struct of_device *ofdev);
> +#else
> +static inline int mpc5xxx_get_mii_speed(struct of_device *ofdev)
> +{
> + =A0 =A0 =A0 return -1;
> +}
> +#endif
> +
> =A0#endif /* __ASM_POWERPC_MPC5xxx_H__ */
>
> diff --git a/arch/powerpc/sysdev/mpc5xxx_clocks.c b/arch/powerpc/sysdev/m=
pc5xxx_clocks.c
> index 34e12f9..e26d12b 100644
> --- a/arch/powerpc/sysdev/mpc5xxx_clocks.c
> +++ b/arch/powerpc/sysdev/mpc5xxx_clocks.c
> @@ -31,3 +31,40 @@ mpc5xxx_get_bus_frequency(struct device_node *node)
> =A0 =A0 =A0 =A0return p_bus_freq ? *p_bus_freq : 0;
> =A0}
> =A0EXPORT_SYMBOL(mpc5xxx_get_bus_frequency);
> +
> +/**
> + * =A0 =A0 mpc5xxx_get_mii_speed - Get the MII_SPEED value
> + * =A0 =A0 @node: =A0device node
> + *
> + * =A0 =A0 Returns the MII_SPEED value for MPC512x and MPC52xx systems.
> + * =A0 =A0 The value gets computed such that the resulting MDC frequency
> + * =A0 =A0 is 2.5 MHz or lower.
> + */
> +
> +int
> +mpc5xxx_get_mii_speed(struct of_device *ofdev)
> +{
> + =A0 =A0 =A0 unsigned int clock, speed;
> +
> + =A0 =A0 =A0 clock =3D mpc5xxx_get_bus_frequency(ofdev->node);
> +
> + =A0 =A0 =A0 if (!clock) {
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 dev_err(&ofdev->dev, "could not determine I=
PS/IPB clock\n");
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 return -ENODEV;
> + =A0 =A0 =A0 }
> +
> + =A0 =A0 =A0 /* scale for a MII clock <=3D 2.5 MHz */
> + =A0 =A0 =A0 speed =3D (clock + 2499999) / 2500000;
> +
> + =A0 =A0 =A0 /* only 6 bits available for MII speed */
> + =A0 =A0 =A0 if (speed > 0x3F) {
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 speed =3D 0x3F;
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 dev_err(&ofdev->dev,
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 "MII clock (%d MHz) exceeds=
 max (2.5 MHz)\n",
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 clock / speed);
> + =A0 =A0 =A0 }
> +
> + =A0 =A0 =A0 /* Field is in bits 25:30 of MII_SPEED register */
> + =A0 =A0 =A0 return speed << 1;
> +}
> +EXPORT_SYMBOL(mpc5xxx_get_mii_speed);
> diff --git a/drivers/net/fs_enet/mii-fec.c b/drivers/net/fs_enet/mii-fec.=
c
> index 75a0999..a28d39f 100644
> --- a/drivers/net/fs_enet/mii-fec.c
> +++ b/drivers/net/fs_enet/mii-fec.c
> @@ -36,6 +36,7 @@
> =A0#include <asm/pgtable.h>
> =A0#include <asm/irq.h>
> =A0#include <asm/uaccess.h>
> +#include <asm/mpc5xxx.h>
>
> =A0#include "fs_enet.h"
> =A0#include "fec.h"
> @@ -103,7 +104,6 @@ static int fs_enet_fec_mii_reset(struct mii_bus *bus)
> =A0static int __devinit fs_enet_mdio_probe(struct of_device *ofdev,
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =
=A0 =A0 const struct of_device_id *match)
> =A0{
> - =A0 =A0 =A0 struct device_node *np =3D NULL;
> =A0 =A0 =A0 =A0struct resource res;
> =A0 =A0 =A0 =A0struct mii_bus *new_bus;
> =A0 =A0 =A0 =A0struct fec_info *fec;
> @@ -133,13 +133,20 @@ static int __devinit fs_enet_mdio_probe(struct of_d=
evice *ofdev,
> =A0 =A0 =A0 =A0if (!fec->fecp)
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0goto out_fec;
>
> - =A0 =A0 =A0 fec->mii_speed =3D ((ppc_proc_freq + 4999999) / 5000000) <<=
 1;
> + =A0 =A0 =A0 if (of_device_is_compatible(ofdev->node, "fsl,mpc5121-fec-m=
dio")) {
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 i =3D mpc5xxx_get_mii_speed(ofdev);
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (i < 0)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 goto out_unmap_regs;
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 fec->mii_speed =3D i;
> + =A0 =A0 =A0 } else {
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 fec->mii_speed =3D ((ppc_proc_freq + 499999=
9) / 5000000) << 1;
> + =A0 =A0 =A0 }
>
> =A0 =A0 =A0 =A0setbits32(&fec->fecp->fec_r_cntrl, FEC_RCNTRL_MII_MODE);
> =A0 =A0 =A0 =A0setbits32(&fec->fecp->fec_ecntrl, FEC_ECNTRL_PINMUX |
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =
=A0 =A0 =A0FEC_ECNTRL_ETHER_EN);
> =A0 =A0 =A0 =A0out_be32(&fec->fecp->fec_ievent, FEC_ENET_MII);
> - =A0 =A0 =A0 out_be32(&fec->fecp->fec_mii_speed, fec->mii_speed);
> + =A0 =A0 =A0 clrsetbits_be32(&fec->fecp->fec_mii_speed, 0x7E, fec->mii_s=
peed);
>
> =A0 =A0 =A0 =A0new_bus->phy_mask =3D ~0;
> =A0 =A0 =A0 =A0new_bus->irq =3D kmalloc(sizeof(int) * PHY_MAX_ADDR, GFP_K=
ERNEL);
> --
> 1.6.0.6
>
>



--=20
Grant Likely, B.Sc., P.Eng.
Secret Lab Technologies Ltd.

^ permalink raw reply

* Re: [PATCH 2/2] MPC52xx FEC: be more conservative when setting MII_SPEED register
From: Grant Likely @ 2009-07-15 17:18 UTC (permalink / raw)
  To: Wolfgang Denk; +Cc: linuxppc-dev, netdev
In-Reply-To: <1247671133-12148-2-git-send-email-wd@denx.de>

On Wed, Jul 15, 2009 at 9:18 AM, Wolfgang Denk<wd@denx.de> wrote:
> This patch adds error checking and prevents clobbering unrelated bits
> (reserved bits or the DIS_PREAMBLE bit) when writing the MII_SPEED
> register on MPC52xx systems.
>
> Signed-off-by: Wolfgang Denk <wd@denx.de>
> Cc: Grant Likely <grant.likely@secretlab.ca>
> Cc: Kumar Gala <galak@kernel.crashing.org>
> Cc: <netdev@vger.kernel.org>

As I mentioned in the other patch, I don't want the 5121 and 5200 FEC
devices using common code for this.  It is a tiny block of code and
they are different devices.  Just open code the needed calculation
into this driver.

g.

> ---
> =A0drivers/net/fec_mpc52xx.c =A0 =A0 | =A0 =A02 +-
> =A0drivers/net/fec_mpc52xx_phy.c | =A0 =A06 ++++--
> =A02 files changed, 5 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/net/fec_mpc52xx.c b/drivers/net/fec_mpc52xx.c
> index cc78633..b69d440 100644
> --- a/drivers/net/fec_mpc52xx.c
> +++ b/drivers/net/fec_mpc52xx.c
> @@ -639,7 +639,7 @@ static void mpc52xx_fec_hw_init(struct net_device *de=
v)
> =A0 =A0 =A0 =A0/* set phy speed.
> =A0 =A0 =A0 =A0 * this can't be done in phy driver, since it needs to be =
called
> =A0 =A0 =A0 =A0 * before fec stuff (even on resume) */
> - =A0 =A0 =A0 out_be32(&fec->mii_speed, priv->mdio_speed);
> + =A0 =A0 =A0 clrsetbits_be32(&fec->mii_speed, 0x7E, priv->mdio_speed);
> =A0}
>
> =A0/**
> diff --git a/drivers/net/fec_mpc52xx_phy.c b/drivers/net/fec_mpc52xx_phy.=
c
> index 31e6d62..f733d43 100644
> --- a/drivers/net/fec_mpc52xx_phy.c
> +++ b/drivers/net/fec_mpc52xx_phy.c
> @@ -105,8 +105,10 @@ static int mpc52xx_fec_mdio_probe(struct of_device *=
of,
> =A0 =A0 =A0 =A0dev_set_drvdata(dev, bus);
>
> =A0 =A0 =A0 =A0/* set MII speed */
> - =A0 =A0 =A0 out_be32(&priv->regs->mii_speed,
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 ((mpc5xxx_get_bus_frequency(of->node) >> 20=
) / 5) << 1);
> + =A0 =A0 =A0 i =3D mpc5xxx_get_mii_speed(of);
> + =A0 =A0 =A0 if (i<0)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 goto out_unmap;
> + =A0 =A0 =A0 clrsetbits_be32(&priv->regs->mii_speed, 0x7E, i);
>
> =A0 =A0 =A0 =A0err =3D of_mdiobus_register(bus, np);
> =A0 =A0 =A0 =A0if (err)
> --
> 1.6.0.6
>
>



--=20
Grant Likely, B.Sc., P.Eng.
Secret Lab Technologies Ltd.

^ permalink raw reply

* Re: Support for PCI Express reset type in EEH
From: Mike Mason @ 2009-07-15 18:32 UTC (permalink / raw)
  To: linuxppc-dev, linux-pci, Paul Mackerras, benh, linasvepstas; +Cc: Richard Lary
In-Reply-To: <4A5CCFDF.7000901@us.ibm.com>

This patch was simultaneously submitted to Red Hat for review.  As a result of that review, I'm withdrawing this patch and will submit a new version shortly.

Mike

Mike Mason wrote:
> By default, EEH does what's known as a "hot reset" during error recovery 
> of a PCI Express device.  We've found a case where the device needs a 
> "fundamental reset" to recover properly.  The current PCI error recovery 
> and EEH frameworks do not support this distinction.
> 
> The attached patch (courtesy of Richard Lary) implements a reset type 
> callback that can be used to determine what type of reset a device 
> requires.  It is backwards compatible with all other drivers that 
> implement PCI error recovery callbacks.  Only drivers that require a 
> fundamental reset need to be changed.  So far we're only aware of one 
> driver that has the requirement (qla2xxx).  The patch touches mostly EEH 
> and pseries code, but does require a couple of minor additions to the 
> overall PCI error recovery framework.
> 
> Signed-off-by: Mike Mason <mmlnx@us.ibm.com>
> 
> --- a/arch/powerpc/include/asm/ppc-pci.h    2009-06-09 
> 20:05:27.000000000 -0700
> +++ b/arch/powerpc/include/asm/ppc-pci.h    2009-07-13 
> 16:12:31.000000000 -0700
> @@ -90,7 +90,9 @@ int rtas_pci_enable(struct pci_dn *pdn,
> *
> * Returns a non-zero value if the reset failed.
> */
> -int rtas_set_slot_reset (struct pci_dn *);
> +#define HOT_RESET        1
> +#define FUNDAMENTAL_RESET    3
> +int rtas_set_slot_reset (struct pci_dn *, int reset_type);
> int eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs);
> 
> /** --- a/arch/powerpc/platforms/pseries/eeh.c    2009-06-09 
> 20:05:27.000000000 -0700
> +++ b/arch/powerpc/platforms/pseries/eeh.c    2009-07-13 
> 16:27:27.000000000 -0700
> @@ -666,7 +666,7 @@ rtas_pci_enable(struct pci_dn *pdn, int
> /**
> * rtas_pci_slot_reset - raises/lowers the pci #RST line
> * @pdn pci device node
> - * @state: 1/0 to raise/lower the #RST
> + * @state: 1/3/0 to raise hot-reset/fundamental-reset/lower the #RST
> *
> * Clear the EEH-frozen condition on a slot.  This routine
> * asserts the PCI #RST line if the 'state' argument is '1',
> @@ -742,9 +742,9 @@ int pcibios_set_pcie_reset_state(struct
> *  Return 0 if success, else a non-zero value.
> */
> 
> -static void __rtas_set_slot_reset(struct pci_dn *pdn)
> +static void __rtas_set_slot_reset(struct pci_dn *pdn, int reset_type)
> {
> -    rtas_pci_slot_reset (pdn, 1);
> +    rtas_pci_slot_reset (pdn, reset_type);
> 
>     /* The PCI bus requires that the reset be held high for at least
>      * a 100 milliseconds. We wait a bit longer 'just in case'.  */
> @@ -766,13 +766,13 @@ static void __rtas_set_slot_reset(struct
>     msleep (PCI_BUS_SETTLE_TIME_MSEC);
> }
> 
> -int rtas_set_slot_reset(struct pci_dn *pdn)
> +int rtas_set_slot_reset(struct pci_dn *pdn, int reset_type)
> {
>     int i, rc;
> 
>     /* Take three shots at resetting the bus */
>     for (i=0; i<3; i++) {
> -        __rtas_set_slot_reset(pdn);
> +        __rtas_set_slot_reset(pdn, reset_type);
> 
>         rc = eeh_wait_for_slot_status(pdn, PCI_BUS_RESET_WAIT_MSEC);
>         if (rc == 0)
> --- a/arch/powerpc/platforms/pseries/eeh_driver.c    2009-07-13 
> 14:25:24.000000000 -0700
> +++ b/arch/powerpc/platforms/pseries/eeh_driver.c    2009-07-13 
> 16:39:16.000000000 -0700
> @@ -115,6 +115,34 @@ static void eeh_enable_irq(struct pci_de
> 
> /* ------------------------------------------------------- */
> /**
> + * eeh_query_reset_type - query each device driver for reset type
> + *
> + * Query each device driver for special reset type if required
> + * merge the device driver responses. Cumulative response
> + * passed back in "userdata".
> + */
> +
> +static int eeh_query_reset_type(struct pci_dev *dev, void *userdata)
> +{
> +    enum pci_ers_result rc, *res = userdata;
> +    struct pci_driver *driver = dev->driver;
> +
> +    if (!driver)
> +        return 0;
> +
> +    if (!driver->err_handler ||
> +        !driver->err_handler->reset_type)
> +        return 0;
> +
> +    rc = driver->err_handler->reset_type (dev);
> +
> +    /* A driver that needs a special reset trumps all others */
> +    if (rc == PCI_ERS_RESULT_FUNDAMENTAL_RESET ) *res = rc;
> +
> +    return 0;
> +}
> +
> +/**
> * eeh_report_error - report pci error to each device driver
> * * Report an EEH error to each device driver, collect up and @@ -282,9 
> +310,12 @@ static int eeh_report_failure(struct pci
> * @pe_dn: pointer to a "Partionable Endpoint" device node.
> *            This is the top-level structure on which pci
> *            bus resets can be performed.
> + *
> + * reset_type: some devices may require type other than default hot reset.
> */
> 
> -static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus)
> +static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus,
> +                 int reset_type)
> {
>     struct device_node *dn;
>     int cnt, rc;
> @@ -298,7 +329,7 @@ static int eeh_reset_device (struct pci_
>     /* Reset the pci controller. (Asserts RST#; resets config space).
>      * Reconfigure bridges and devices. Don't try to bring the system
>      * up if the reset failed for some reason. */
> -    rc = rtas_set_slot_reset(pe_dn);
> +    rc = rtas_set_slot_reset(pe_dn, reset_type);
>     if (rc)
>         return rc;
> 
> @@ -343,6 +374,7 @@ struct pci_dn * handle_eeh_events (struc
>     struct pci_dn *frozen_pdn;
>     struct pci_bus *frozen_bus;
>     int rc = 0;
> +    int reset_type = HOT_RESET;
>     enum pci_ers_result result = PCI_ERS_RESULT_NONE;
>     const char *location, *pci_str, *drv_str;
> 
> @@ -400,10 +432,16 @@ struct pci_dn * handle_eeh_events (struc
> 
>     /* Walk the various device drivers attached to this slot through
>      * a reset sequence, giving each an opportunity to do what it needs
> -     * to accomplish the reset.  Each child gets a report of the
> -     * status ... if any child can't handle the reset, then the entire
> -     * slot is dlpar removed and added.
> +     * to accomplish the reset.  Query device driver for special reset
> +     * requirements. Report eeh error to each child with cumulative
> +     * result status... if any child can't handle the reset,
> +     * then the entire slot is dlpar removed and added.
>      */
> +    pci_walk_bus(frozen_bus, eeh_query_reset_type, &result);
> +    if ( result == PCI_ERS_RESULT_FUNDAMENTAL_RESET )
> +        reset_type = FUNDAMENTAL_RESET;
> +
> +    result = PCI_ERS_RESULT_NONE;
>     pci_walk_bus(frozen_bus, eeh_report_error, &result);
> 
>     /* Get the current PCI slot state. This can take a long time,
> @@ -425,7 +463,8 @@ struct pci_dn * handle_eeh_events (struc
>      * go down willingly, without panicing the system.
>      */
>     if (result == PCI_ERS_RESULT_NONE) {
> -        rc = eeh_reset_device(frozen_pdn, frozen_bus);
> +        rc = eeh_reset_device(frozen_pdn, frozen_bus, reset_type);
> +
>         if (rc) {
>             printk(KERN_WARNING "EEH: Unable to reset, rc=%d\n", rc);
>             goto hard_fail;
> @@ -466,7 +505,7 @@ struct pci_dn * handle_eeh_events (struc
> 
>     /* If any device called out for a reset, then reset the slot */
>     if (result == PCI_ERS_RESULT_NEED_RESET) {
> -        rc = eeh_reset_device(frozen_pdn, NULL);
> +        rc = eeh_reset_device(frozen_pdn, NULL, reset_type);
>         if (rc) {
>             printk(KERN_WARNING "EEH: Cannot reset, rc=%d\n", rc);
>             goto hard_fail;
> --- a/include/linux/pci.h    2009-07-13 14:25:37.000000000 -0700
> +++ b/include/linux/pci.h    2009-07-13 16:12:31.000000000 -0700
> @@ -446,6 +446,9 @@ enum pci_ers_result {
> 
>     /* Device driver is fully recovered and operational */
>     PCI_ERS_RESULT_RECOVERED = (__force pci_ers_result_t) 5,
> +
> +    /* Device driver requires fundamental reset to recover */
> +    PCI_ERS_RESULT_FUNDAMENTAL_RESET = (__force pci_ers_result_t) 6,
> };
> 
> /* PCI bus error event callbacks */
> @@ -465,6 +468,9 @@ struct pci_error_handlers {
> 
>     /* Device driver may resume normal operations */
>     void (*resume)(struct pci_dev *dev);
> +
> +    /* PCI slot requires special reset type for recovery */
> +    pci_ers_result_t (*reset_type)(struct pci_dev *dev);
> };
> 
> /* ---------------------------------------------------------------- */
> --- a/arch/powerpc/include/asm/ppc-pci.h    2009-06-09 
> 20:05:27.000000000 -0700
> +++ b/arch/powerpc/include/asm/ppc-pci.h    2009-07-13 
> 16:12:31.000000000 -0700
> @@ -90,7 +90,9 @@ int rtas_pci_enable(struct pci_dn *pdn,
> *
> * Returns a non-zero value if the reset failed.
> */
> -int rtas_set_slot_reset (struct pci_dn *);
> +#define HOT_RESET        1
> +#define FUNDAMENTAL_RESET    3
> +int rtas_set_slot_reset (struct pci_dn *, int reset_type);
> int eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs);
> 
> /** --- a/arch/powerpc/platforms/pseries/eeh.c    2009-06-09 
> 20:05:27.000000000 -0700
> +++ b/arch/powerpc/platforms/pseries/eeh.c    2009-07-13 
> 16:27:27.000000000 -0700
> @@ -666,7 +666,7 @@ rtas_pci_enable(struct pci_dn *pdn, int
> /**
> * rtas_pci_slot_reset - raises/lowers the pci #RST line
> * @pdn pci device node
> - * @state: 1/0 to raise/lower the #RST
> + * @state: 1/3/0 to raise hot-reset/fundamental-reset/lower the #RST
> *
> * Clear the EEH-frozen condition on a slot.  This routine
> * asserts the PCI #RST line if the 'state' argument is '1',
> @@ -742,9 +742,9 @@ int pcibios_set_pcie_reset_state(struct
> *  Return 0 if success, else a non-zero value.
> */
> 
> -static void __rtas_set_slot_reset(struct pci_dn *pdn)
> +static void __rtas_set_slot_reset(struct pci_dn *pdn, int reset_type)
> {
> -    rtas_pci_slot_reset (pdn, 1);
> +    rtas_pci_slot_reset (pdn, reset_type);
> 
>     /* The PCI bus requires that the reset be held high for at least
>      * a 100 milliseconds. We wait a bit longer 'just in case'.  */
> @@ -766,13 +766,13 @@ static void __rtas_set_slot_reset(struct
>     msleep (PCI_BUS_SETTLE_TIME_MSEC);
> }
> 
> -int rtas_set_slot_reset(struct pci_dn *pdn)
> +int rtas_set_slot_reset(struct pci_dn *pdn, int reset_type)
> {
>     int i, rc;
> 
>     /* Take three shots at resetting the bus */
>     for (i=0; i<3; i++) {
> -        __rtas_set_slot_reset(pdn);
> +        __rtas_set_slot_reset(pdn, reset_type);
> 
>         rc = eeh_wait_for_slot_status(pdn, PCI_BUS_RESET_WAIT_MSEC);
>         if (rc == 0)
> --- a/arch/powerpc/platforms/pseries/eeh_driver.c    2009-07-13 
> 14:25:24.000000000 -0700
> +++ b/arch/powerpc/platforms/pseries/eeh_driver.c    2009-07-13 
> 16:39:16.000000000 -0700
> @@ -115,6 +115,34 @@ static void eeh_enable_irq(struct pci_de
> 
> /* ------------------------------------------------------- */
> /**
> + * eeh_query_reset_type - query each device driver for reset type
> + *
> + * Query each device driver for special reset type if required
> + * merge the device driver responses. Cumulative response
> + * passed back in "userdata".
> + */
> +
> +static int eeh_query_reset_type(struct pci_dev *dev, void *userdata)
> +{
> +    enum pci_ers_result rc, *res = userdata;
> +    struct pci_driver *driver = dev->driver;
> +
> +    if (!driver)
> +        return 0;
> +
> +    if (!driver->err_handler ||
> +        !driver->err_handler->reset_type)
> +        return 0;
> +
> +    rc = driver->err_handler->reset_type (dev);
> +
> +    /* A driver that needs a special reset trumps all others */
> +    if (rc == PCI_ERS_RESULT_FUNDAMENTAL_RESET ) *res = rc;
> +
> +    return 0;
> +}
> +
> +/**
> * eeh_report_error - report pci error to each device driver
> * * Report an EEH error to each device driver, collect up and @@ -282,9 
> +310,12 @@ static int eeh_report_failure(struct pci
> * @pe_dn: pointer to a "Partionable Endpoint" device node.
> *            This is the top-level structure on which pci
> *            bus resets can be performed.
> + *
> + * reset_type: some devices may require type other than default hot reset.
> */
> 
> -static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus)
> +static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus,
> +                 int reset_type)
> {
>     struct device_node *dn;
>     int cnt, rc;
> @@ -298,7 +329,7 @@ static int eeh_reset_device (struct pci_
>     /* Reset the pci controller. (Asserts RST#; resets config space).
>      * Reconfigure bridges and devices. Don't try to bring the system
>      * up if the reset failed for some reason. */
> -    rc = rtas_set_slot_reset(pe_dn);
> +    rc = rtas_set_slot_reset(pe_dn, reset_type);
>     if (rc)
>         return rc;
> 
> @@ -343,6 +374,7 @@ struct pci_dn * handle_eeh_events (struc
>     struct pci_dn *frozen_pdn;
>     struct pci_bus *frozen_bus;
>     int rc = 0;
> +    int reset_type = HOT_RESET;
>     enum pci_ers_result result = PCI_ERS_RESULT_NONE;
>     const char *location, *pci_str, *drv_str;
> 
> @@ -400,10 +432,16 @@ struct pci_dn * handle_eeh_events (struc
> 
>     /* Walk the various device drivers attached to this slot through
>      * a reset sequence, giving each an opportunity to do what it needs
> -     * to accomplish the reset.  Each child gets a report of the
> -     * status ... if any child can't handle the reset, then the entire
> -     * slot is dlpar removed and added.
> +     * to accomplish the reset.  Query device driver for special reset
> +     * requirements. Report eeh error to each child with cumulative
> +     * result status... if any child can't handle the reset,
> +     * then the entire slot is dlpar removed and added.
>      */
> +    pci_walk_bus(frozen_bus, eeh_query_reset_type, &result);
> +    if ( result == PCI_ERS_RESULT_FUNDAMENTAL_RESET )
> +        reset_type = FUNDAMENTAL_RESET;
> +
> +    result = PCI_ERS_RESULT_NONE;
>     pci_walk_bus(frozen_bus, eeh_report_error, &result);
> 
>     /* Get the current PCI slot state. This can take a long time,
> @@ -425,7 +463,8 @@ struct pci_dn * handle_eeh_events (struc
>      * go down willingly, without panicing the system.
>      */
>     if (result == PCI_ERS_RESULT_NONE) {
> -        rc = eeh_reset_device(frozen_pdn, frozen_bus);
> +        rc = eeh_reset_device(frozen_pdn, frozen_bus, reset_type);
> +
>         if (rc) {
>             printk(KERN_WARNING "EEH: Unable to reset, rc=%d\n", rc);
>             goto hard_fail;
> @@ -466,7 +505,7 @@ struct pci_dn * handle_eeh_events (struc
> 
>     /* If any device called out for a reset, then reset the slot */
>     if (result == PCI_ERS_RESULT_NEED_RESET) {
> -        rc = eeh_reset_device(frozen_pdn, NULL);
> +        rc = eeh_reset_device(frozen_pdn, NULL, reset_type);
>         if (rc) {
>             printk(KERN_WARNING "EEH: Cannot reset, rc=%d\n", rc);
>             goto hard_fail;
> --- a/include/linux/pci.h    2009-07-13 14:25:37.000000000 -0700
> +++ b/include/linux/pci.h    2009-07-13 16:12:31.000000000 -0700
> @@ -446,6 +446,9 @@ enum pci_ers_result {
> 
>     /* Device driver is fully recovered and operational */
>     PCI_ERS_RESULT_RECOVERED = (__force pci_ers_result_t) 5,
> +
> +    /* Device driver requires fundamental reset to recover */
> +    PCI_ERS_RESULT_FUNDAMENTAL_RESET = (__force pci_ers_result_t) 6,
> };
> 
> /* PCI bus error event callbacks */
> @@ -465,6 +468,9 @@ struct pci_error_handlers {
> 
>     /* Device driver may resume normal operations */
>     void (*resume)(struct pci_dev *dev);
> +
> +    /* PCI slot requires special reset type for recovery */
> +    pci_ers_result_t (*reset_type)(struct pci_dev *dev);
> };
> 
> /* ---------------------------------------------------------------- */
> 
> 

^ permalink raw reply

* [PATCH] Support for PCI Express reset type in EEH
From: Mike Mason @ 2009-07-15 18:45 UTC (permalink / raw)
  To: linuxppc-dev, linux-pci, Paul Mackerras, benh, linasvepstas; +Cc: Richard Lary
In-Reply-To: <4A5CCFDF.7000901@us.ibm.com>

By default, EEH does what's known as a "hot reset" during error recovery of a PCI Express device.  We've found a case where the device needs a "fundamental reset" to recover properly.  The current PCI error recovery and EEH frameworks do not support this distinction.

The attached patch (courtesy of Richard Lary) adds a bit field to pci_dev that indicates whether the device requires a fundamental reset during error recovery.  This bit can be checked by EEH to determine which reset type is required.

This patch supersedes the previously submitted patch that implemented a reset type callback.

Please review and let me know of any concerns.

Signed-off-by: Mike Mason <mmlnx@us.ibm.com> 

diff -uNrp a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
--- a/arch/powerpc/kernel/pci_64.c	2009-07-13 14:25:24.000000000 -0700
+++ b/arch/powerpc/kernel/pci_64.c	2009-07-15 10:26:26.000000000 -0700
@@ -143,6 +143,7 @@ struct pci_dev *of_create_pci_dev(struct
 	dev->dev.bus = &pci_bus_type;
 	dev->devfn = devfn;
 	dev->multifunction = 0;		/* maybe a lie? */
+	dev->fndmntl_rst_rqd = 0;       /* pcie fundamental reset required */
 
 	dev->vendor = get_int_prop(node, "vendor-id", 0xffff);
 	dev->device = get_int_prop(node, "device-id", 0xffff);
diff -uNrp a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
--- a/arch/powerpc/platforms/pseries/eeh.c	2009-06-09 20:05:27.000000000 -0700
+++ b/arch/powerpc/platforms/pseries/eeh.c	2009-07-15 10:29:04.000000000 -0700
@@ -744,7 +744,15 @@ int pcibios_set_pcie_reset_state(struct
 
 static void __rtas_set_slot_reset(struct pci_dn *pdn)
 {
-	rtas_pci_slot_reset (pdn, 1);
+	struct pci_dev *dev = pdn->pcidev;
+
+	/* Determine type of EEH reset required by device,
+	 * default hot reset or fundamental reset
+	 */
+	if (dev->fndmntl_rst_rqd)
+		rtas_pci_slot_reset(pdn, 3);
+	else
+		rtas_pci_slot_reset(pdn, 1);
 
 	/* The PCI bus requires that the reset be held high for at least
 	 * a 100 milliseconds. We wait a bit longer 'just in case'.  */
diff -uNrp a/include/linux/pci.h b/include/linux/pci.h
--- a/include/linux/pci.h	2009-07-13 14:25:37.000000000 -0700
+++ b/include/linux/pci.h	2009-07-15 10:25:37.000000000 -0700
@@ -273,6 +273,7 @@ struct pci_dev {
 	unsigned int	ari_enabled:1;	/* ARI forwarding */
 	unsigned int	is_managed:1;
 	unsigned int	is_pcie:1;
+	unsigned int    fndmntl_rst_rqd:1; /* Dev requires fundamental reset */
 	unsigned int	state_saved:1;
 	unsigned int	is_physfn:1;
 	unsigned int	is_virtfn:1;

^ permalink raw reply

* Re: [00/15] swiotlb cleanup
From: Becky Bruce @ 2009-07-15 20:24 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Jeremy Fitzhardinge, tony.luck, linux-ia64, Ian Campbell,
	Joerg Roedel, x86, linux-kernel@vger.kernel.org Mailing List,
	FUJITA Tomonori, linuxppc-dev@ozlabs.org list
In-Reply-To: <68EFFAF6-EF5B-4148-BC54-70BF2AF2456E@kernel.crashing.org>


On Jul 13, 2009, at 10:13 PM, Becky Bruce wrote:

>
> On Jul 10, 2009, at 12:12 AM, Ingo Molnar wrote:
>
>>
>> * FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp> wrote:
>>
>>> - removes unused (and unnecessary) hooks in swiotlb.
>>>
>>> - adds dma_capable() and converts swiotlb to use it. It can be  
>>> used to
>>> know if a memory area is dma capable or not. I added
>>> is_buffer_dma_capable() for the same purpose long ago but it turned
>>> out that the function doesn't work on POWERPC.
>>>
>>> This can be applied cleanly to linux-next, -mm, and mainline. This
>>> patchset touches multiple architectures (ia64, powerpc, x86) so I
>>> guess that -mm is appropriate for this patchset (I don't care much
>>> what tree would merge this though).
>>>
>>> This is tested on x86 but only compile tested on POWERPC and IA64.
>>>
>>> Thanks,
>>>
>>> =
>>> arch/ia64/include/asm/dma-mapping.h    |   18 ++++++
>>> arch/powerpc/include/asm/dma-mapping.h |   23 +++++++
>>> arch/powerpc/kernel/dma-swiotlb.c      |   48 +---------------
>>> arch/x86/include/asm/dma-mapping.h     |   18 ++++++
>>> arch/x86/kernel/pci-dma.c              |    2 +-
>>> arch/x86/kernel/pci-gart_64.c          |    5 +-
>>> arch/x86/kernel/pci-nommu.c            |    2 +-
>>> arch/x86/kernel/pci-swiotlb.c          |   25 --------
>>> include/linux/dma-mapping.h            |    5 --
>>> include/linux/swiotlb.h                |   11 ----
>>> lib/swiotlb.c                          |  102 +++++++++-----------------------
>>> 11 files changed, 92 insertions(+), 167 deletions(-)
>>
>> Hm, the functions and facilities you remove here were added as part
>> of preparatory patches for Xen guest support. You were aware of
>> them, you were involved in discussions about those aspects with Ian
>> and Jeremy but still you chose not to Cc: either of them and you
>> failed to address that aspect in the changelogs.
>>
>> I'd like the Xen code to become cleaner more than anyone else here i
>> guess, but patch submission methods like this are not really
>> helpful. A far better method is to be open about such disagreements,
>> to declare them, to Cc: everyone who disagrees, and to line out the
>> arguments in the changelogs as well - instead of just curtly
>> declaring those APIs 'unused' and failing to Cc: involved parties.
>>
>> Alas, on the technical level the cleanups themselves look mostly
>> fine to me. Ian, Jeremy, the changes will alter Xen's use of
>> swiotlb, but can the Xen side still live with these new methods - in
>> particular is dma_capable() sufficient as a mechanism and can the
>> Xen side filter out DMA allocations to make them physically
>> continuous?
>>
>> Ben, Tony, Becky, any objections wrt. the PowerPC / IA64 impact? If
>> everyone agrees i can apply them to the IOMMU tree, test it and push
>> it out to -next, etc.
>>
>
> Ingo,
>
> With the exception of the patch I commented on, I think these look  
> OK from the powerpc point of view.  I've successfully booted one of  
> my test platforms with the entire series applied and will run some  
> more extensive (i.e. not "Whee!  A prompt!") tests tomorrow.

Well, I am still testing.  I've observed one unexpected LTP testcase  
failure with these patches applied, but so far have been unable to  
reproduce it.  So these patches are probably OK, but I will look into  
this some more next week.

-Becky

>
>
> -Becky
>
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev

^ permalink raw reply

* [PATCH] Hold reference to device_node during EEH event handling
From: Mike Mason @ 2009-07-15 21:43 UTC (permalink / raw)
  To: linuxppc-dev, Paul Mackerras, benh, linasvepstas

This patch increments the device_node reference counter when an EEH error occurs and decrements the counter when the event has been handled.  This is to prevent the device_node from being released until eeh_event_handler() has had a chance to deal with the event.  We've seen cases where the device_node is released too soon when an EEH event occurs during a dlpar remove, causing the event handler to attempt to access bad memory locations.

Please review and let me know of any concerns.

Signed-off-by: Mike Mason <mmlnx@us.ibm.com> 

--- a/arch/powerpc/platforms/pseries/eeh_event.c	2008-10-09 15:13:53.000000000 -0700
+++ b/arch/powerpc/platforms/pseries/eeh_event.c	2009-07-14 14:14:00.000000000 -0700
@@ -75,6 +75,14 @@ static int eeh_event_handler(void * dumm
 	if (event == NULL)
 		return 0;
 
+	/* EEH holds a reference to the device_node, so if it
+	 * equals 1 it's no longer valid and the event should
+	 * be ignored */
+	if (atomic_read(&event->dn->kref.refcount) == 1) {
+		of_node_put(event->dn);
+		return 0;
+	}
+
 	/* Serialize processing of EEH events */
 	mutex_lock(&eeh_event_mutex);
 	eeh_mark_slot(event->dn, EEH_MODE_RECOVERING);
@@ -86,6 +94,7 @@ static int eeh_event_handler(void * dumm
 
 	eeh_clear_slot(event->dn, EEH_MODE_RECOVERING);
 	pci_dev_put(event->dev);
+	of_node_put(event->dn);
 	kfree(event);
 	mutex_unlock(&eeh_event_mutex);
 
@@ -140,7 +149,7 @@ int eeh_send_failure_event (struct devic
 	if (dev)
 		pci_dev_get(dev);
 
-	event->dn = dn;
+	event->dn = of_node_get(dn);
 	event->dev = dev;
 
 	/* We may or may not be called in an interrupt context */

^ permalink raw reply

* Re: having access to interrupt specifier in map() function
From: Benjamin Herrenschmidt @ 2009-07-15 22:33 UTC (permalink / raw)
  To: Kumar Gala; +Cc: linuxppc-dev@ozlabs.org list
In-Reply-To: <761CE7B3-A7FD-41FD-A828-B9F17F73DF32@kernel.crashing.org>

On Wed, 2009-07-15 at 09:11 -0500, Kumar Gala wrote:
> Ben,
> 
> Do you have any ideas on keeping access to the interrupt specifier  
> around so when we call map() we have access to it.  Our HV guys are  
> looking at using additional bits in the interrupt specifier to encode  
> information beyond just level/sense of the IRQ and want to make  
> decisions based on it during map().
> 
> Maybe we can keep it around in irq_map[].

Or we could translate that additional info into flags in the IRQ desc?

Might be possible to request some arch specific flags in there.

Cheers,
Ben.

^ permalink raw reply

* Re: removing addr_needs_map in struct dma_mapping_ops
From: Becky Bruce @ 2009-07-15 23:59 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: linuxppc-dev, linux-kernel
In-Reply-To: <20090714094919V.fujita.tomonori@lab.ntt.co.jp>


On Jul 13, 2009, at 7:49 PM, FUJITA Tomonori wrote:

> On Mon, 13 Jul 2009 16:50:43 -0500
> Becky Bruce <beckyb@kernel.crashing.org> wrote:
>
>>> talked about defining something like struct dma_data. Then we could
>>>
>>> struct dev_archdata {
>>>     ...
>>>
>>>     struct dma_data *ddata;
>>> };
>>>
>>> or
>>>
>>> struct dev_archdata {
>>>     ...
>>>
>>>     struct dma_data ddata;
>>> };
>>>
>>>
>>> struct dma_data needs dma_direct_offset, iommu_table, dma_base, and
>>> dma_window_size, anything else?
>>
>> IIRC, what we had talked about was simpler - we talked about changing
>> the current dev_archdata from this:
>>
>> struct dev_archdata {
>>        struct device_node      *of_node;
>>        struct dma_mapping_ops  *dma_ops;
>>        void                    *dma_data;
>> };
>>
>> to this:
>>
>> struct dev_archdata {
>> 	struct device_node *of_node;
>> 	struct dma_mapping_ops *dma_ops;
>> 	unsigned long long dma_data;
>> #ifdef CONFIG_SWIOTLB
>> 	dma_addr_t max_direct_dma_addr;
>> #endif
>> };
>>
>> Where max_direct_dma_addr is the address beyond which a specific
>> device must use swiotlb, and dma_data is the offset like it is now
>> (but wider on 32-bit systems than void *). I believe Ben had  
>> mentioned
>> wanting to make the max_direct_dma_addr part conditional so we don't
>> bloat archdata on platforms that don't ever bounce.
>
> Only maximum address is enough? The minimum (dma_window_base_cur in
> swiotlb_pci_addr_needs_map) is not necessary?
>
>
>> The change to the type of dma_data is actually in preparation for an
>> optimization I have planned for 64-bit PCI devices (and which  
>> probably
>> requires more discussion), so that doesn't need to happen now -  just
>> leave it as a void *, and I can post a followup patch.
>>
>> Let me know if I can help or do any testing - I've been meaning to
>> look into switching to dma_map_ops for a while now but it hasn't
>> managed to pop off my todo stack.
>
> Ok, how about this? I'm not familiar with POWERPC so I might
> misunderstand something.

This is close, but it misses the setup for non-PCI devices. We have a
bus notifier that we use to set up archdata for those devices:
ppc_swiotlb_bus_notify() in arch/powerpc/kernel/dma-swiotlb.c. Not
setting it up there won't cause breakage, because those devices will
fall through to dma_capable(), but I think we should initialize it
anyway (who knows what it will end up being used for later).

>
>
>
> diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/ 
> include/asm/device.h
> index 7d2277c..0086f8d 100644
> --- a/arch/powerpc/include/asm/device.h
> +++ b/arch/powerpc/include/asm/device.h
> @@ -16,6 +16,9 @@ struct dev_archdata {
> 	/* DMA operations on that device */
> 	struct dma_mapping_ops	*dma_ops;
> 	void			*dma_data;
> +#ifdef CONFIG_SWIOTLB
> +	dma_addr_t		max_direct_dma_addr;
> +#endif
> };
>
> static inline void dev_archdata_set_node(struct dev_archdata *ad,
> diff --git a/arch/powerpc/include/asm/swiotlb.h b/arch/powerpc/ 
> include/asm/swiotlb.h
> index 30891d6..b23a4f1 100644
> --- a/arch/powerpc/include/asm/swiotlb.h
> +++ b/arch/powerpc/include/asm/swiotlb.h
> @@ -24,4 +24,6 @@ static inline void dma_mark_clean(void *addr,  
> size_t size) {}
> extern unsigned int ppc_swiotlb_enable;
> int __init swiotlb_setup_bus_notifier(void);
>
> +extern void pci_dma_dev_setup_swiotlb(struct pci_dev *pdev);
> +
> #endif /* __ASM_SWIOTLB_H */
> diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/ 
> dma-swiotlb.c
> index 68ccf11..e21359e 100644
> --- a/arch/powerpc/kernel/dma-swiotlb.c
> +++ b/arch/powerpc/kernel/dma-swiotlb.c
> @@ -56,39 +56,16 @@ swiotlb_arch_address_needs_mapping(struct device  
> *hwdev, dma_addr_t addr,
> 				   size_t size)
> {
> 	struct dma_mapping_ops *dma_ops = get_dma_ops(hwdev);
> +	struct dev_archdata *sd = &hwdev->archdata;
>
> 	BUG_ON(!dma_ops);
> -	return dma_ops->addr_needs_map(hwdev, addr, size);
> -}

You can get rid of the dma_ops stuff here.... it's no longer needed.

>
>
> -/*
> - * Determine if an address is reachable by a pci device, or if we  
> must bounce.
> - */
> -static int
> -swiotlb_pci_addr_needs_map(struct device *hwdev, dma_addr_t addr,  
> size_t size)
> -{
> -	u64 mask = dma_get_mask(hwdev);
> -	dma_addr_t max;
> -	struct pci_controller *hose;
> -	struct pci_dev *pdev = to_pci_dev(hwdev);
> -
> -	hose = pci_bus_to_host(pdev->bus);
> -	max = hose->dma_window_base_cur + hose->dma_window_size;
> -
> -	/* check that we're within mapped pci window space */
> -	if ((addr + size > max) | (addr < hose->dma_window_base_cur))
> +	if (sd->max_direct_dma_addr && addr + size > sd->max_direct_dma_addr)
> 		return 1;
>
> -	return !is_buffer_dma_capable(mask, addr, size);
> -}
> -
> -static int
> -swiotlb_addr_needs_map(struct device *hwdev, dma_addr_t addr,  
> size_t size)
> -{
> 	return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size);
> }
>
> -
> /*
>  * At the moment, all platforms that use this code only require
>  * swiotlb to be used if we're operating on HIGHMEM.  Since
> @@ -104,7 +81,6 @@ struct dma_mapping_ops swiotlb_dma_ops = {
> 	.dma_supported = swiotlb_dma_supported,
> 	.map_page = swiotlb_map_page,
> 	.unmap_page = swiotlb_unmap_page,
> -	.addr_needs_map = swiotlb_addr_needs_map,
> 	.sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu,
> 	.sync_single_range_for_device = swiotlb_sync_single_range_for_device,
> 	.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
> @@ -119,13 +95,23 @@ struct dma_mapping_ops swiotlb_pci_dma_ops = {
> 	.dma_supported = swiotlb_dma_supported,
> 	.map_page = swiotlb_map_page,
> 	.unmap_page = swiotlb_unmap_page,
> -	.addr_needs_map = swiotlb_pci_addr_needs_map,
> 	.sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu,
> 	.sync_single_range_for_device = swiotlb_sync_single_range_for_device,
> 	.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
> 	.sync_sg_for_device = swiotlb_sync_sg_for_device
> };
>
> +void pci_dma_dev_setup_swiotlb(struct pci_dev *pdev)
> +{
> +	struct pci_controller *hose;
> +	struct dev_archdata *sd;
> +
> +	hose = pci_bus_to_host(pdev->bus);
> +	sd = &pdev->dev.archdata;
> +	sd->max_direct_dma_addr =
> +		hose->dma_window_base_cur + hose->dma_window_size;
> +}
> +
> static int ppc_swiotlb_bus_notify(struct notifier_block *nb,
> 				  unsigned long action, void *data)
> {
> diff --git a/arch/powerpc/platforms/85xx/mpc8536_ds.c b/arch/powerpc/ 
> platforms/85xx/mpc8536_ds.c
> index 055ff41..401751b 100644
> --- a/arch/powerpc/platforms/85xx/mpc8536_ds.c
> +++ b/arch/powerpc/platforms/85xx/mpc8536_ds.c
> @@ -136,6 +136,7 @@ define_machine(mpc8536_ds) {
> 	.init_IRQ		= mpc8536_ds_pic_init,
> #ifdef CONFIG_PCI
> 	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
> +	.pci_dma_dev_setup	= pci_dma_dev_setup_swiotlb,
> #endif
> 	.get_irq		= mpic_get_irq,
> 	.restart		= fsl_rstcr_restart,
> diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ds.c b/arch/powerpc/ 
> platforms/85xx/mpc85xx_ds.c
> index 849c0ac..1ba8e38 100644
> --- a/arch/powerpc/platforms/85xx/mpc85xx_ds.c
> +++ b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
> @@ -277,6 +277,7 @@ define_machine(mpc8544_ds) {
> 	.init_IRQ		= mpc85xx_ds_pic_init,
> #ifdef CONFIG_PCI
> 	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
> +	.pci_dma_dev_setup	= pci_dma_dev_setup_swiotlb,
> #endif
> 	.get_irq		= mpic_get_irq,
> 	.restart		= fsl_rstcr_restart,
> @@ -291,6 +292,7 @@ define_machine(mpc8572_ds) {
> 	.init_IRQ		= mpc85xx_ds_pic_init,
> #ifdef CONFIG_PCI
> 	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
> +	.pci_dma_dev_setup	= pci_dma_dev_setup_swiotlb,
> #endif
> 	.get_irq		= mpic_get_irq,
> 	.restart		= fsl_rstcr_restart,
> @@ -305,6 +307,7 @@ define_machine(p2020_ds) {
> 	.init_IRQ		= mpc85xx_ds_pic_init,
> #ifdef CONFIG_PCI
> 	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
> +	.pci_dma_dev_setup	= pci_dma_dev_setup_swiotlb,
> #endif
> 	.get_irq		= mpic_get_irq,
> 	.restart		= fsl_rstcr_restart,
> diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/ 
> powerpc/platforms/85xx/mpc85xx_mds.c
> index 60ed9c0..165a2de 100644
> --- a/arch/powerpc/platforms/85xx/mpc85xx_mds.c
> +++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
> @@ -356,6 +356,7 @@ define_machine(mpc8568_mds) {
> 	.progress	= udbg_progress,
> #ifdef CONFIG_PCI
> 	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
> +	.pci_dma_dev_setup	= pci_dma_dev_setup_swiotlb,
> #endif
> };
>
> @@ -377,5 +378,6 @@ define_machine(mpc8569_mds) {
> 	.progress	= udbg_progress,
> #ifdef CONFIG_PCI
> 	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
> +	.pci_dma_dev_setup	= pci_dma_dev_setup_swiotlb,
> #endif
> };
> diff --git a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c b/arch/ 
> powerpc/platforms/86xx/mpc86xx_hpcn.c
> index 6632702..d1878f3 100644
> --- a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
> +++ b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
> @@ -187,5 +187,6 @@ define_machine(mpc86xx_hpcn) {
> 	.progress		= udbg_progress,
> #ifdef CONFIG_PCI
> 	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
> +	.pci_dma_dev_setup	= pci_dma_dev_setup_swiotlb,
> #endif
> };

Instead of initializing this here (which has problems if
!CONFIG_SWIOTLB), place this in the xxxxx_xxxx_setup_arch function in
the same files, which already have an #ifdef CONFIG_SWIOTLB in which
this can be embedded.

I'm about to be off-list for a few days but will be happy to help when  
I'm back next week.

Thanks!
Becky


>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply

* Re: [PATCH 2/4] edac: mpc85xx add mpc83xx support
From: Doug Thompson @ 2009-07-16  0:14 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Ira W. Snyder, linux-kernel, Dave Jiang, linuxppc-dev,
	bluesmoke-devel
In-Reply-To: <20090715125249.e746496f.akpm@linux-foundation.org>

Ira or Kumar,

can you address Andrew's concerns below and what was posted in prior posts on this?

thanks

doug t

--- On Wed, 7/15/09, Andrew Morton <akpm@linux-foundation.org> wrote:

> From: Andrew Morton <akpm@linux-foundation.org>
> Subject: Re: [PATCH 2/4] edac: mpc85xx add mpc83xx support
> To: dougthompson@xmission.com
> Cc: bluesmoke-devel@lists.sourceforge.net, linux-kernel@vger.kernel.org
> Date: Wednesday, July 15, 2009, 1:52 PM
> On Wed, 15 Jul 2009 11:38:49 -0600
> dougthompson@xmission.com
> wrote:
>
> >
> > Add support for the Freescale MPC83xx memory
> controller to the existing
> > driver for the Freescale MPC85xx memory controller.
> The only difference
> > between the two processors are in the CS_BNDS register
> parsing code, which
> > has been changed so it will work on both processors.
> >
> > The L2 cache controller does not exist on the MPC83xx,
> but the OF subsystem
> > will not use the driver if the device is not present
> in the OF device tree.
> >
> >
> > Kumar, I had to change the nr_pages calculation to
> make the math work
> > out. I checked it on my board and did the math by hand
> for a 64GB 85xx
> > using 64K pages. In both cases, nr_pages * PAGE_SIZE
> comes out to the
> > correct value. Thanks for the help.
> >
> > v1 -> v2:
> >   * Use PAGE_SHIFT to parse cs_bnds
> regardless of board type
> >   * Remove special-casing for the 83xx
> processor
> >
> > ...
> >
> > @@ -789,19 +791,20 @@ static void __devinit
> mpc85xx_init_csrow
> >          csrow =
> &mci->csrows[index];
> >          cs_bnds =
> in_be32(pdata->mc_vbase + MPC85XX_MC_CS_BNDS_0 +
> >
>           (index *
> MPC85XX_MC_CS_BNDS_OFS));
> > -        start =
> (cs_bnds & 0xfff0000) << 4;
> > -        end = ((cs_bnds
> & 0xfff) << 20);
> > -        if (start)
> > -
>     start |= 0xfffff;
> > -        if (end)
> > -
>     end |= 0xfffff;
> > +
> > +        start =
> (cs_bnds & 0xffff0000) >> 16;
> > +
> end   = (cs_bnds & 0x0000ffff);
> >
> >          if (start
> == end)
> >
>     continue;    /* not
> populated */
> >
> > +        start <<=
> (24 - PAGE_SHIFT);
> > +
> end   <<= (24 - PAGE_SHIFT);
> > +        end
>   |= (1 << (24 - PAGE_SHIFT)) - 1;
>
> <stares for a while>
>
> That looks like the original code was really really wrong.
>
> The setting of all the lower bits in `end' is
> funny-looking.  What's
> happening here?  Should it be commented?
>
>
> >
> csrow->first_page = start >> PAGE_SHIFT;
> >
> csrow->last_page = end >> PAGE_SHIFT;
> > -
> csrow->nr_pages = csrow->last_page + 1 -
> csrow->first_page;
> > +
> csrow->nr_pages = end + 1 - start;
> >
> csrow->grain = 8;
> >
> csrow->mtype = mtype;
> >
> csrow->dtype = DEV_UNKNOWN;
> > @@ -985,6 +988,7 @@ static struct of_device_id
> mpc85xx_mc_er
> >      { .compatible =
> "fsl,mpc8560-memory-controller", },
> >      { .compatible =
> "fsl,mpc8568-memory-controller", },
> >      { .compatible =
> "fsl,mpc8572-memory-controller", },
> > +    { .compatible =
> "fsl,mpc8349-memory-controller", },
> >      { .compatible =
> "fsl,p2020-memory-controller", },
> >      {},
> >  };
> > @@ -1001,13 +1005,13 @@ static struct
> of_platform_driver mpc85xx
> >
>    },
> >  };
> >
> > -
> > +#ifdef CONFIG_MPC85xx
> >  static void __init mpc85xx_mc_clear_rfxe(void
> *data)
> >  {
> >      orig_hid1[smp_processor_id()]
> = mfspr(SPRN_HID1);
> >      mtspr(SPRN_HID1,
> (orig_hid1[smp_processor_id()] & ~0x20000));
> >  }
> > -
> > +#endif
> >
> >  static int __init mpc85xx_mc_init(void)
> >  {
> > @@ -1040,26 +1044,32 @@ static int __init
> mpc85xx_mc_init(void)
> >
> printk(KERN_WARNING EDAC_MOD_STR "PCI fails to
> register\n");
> >  #endif
> >
> > +#ifdef CONFIG_MPC85xx
> >      /*
> >       * need to clear
> HID1[RFXE] to disable machine check int
> >       * so we can catch
> it
> >       */
> >      if (edac_op_state ==
> EDAC_OPSTATE_INT)
> >
> on_each_cpu(mpc85xx_mc_clear_rfxe, NULL, 0);
> > +#endif
> >
> >      return 0;
> >  }
>
> The patch adds lots of ifdefs :(
>
> >  module_init(mpc85xx_mc_init);
> >
> > +#ifdef CONFIG_MPC85xx
> >  static void __exit mpc85xx_mc_restore_hid1(void
> *data)
> >  {
> >      mtspr(SPRN_HID1,
> orig_hid1[smp_processor_id()]);
> >  }
> > +#endif
>
> afacit this will run smp_processor_id() from within
> preemptible code,
> which is often buggy on preemptible kernels and will cause
> runtime
> warnings on at least some architectures.
>
> >  static void __exit mpc85xx_mc_exit(void)
> >  {
> > +#ifdef CONFIG_MPC85xx
> >
> on_each_cpu(mpc85xx_mc_restore_hid1, NULL, 0);
> > +#endif
> >  #ifdef CONFIG_PCI
> >
> of_unregister_platform_driver(&mpc85xx_pci_err_driver);
> >  #endif
>

^ permalink raw reply

* [PATCH] kmemleak: Allow kmemleak to be built on powerpc
From: Michael Ellerman @ 2009-07-16  1:25 UTC (permalink / raw)
  To: catalin.marinas; +Cc: linuxppc-dev

Very lightly tested, doesn't crash the kernel.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
---

It doesn't look like we actually need to add any support in the
arch code - or is there something I'm missing?


 lib/Kconfig.debug |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 12327b2..d5ca9a5 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -338,7 +338,7 @@ config SLUB_STATS
 
 config DEBUG_KMEMLEAK
 	bool "Kernel memory leak detector"
-	depends on DEBUG_KERNEL && EXPERIMENTAL && (X86 || ARM) && \
+	depends on DEBUG_KERNEL && EXPERIMENTAL && (X86 || ARM || PPC) && \
 		!MEMORY_HOTPLUG
 	select DEBUG_FS if SYSFS
 	select STACKTRACE if STACKTRACE_SUPPORT
-- 
1.6.2.1

^ permalink raw reply related

* Re: [RFC/PATCH] mm: Pass virtual address to [__]p{te,ud,md}_free_tlb()
From: Michael Ellerman @ 2009-07-16  1:36 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: Linux-Arch, Nick Piggin, linuxppc-dev, Hugh Dickins, linux-kernel,
	Linux Memory Management
In-Reply-To: <20090715074952.A36C7DDDB2@ozlabs.org>

[-- Attachment #1: Type: text/plain, Size: 670 bytes --]

On Wed, 2009-07-15 at 17:49 +1000, Benjamin Herrenschmidt wrote:
> Upcoming patches to support the new 64-bit "BookE" powerpc architecture
> will need to have the virtual address corresponding to PTE page when
> freeing it, due to the way the HW table walker works.

> I haven't had a chance to test or even build on most architectures, the
> patch is reasonably trivial but I may have screwed up regardless, I
> apologize in advance, let me know if something is wrong.

Builds for the important architectures, powerpc, ia64, arm, sparc,
sparc64, oh and x86:

http://kisskb.ellerman.id.au/kisskb/head/1976/

(based on your test branch 34f25476)

cheers

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 197 bytes --]

^ permalink raw reply

* Re: [PATCH] Hold reference to device_node during EEH event handling
From: Michael Ellerman @ 2009-07-16  1:41 UTC (permalink / raw)
  To: Mike Mason; +Cc: linuxppc-dev, linasvepstas, Paul Mackerras
In-Reply-To: <4A5E4D68.6070909@us.ibm.com>

[-- Attachment #1: Type: text/plain, Size: 1456 bytes --]

On Wed, 2009-07-15 at 14:43 -0700, Mike Mason wrote:
> This patch increments the device_node reference counter when an EEH
> error occurs and decrements the counter when the event has been
> handled.  This is to prevent the device_node from being released until
> eeh_event_handler() has had a chance to deal with the event.  We've
> seen cases where the device_node is released too soon when an EEH
> event occurs during a dlpar remove, causing the event handler to
> attempt to access bad memory locations.
> 
> Please review and let me know of any concerns.

Taking a reference sounds sane, but ...

> Signed-off-by: Mike Mason <mmlnx@us.ibm.com> 
> 
> --- a/arch/powerpc/platforms/pseries/eeh_event.c	2008-10-09 15:13:53.000000000 -0700
> +++ b/arch/powerpc/platforms/pseries/eeh_event.c	2009-07-14 14:14:00.000000000 -0700
> @@ -75,6 +75,14 @@ static int eeh_event_handler(void * dumm
>  	if (event == NULL)
>  		return 0;
>  
> +	/* EEH holds a reference to the device_node, so if it
> +	 * equals 1 it's no longer valid and the event should
> +	 * be ignored */
> +	if (atomic_read(&event->dn->kref.refcount) == 1) {
> +		of_node_put(event->dn);
> +		return 0;
> +	}

That's really gross :)

And what happens if the refcount goes to 1 just after the check? ie.
here.

>  	/* Serialize processing of EEH events */
>  	mutex_lock(&eeh_event_mutex);
>  	eeh_mark_slot(event->dn, EEH_MODE_RECOVERING);


cheers


[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 197 bytes --]

^ permalink raw reply

* Re: [RFC/PATCH] mm: Pass virtual address to [__]p{te,ud,md}_free_tlb()
From: Benjamin Herrenschmidt @ 2009-07-16  1:54 UTC (permalink / raw)
  To: Nick Piggin
  Cc: Linux-Arch, Linux Memory Management, Hugh Dickins, linux-kernel,
	linuxppc-dev
In-Reply-To: <20090715135620.GD7298@wotan.suse.de>

On Wed, 2009-07-15 at 15:56 +0200, Nick Piggin wrote:
> On Wed, Jul 15, 2009 at 05:49:47PM +1000, Benjamin Herrenschmidt wrote:
> > Upcoming patches to support the new 64-bit "BookE" powerpc architecture
> > will need to have the virtual address corresponding to PTE page when
> > freeing it, due to the way the HW table walker works.
> > 
> > Basically, the TLB can be loaded with "large" pages that cover the whole
> > virtual space (well, sort-of, half of it actually) represented by a PTE
> > page, and which contain an "indirect" bit indicating that this TLB entry
> > RPN points to an array of PTEs from which the TLB can then create direct
> > entries.
> 
> RPN is PFN in ppc speak, right?

Ah right, real page number in ppc slang :-)

> > Thus, in order to invalidate those when PTE pages are deleted,
> > we need the virtual address to pass to tlbilx or tlbivax instructions.
> 
> Interesting arrangement. So are these last level ptes modifiable
> from userspace or something? If not, I wonder if you could manage
> them as another level of pointers with the existing pagetable
> functions?

I don't understand what you mean. Basically, the TLB contains PMD's.
There's nothing to change to the existing page table layout :-) But
because they appear as large page TLB entries that cover the virtual
space covered by a PMD, they need to be invalidated using virtual
addresses when PMDs are removed.

> > The old trick of sticking it somewhere in the PTE page struct page sucks
> > too much, the address is almost readily available in all call sites and
> > almost everybody implements these as macros, so we may as well add the
> > argument everywhere. I added it to the pmd and pud variants for consistency.
> > 
> > Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> > ---
> > 
> > I would like to merge the new support that depends on this in 2.6.32,
> > so unless there's major objections, I'd like this to go in early during
> > the merge window. We can sort out separately how to carry the patch
> > around in -next until then since the powerpc tree will have a dependency
> > on it.
> 
> Can't see any problem with that.

Thanks, can I get an Ack then ? :-)

Cheers,
Ben.

^ permalink raw reply

* Re: [RFC/PATCH] mm: Pass virtual address to [__]p{te,ud,md}_free_tlb()
From: Benjamin Herrenschmidt @ 2009-07-16  1:56 UTC (permalink / raw)
  To: michael
  Cc: Linux-Arch, Nick Piggin, linuxppc-dev, Hugh Dickins, linux-kernel,
	Linux Memory Management
In-Reply-To: <1247708177.9851.4.camel@concordia>

On Thu, 2009-07-16 at 11:36 +1000, Michael Ellerman wrote:
> 
> Builds for the important architectures, powerpc, ia64, arm, sparc,
> sparc64, oh and x86:
> 
> http://kisskb.ellerman.id.au/kisskb/head/1976/
> 
> (based on your test branch 34f25476)

Note for all lurkers: the fails in there are unrelated to the patch
(mostly warnings triggering our new Werror and probably mostly fixed
upstream already).

Cheers,
Ben.

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox