LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3] 8xx: Large page(8MB) support for 2.4
From: Joakim Tjernlund @ 2011-10-10 11:38 UTC (permalink / raw)
  To: Dan Malek, linuxppc-dev, Scott Wood, Willy Tarreau

This adds Large page support for 8xx and uses it
for all kernel RAM.

Further usage is possible, IMAP_ADDR and on board
flash comes to mind.

There is one bit free in the pte which could be used for
selecting different large page sizes but that is for another
day.

- Dan, what do you think :)

Joakim Tjernlund (3):
  8xx: replace _PAGE_EXEC with _PAGE_PSE
  8xx: Support LARGE pages in TLB code.
  8xx: Use LARGE pages for kernel RAM.

 arch/ppc/kernel/head_8xx.S |   30 +++++++++++++++++++-----------
 arch/ppc/mm/pgtable.c      |    4 +++-
 include/asm-ppc/pgtable.h  |    6 +++++-
 3 files changed, 27 insertions(+), 13 deletions(-)

-- 
1.7.3.4

^ permalink raw reply

* [PATCH 1/3] 8xx: replace _PAGE_EXEC with _PAGE_PSE
From: Joakim Tjernlund @ 2011-10-10 11:38 UTC (permalink / raw)
  To: Dan Malek, linuxppc-dev, Scott Wood, Willy Tarreau
In-Reply-To: <1318246714-5015-1-git-send-email-Joakim.Tjernlund@transmode.se>

We need this bit for large pages(8MB). Adjust TLB code
to not clear bit 28 of Mx_RPN

Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
---
 arch/ppc/kernel/head_8xx.S |    8 ++++----
 include/asm-ppc/pgtable.h  |    6 +++++-
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/arch/ppc/kernel/head_8xx.S b/arch/ppc/kernel/head_8xx.S
index 36089cc..8e3fe40 100644
--- a/arch/ppc/kernel/head_8xx.S
+++ b/arch/ppc/kernel/head_8xx.S
@@ -387,13 +387,13 @@ InstructionTLBMiss:
 4:
 #endif
 	/* The Linux PTE won't go exactly into the MMU TLB.
-	 * Software indicator bits 21 and 28 must be clear.
+	 * Software indicator bit 21 must be clear.
 	 * Software indicator bits 24, 25, 26, and 27 must be
 	 * set.  All other Linux PTE bits control the behavior
 	 * of the MMU.
 	 */
 2:	li	r21, 0x00f0
-	rlwimi	r20, r21, 0, 0x07f8	/* Set 24-27, clear 21-23,28 */
+	rlwimi	r20, r21, 0, 0x07f0	/* Set 24-27, clear 21-23 */
 	DO_8xx_CPU6(0x2d80, r3)
 	mtspr	MI_RPN, r20	/* Update TLB entry */
 
@@ -475,7 +475,7 @@ DataStoreTLBMiss:
 	xori	r20, r20, _PAGE_RW | 0x200
 
 	/* The Linux PTE won't go exactly into the MMU TLB.
-	 * Software indicator bits 22 and 28 must be clear.
+	 * Software indicator bit 22 must be clear.
 	 * Software indicator bits 24, 25, 26, and 27 must be
 	 * set.  All other Linux PTE bits control the behavior
 	 * of the MMU.
@@ -483,7 +483,7 @@ DataStoreTLBMiss:
 finish_DTLB:
 2:	li	r21, 0x00f0
 	mtspr	DAR, r21	/* Tag DAR */
-	rlwimi	r20, r21, 0, 24, 28	/* Set 24-27, clear 28 */
+	rlwimi	r20, r21, 0, 0x00f0	/* Set 24-27 */
 	DO_8xx_CPU6(0x3d80, r3)
 	mtspr	MD_RPN, r20	/* Update TLB entry */
 
diff --git a/include/asm-ppc/pgtable.h b/include/asm-ppc/pgtable.h
index b94e8a8..1a0ca7b 100644
--- a/include/asm-ppc/pgtable.h
+++ b/include/asm-ppc/pgtable.h
@@ -297,11 +297,11 @@ extern unsigned long vmalloc_start;
 #define _PAGE_PRESENT	0x0001	/* Page is valid */
 #define _PAGE_NO_CACHE	0x0002	/* I: cache inhibit */
 #define _PAGE_SHARED	0x0004	/* No ASID (context) compare */
+#define _PAGE_PSE	0x0008	/* Large Page, 8MB */
 
 /* These four software bits must be masked out when the entry is loaded
  * into the TLB, 1 SW bits left(0x0080).
  */
-#define _PAGE_EXEC	0x0008	/* software: i-cache coherency required */
 #define _PAGE_GUARDED	0x0010	/* software: guarded access */
 #define _PAGE_ACCESSED	0x0020	/* software: page referenced */
 #define _PAGE_WRITETHRU	0x0040	/* software: caching is write through */
@@ -359,6 +359,10 @@ extern unsigned long vmalloc_start;
 #define _PAGE_EXEC	0
 #endif
 
+#ifndef _PAGE_PSE
+#define _PAGE_PSE	0
+#endif
+
 #define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
 
 /*
-- 
1.7.3.4

^ permalink raw reply related

* [PATCH 2/3] 8xx: Support LARGE pages in TLB code.
From: Joakim Tjernlund @ 2011-10-10 11:38 UTC (permalink / raw)
  To: Dan Malek, linuxppc-dev, Scott Wood, Willy Tarreau
In-Reply-To: <1318246714-5015-1-git-send-email-Joakim.Tjernlund@transmode.se>


Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
---
 arch/ppc/kernel/head_8xx.S |   22 +++++++++++++++-------
 1 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/arch/ppc/kernel/head_8xx.S b/arch/ppc/kernel/head_8xx.S
index 8e3fe40..439e7f2 100644
--- a/arch/ppc/kernel/head_8xx.S
+++ b/arch/ppc/kernel/head_8xx.S
@@ -368,15 +368,19 @@ InstructionTLBMiss:
 	 * for this "segment."
 	 */
 	tophys(r21,r21)
-	ori	r21,r21,1		/* Set valid bit */
-	DO_8xx_CPU6(0x2b80, r3)
-	mtspr	MI_TWC, r21	/* Set segment attributes */
 	beq-	2f		/* If zero, don't try to find a pte */
 	DO_8xx_CPU6(0x3b80, r3)
 	mtspr	MD_TWC, r21	/* Load pte table base address */
-	mfspr	r21, MD_TWC	/* ....and get the pte address */
-	lwz	r20, 0(r21)	/* Get the pte */
+	mfspr	r20, MD_TWC	/* ....and get the pte address */
+	lwz	r20, 0(r20)	/* Get the pte */
+
+	ori	r21, r21, MI_SVALID	/* Set valid bit */
+	/* Copy PSE to PS bits(8MB) */
+	rlwimi	r21, r20, 0, _PAGE_PSE
+	rlwimi	r21, r20, 32-1, _PAGE_PSE>>1
 
+	DO_8xx_CPU6(0x2b80, r3)
+	mtspr	MI_TWC, r21	/* Set segment attributes */
 #ifndef NO_SWAP
 	/* if !swap, you can delete this */
 	andi.	r21, r20, _PAGE_ACCESSED	/* test ACCESSED bit */
@@ -446,7 +450,9 @@ DataStoreTLBMiss:
 	 * this into the Linux pgd/pmd and load it in the operation
 	 * above.
 	 */
-	rlwimi	r21, r20, 0, _PAGE_GUARDED
+	rlwimi	r21, r20, 0, _PAGE_GUARDED | _PAGE_PSE
+	/* Copy PSE to PS bits(8MB), combine with GUARDED above */
+	rlwimi	r21, r20, 32-1, _PAGE_PSE>>1
 	/* Insert the WriteThru flag into the TWC from the Linux PTE.
 	 * It is bit 25 in the Linux PTE and bit 30 in the TWC
 	 */
@@ -589,7 +595,9 @@ DARFixed:
 	/* Insert the Guarded flag into the TWC from the Linux PTE.
 	 * It is bit 27 of both the Linux PTE and the TWC
 	 */
-	rlwimi	r21, r20, 0, _PAGE_GUARDED
+	rlwimi	r21, r20, 0, _PAGE_GUARDED | _PAGE_PSE
+	/* Copy PSE to PS bits(8MB), combine with GUARDED above */
+	rlwimi	r21, r20, 32-1, _PAGE_PSE>>1
 	/* Insert the WriteThru flag into the TWC from the Linux PTE.
 	 * It is bit 25 in the Linux PTE and bit 30 in the TWC
 	 */
-- 
1.7.3.4

^ permalink raw reply related

* [PATCH 3/3] 8xx: Use LARGE pages for kernel RAM.
From: Joakim Tjernlund @ 2011-10-10 11:38 UTC (permalink / raw)
  To: Dan Malek, linuxppc-dev, Scott Wood, Willy Tarreau
In-Reply-To: <1318246714-5015-1-git-send-email-Joakim.Tjernlund@transmode.se>

Use the new _PAGE_PSE to map all kernel RAM with 8 MB TLBs

Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
---
 arch/ppc/mm/pgtable.c |    4 +++-
 1 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/arch/ppc/mm/pgtable.c b/arch/ppc/mm/pgtable.c
index 866ae43..56e847e 100644
--- a/arch/ppc/mm/pgtable.c
+++ b/arch/ppc/mm/pgtable.c
@@ -298,7 +298,9 @@ void __init mapin_ram(void)
 		/* On the MPC8xx, we want the page shared so we
 		 * don't get ASID compares on kernel space.
 		 */
-		f = _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_SHARED | _PAGE_HWEXEC;
+		f = _PAGE_PSE | _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_SHARED | _PAGE_HWEXEC;
+		if (_PAGE_PSE)
+			f |= _PAGE_WRENABLE;
 #if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH)
 		/* Allows stub to set breakpoints everywhere */
 		f |= _PAGE_WRENABLE;
-- 
1.7.3.4

^ permalink raw reply related

* Re: [PATCH 00/14] Backport 8xx TLB to 2.4
From: Willy Tarreau @ 2011-10-10 12:30 UTC (permalink / raw)
  To: Joakim Tjernlund; +Cc: Scott Wood, linuxppc-dev, Dan Malek
In-Reply-To: <1318246220-4839-1-git-send-email-Joakim.Tjernlund@transmode.se>

Hi Joakim,

On Mon, Oct 10, 2011 at 01:30:06PM +0200, Joakim Tjernlund wrote:
> This is a backport from 2.6 which I did to overcome 8xx CPU
> bugs. 8xx does not update the DAR register when taking a TLB
> error caused by dcbX and icbi insns which makes it very
> tricky to use these insns. Also the dcbst wrongly sets
> the store bit when faulting into DTLB error.
> A few more bugs were found during development.
> 
> I know 2.4 is in strict maintenance mode and 8xx is obsolete
> but as it is still in use I wanted 8xx to age with grace.

Thank you. I must admit I was hoping those patches would come in
for a last release before the end of the year :-)

Unless there is any objection from anyone, I'll merge them when
kernel.org is back online.

Cheers,
Willy

^ permalink raw reply

* Re: [PATCH 1/3] [powerpc32] Process dynamic relocations for kernel
From: Scott Wood @ 2011-10-10 15:15 UTC (permalink / raw)
  To: Suzuki K. Poulose
  Cc: Michal Simek, tmarri, Mahesh Jagannath Salgaonkar, Dave Hansen,
	David Laight, Paul Mackerras, Alan Modra, linux ppc dev,
	Vivek Goyal
In-Reply-To: <20111010095432.16589.32935.stgit@suzukikp.in.ibm.com>

On 10/10/2011 04:55 AM, Suzuki K. Poulose wrote:
> The following patch implements the dynamic relocation processing for
> PPC32 kernel. relocate() accepts the target virtual address and relocates
>  the kernel image to the same.

How much overhead is involved in a true relocatable kernel?  Is it worth
preserving the old "relocatable" booke behavior under a different name?

-Scott

^ permalink raw reply

* Re: [PATCH 0/3] 8xx: Large page(8MB) support for 2.4
From: Willy Tarreau @ 2011-10-10 16:29 UTC (permalink / raw)
  To: Dan Malek; +Cc: Scott Wood, linuxppc-dev
In-Reply-To: <39B024CE-70A2-4D32-A29C-5702C7D2D6CF@digitaldans.com>

Hi Dan,

On Mon, Oct 10, 2011 at 09:22:09AM -0700, Dan Malek wrote:
> 
> Hi Joakim.
> 
> On Oct 10, 2011, at 4:38 AM, Joakim Tjernlund wrote:
> 
> >This adds Large page support for 8xx and uses it
> >for all kernel RAM....
> 
> >- Dan, what do you think :)
> 
> Since you asked, yes it looks great :-)  Now, can we
> get this into a more contemporary kernel?  I'm
> actually working on an 8xx project that may have
> a few years of life left.

At the pace of current 2.4, I'm sure the code won't have changed
much a few years from now :-) It would be nice to know by now if
the current longterm branches work OK or not though.

Cheers,
Willy

^ permalink raw reply

* Re: [PATCH 0/3] 8xx: Large page(8MB) support for 2.4
From: Dan Malek @ 2011-10-10 16:22 UTC (permalink / raw)
  To: Joakim Tjernlund; +Cc: Scott Wood, linuxppc-dev, Willy Tarreau
In-Reply-To: <1318246714-5015-1-git-send-email-Joakim.Tjernlund@transmode.se>


Hi Joakim.

On Oct 10, 2011, at 4:38 AM, Joakim Tjernlund wrote:

> This adds Large page support for 8xx and uses it
> for all kernel RAM....

> - Dan, what do you think :)

Since you asked, yes it looks great :-)  Now, can we
get this into a more contemporary kernel?  I'm
actually working on an 8xx project that may have
a few years of life left.

Thanks.

	-- Dan

^ permalink raw reply

* [PATCH] mlx4_en: fix endianness with blue frame support
From: Thadeu Lima de Souza Cascardo @ 2011-10-10 16:42 UTC (permalink / raw)
  To: netdev
  Cc: Eli Cohen, linuxppc-dev, Thadeu Lima de Souza Cascardo,
	Yevgeny Petrilin
In-Reply-To: <1318231920.29415.404.camel@pasglop>

The doorbell register was being unconditionally swapped. In x86, that
meant it was being swapped to BE and written to the descriptor and to
memory, depending on the case of blue frame support or writing to
doorbell register. On PPC, this meant it was being swapped to LE and
then swapped back to BE while writing to the register. But in the blue
frame case, it was being written as LE to the descriptor.

The fix is not to swap doorbell unconditionally, write it to the
register as BE and convert it to BE when writing it to the descriptor.

Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@linux.vnet.ibm.com>
Reported-by: Richard Hendrickson <richhend@us.ibm.com>
Cc: Eli Cohen <eli@dev.mellanox.co.il>
Cc: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 drivers/net/mlx4/en_tx.c |    6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/mlx4/en_tx.c b/drivers/net/mlx4/en_tx.c
index 6e03de0..f76ab6b 100644
--- a/drivers/net/mlx4/en_tx.c
+++ b/drivers/net/mlx4/en_tx.c
@@ -172,7 +172,7 @@ int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
 	memset(ring->buf, 0, ring->buf_size);
 
 	ring->qp_state = MLX4_QP_STATE_RST;
-	ring->doorbell_qpn = swab32(ring->qp.qpn << 8);
+	ring->doorbell_qpn = ring->qp.qpn << 8;
 
 	mlx4_en_fill_qp_context(priv, ring->size, ring->stride, 1, 0, ring->qpn,
 				ring->cqn, &ring->context);
@@ -791,7 +791,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 		skb_orphan(skb);
 
 	if (ring->bf_enabled && desc_size <= MAX_BF && !bounce && !vlan_tag) {
-		*(u32 *) (&tx_desc->ctrl.vlan_tag) |= ring->doorbell_qpn;
+		*(__be32 *) (&tx_desc->ctrl.vlan_tag) |= cpu_to_be32(ring->doorbell_qpn);
 		op_own |= htonl((bf_index & 0xffff) << 8);
 		/* Ensure new descirptor hits memory
 		* before setting ownership of this descriptor to HW */
@@ -812,7 +812,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 		wmb();
 		tx_desc->ctrl.owner_opcode = op_own;
 		wmb();
-		writel(ring->doorbell_qpn, ring->bf.uar->map + MLX4_SEND_DOORBELL);
+		iowrite32be(ring->doorbell_qpn, ring->bf.uar->map + MLX4_SEND_DOORBELL);
 	}
 
 	/* Poll CQ here */
-- 
1.7.4.4

^ permalink raw reply related

* Re: [PATCH] mlx4_en: fix endianness with blue frame support
From: Thadeu Lima de Souza Cascardo @ 2011-10-10 16:46 UTC (permalink / raw)
  To: netdev; +Cc: Eli Cohen, linuxppc-dev, Yevgeny Petrilin
In-Reply-To: <1318264943-10009-1-git-send-email-cascardo@linux.vnet.ibm.com>

On Mon, Oct 10, 2011 at 01:42:23PM -0300, Thadeu Lima de Souza Cascardo wrote:
> The doorbell register was being unconditionally swapped. In x86, that
> meant it was being swapped to BE and written to the descriptor and to
> memory, depending on the case of blue frame support or writing to
> doorbell register. On PPC, this meant it was being swapped to LE and
> then swapped back to BE while writing to the register. But in the blue
> frame case, it was being written as LE to the descriptor.
> 
> The fix is not to swap doorbell unconditionally, write it to the
> register as BE and convert it to BE when writing it to the descriptor.
> 
> Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@linux.vnet.ibm.com>
> Reported-by: Richard Hendrickson <richhend@us.ibm.com>
> Cc: Eli Cohen <eli@dev.mellanox.co.il>
> Cc: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> ---

So I tested this patch and it works for me. Thanks Ben and Eli for
finding out the problem with doorbell in the descriptor.

Regards,
Cascardo.

>  drivers/net/mlx4/en_tx.c |    6 +++---
>  1 files changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/net/mlx4/en_tx.c b/drivers/net/mlx4/en_tx.c
> index 6e03de0..f76ab6b 100644
> --- a/drivers/net/mlx4/en_tx.c
> +++ b/drivers/net/mlx4/en_tx.c
> @@ -172,7 +172,7 @@ int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
>  	memset(ring->buf, 0, ring->buf_size);
> 
>  	ring->qp_state = MLX4_QP_STATE_RST;
> -	ring->doorbell_qpn = swab32(ring->qp.qpn << 8);
> +	ring->doorbell_qpn = ring->qp.qpn << 8;
> 
>  	mlx4_en_fill_qp_context(priv, ring->size, ring->stride, 1, 0, ring->qpn,
>  				ring->cqn, &ring->context);
> @@ -791,7 +791,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
>  		skb_orphan(skb);
> 
>  	if (ring->bf_enabled && desc_size <= MAX_BF && !bounce && !vlan_tag) {
> -		*(u32 *) (&tx_desc->ctrl.vlan_tag) |= ring->doorbell_qpn;
> +		*(__be32 *) (&tx_desc->ctrl.vlan_tag) |= cpu_to_be32(ring->doorbell_qpn);
>  		op_own |= htonl((bf_index & 0xffff) << 8);
>  		/* Ensure new descirptor hits memory
>  		* before setting ownership of this descriptor to HW */
> @@ -812,7 +812,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
>  		wmb();
>  		tx_desc->ctrl.owner_opcode = op_own;
>  		wmb();
> -		writel(ring->doorbell_qpn, ring->bf.uar->map + MLX4_SEND_DOORBELL);
> +		iowrite32be(ring->doorbell_qpn, ring->bf.uar->map + MLX4_SEND_DOORBELL);
>  	}
> 
>  	/* Poll CQ here */
> -- 
> 1.7.4.4
> 

^ permalink raw reply

* Re: [PATCH 0/3] 8xx: Large page(8MB) support for 2.4
From: Joakim Tjernlund @ 2011-10-10 16:45 UTC (permalink / raw)
  To: Dan Malek; +Cc: Scott Wood, linuxppc-dev, Willy Tarreau
In-Reply-To: <39B024CE-70A2-4D32-A29C-5702C7D2D6CF@digitaldans.com>

Dan Malek <ppc6dev@digitaldans.com> wrote on 2011/10/10 18:22:09:
>
>
> Hi Joakim.
>
> On Oct 10, 2011, at 4:38 AM, Joakim Tjernlund wrote:
>
> > This adds Large page support for 8xx and uses it
> > for all kernel RAM....
>
> > - Dan, what do you think :)
>
> Since you asked, yes it looks great :-)  Now, can we
> get this into a more contemporary kernel?  I'm
> actually working on an 8xx project that may have
> a few years of life left.

That is an easy port but I will have to do that blind. Would you
mind taking this for a spin on 2.4 first?

The more interesting part is if one should use other sized(16K or 512K) large pages too?
Those should be useful for user space but it is a lot of work. I haven't checked
what large page support for user space is in 2.6 for ppc though.

 Jocke

^ permalink raw reply

* Re: [PATCH 1/3] [powerpc32] Process dynamic relocations for kernel
From: Suzuki Poulose @ 2011-10-10 17:17 UTC (permalink / raw)
  To: Scott Wood
  Cc: Michal Simek, tmarri, Mahesh Jagannath Salgaonkar, Dave Hansen,
	David Laight, Paul Mackerras, Alan Modra, linux ppc dev,
	Vivek Goyal
In-Reply-To: <4E930BFA.5030701@freescale.com>

On 10/10/11 20:45, Scott Wood wrote:
> On 10/10/2011 04:55 AM, Suzuki K. Poulose wrote:
>> The following patch implements the dynamic relocation processing for
>> PPC32 kernel. relocate() accepts the target virtual address and relocates
>>   the kernel image to the same.
>
> How much overhead is involved in a true relocatable kernel?  Is it worth
> preserving the old "relocatable" booke behavior under a different name?

There are '75782' on an ebony kernel with minimal config. So thats a pretty big
number for small embedded chips. I guess, preserving the 'old relocatable' (page
aligned approach) would be a good idea for the architectures which can afford it.
e.g, places where TLB size is 64M or less.

Thanks
Suzuki

^ permalink raw reply

* Re: [PATCH 1/3] [powerpc32] Process dynamic relocations for kernel
From: Scott Wood @ 2011-10-10 17:55 UTC (permalink / raw)
  To: Suzuki Poulose
  Cc: Michal Simek, tmarri, Mahesh Jagannath Salgaonkar, Dave Hansen,
	David Laight, Paul Mackerras, Alan Modra, linux ppc dev,
	Vivek Goyal
In-Reply-To: <4E932897.2060907@in.ibm.com>

On 10/10/2011 12:17 PM, Suzuki Poulose wrote:
> On 10/10/11 20:45, Scott Wood wrote:
>> On 10/10/2011 04:55 AM, Suzuki K. Poulose wrote:
>>> The following patch implements the dynamic relocation processing for
>>> PPC32 kernel. relocate() accepts the target virtual address and
>>> relocates
>>>   the kernel image to the same.
>>
>> How much overhead is involved in a true relocatable kernel?  Is it worth
>> preserving the old "relocatable" booke behavior under a different name?
> 
> There are '75782' on an ebony kernel with minimal config. So thats a
> pretty big
> number for small embedded chips. I guess, preserving the 'old
> relocatable' (page
> aligned approach) would be a good idea for the architectures which can
> afford it.
> e.g, places where TLB size is 64M or less.

The systems we've been using this option on aren't *that* small -- I was
thinking more about runtime overhead (beyond the time taken at boot to
process relocations).

-Scott

^ permalink raw reply

* Re: [PATCH 2/3] [44x] Enable CONFIG_RELOCATABLE for PPC44x
From: Scott Wood @ 2011-10-10 18:00 UTC (permalink / raw)
  To: Suzuki K. Poulose
  Cc: Michal Simek, tmarri, Mahesh Jagannath Salgaonkar, Dave Hansen,
	David Laight, Paul Mackerras, linux ppc dev, Vivek Goyal
In-Reply-To: <20111010095514.16589.85241.stgit@suzukikp.in.ibm.com>

On 10/10/2011 04:56 AM, Suzuki K. Poulose wrote:
> #if defined(CONFIG_RELOCATABLE) && defined(CONFIG_44x)
> #define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) - PHYSICAL_START + (KERNELBASE + RELOC_OFFSET)))
> #define __pa(x) ((unsigned long)(x) + PHYSICAL_START - (KERNELBASE + RELOC_OFFSET))
> #endif

Why is this 44x-specific?

-Scott

^ permalink raw reply

* Re: [PATCH 0/3] 8xx: Large page(8MB) support for 2.4
From: Dan Malek @ 2011-10-10 18:03 UTC (permalink / raw)
  To: Joakim Tjernlund; +Cc: Scott Wood, linuxppc-dev, Willy Tarreau
In-Reply-To: <OFAF68F93F.57D058F1-ONC1257925.005B7C6F-C1257925.005C10D7@transmode.se>


On Oct 10, 2011, at 9:45 AM, Joakim Tjernlund wrote:

> That is an easy port but I will have to do that blind. Would you
> mind taking this for a spin on 2.4 first?

My current system is running 2.6, so I don't have much
interest in testing 2.4.

> The more interesting part is if one should use other sized(16K or  
> 512K) large pages too?

My thought long ago was most of the 8xx systems have rather small
real memories, so the larger pages, especially 512K may be too wasteful.
I've always been a fan of keeping the TLB handlers tiny and simple,
rather than spending the instructions doing complex replacements.
Remember, this also affects the I- and D-cache, so a more frequent
and trivial PTE update could very well gain larger system performance
than the management of larger pages with more complex code.
With all of the bug fix code in the handlers, maybe a larger page would
be better.

> Those should be useful for user space but it is a lot of work. I  
> haven't checked
> what large page support for user space is in 2.6 for ppc though.

The 2.6/3.0 kernel supports different, but fixed, page sizes.  IIRC,  
anything
over 64K may require distribution rebuilding to realign code/data  
sections
to more restrictive boundaries.  Maybe a 16K page would show some  
benefit.
I'll try to make some time to play with it.

Thanks.

	-- Dan

^ permalink raw reply

* Re: [PATCH] mlx4_en: fix endianness with blue frame support
From: David Miller @ 2011-10-10 18:10 UTC (permalink / raw)
  To: cascardo; +Cc: netdev, linuxppc-dev, eli, yevgenyp
In-Reply-To: <20111010164654.GA3648@oc1711230544.ibm.com>

From: Thadeu Lima de Souza Cascardo <cascardo@linux.vnet.ibm.com>
Date: Mon, 10 Oct 2011 13:46:54 -0300

> On Mon, Oct 10, 2011 at 01:42:23PM -0300, Thadeu Lima de Souza Cascardo wrote:
>> The doorbell register was being unconditionally swapped. In x86, that
>> meant it was being swapped to BE and written to the descriptor and to
>> memory, depending on the case of blue frame support or writing to
>> doorbell register. On PPC, this meant it was being swapped to LE and
>> then swapped back to BE while writing to the register. But in the blue
>> frame case, it was being written as LE to the descriptor.
>> 
>> The fix is not to swap doorbell unconditionally, write it to the
>> register as BE and convert it to BE when writing it to the descriptor.
>> 
>> Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@linux.vnet.ibm.com>
>> Reported-by: Richard Hendrickson <richhend@us.ibm.com>
>> Cc: Eli Cohen <eli@dev.mellanox.co.il>
>> Cc: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
>> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
>> ---
> 
> So I tested this patch and it works for me. Thanks Ben and Eli for
> finding out the problem with doorbell in the descriptor.

Applied, thanks everyone.

^ permalink raw reply

* [PATCH 0/13] Hugetlb for 64-bit Freescale Book3E
From: Becky Bruce @ 2011-10-10 20:50 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: david

This series of patches contains mostly cleanup code that allows
the enablement of hugetlb for 64-bit Freescale BookE processors.
There are also some bits that I dropped from the 32-bit release
that are added back, as they are needed by other implementations.
Otherwise, it's mostly a bunch of code rearrangement, changes
in #include protections, and Kconfig changes.

Cheers,
Becky

 arch/powerpc/configs/corenet32_smp_defconfig |    9 +--
 arch/powerpc/configs/corenet64_smp_defconfig |    6 +-
 arch/powerpc/configs/mpc85xx_defconfig       |    6 +-
 arch/powerpc/configs/mpc85xx_smp_defconfig   |    7 +-
 arch/powerpc/include/asm/hugetlb.h           |   36 ++++++--
 arch/powerpc/include/asm/page_64.h           |    2 +
 arch/powerpc/kernel/setup_64.c               |   10 ++
 arch/powerpc/mm/hugetlbpage-book3e.c         |   15 ++--
 arch/powerpc/mm/hugetlbpage.c                |  116 ++++++++++++++++----------
 arch/powerpc/mm/tlb_low_64e.S                |   36 ++++-----
 arch/powerpc/mm/tlb_nohash.c                 |    2 +-
 arch/powerpc/platforms/Kconfig.cputype       |    4 +-
 12 files changed, 143 insertions(+), 106 deletions(-)

^ permalink raw reply

* [PATCH 01/13] powerpc: Only define HAVE_ARCH_HUGETLB_UNMAPPED_AREA if PPC_MM_SLICES
From: Becky Bruce @ 2011-10-10 20:50 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: david
In-Reply-To: <1318279848494-git-send-email-beckyb@kernel.crashing.org>

From: Becky Bruce <beckyb@kernel.crashing.org>

If we don't have slices, we should be able to use the generic
hugetlb_get_unmapped_area() code

Signed-off-by: Becky Bruce <beckyb@kernel.crashing.org>
---
 arch/powerpc/include/asm/page_64.h |    2 ++
 arch/powerpc/mm/hugetlbpage.c      |    6 ++----
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
index fb40ede..fed85e6 100644
--- a/arch/powerpc/include/asm/page_64.h
+++ b/arch/powerpc/include/asm/page_64.h
@@ -130,7 +130,9 @@ do {						\
 
 #ifdef CONFIG_HUGETLB_PAGE
 
+#ifdef CONFIG_PPC_MM_SLICES
 #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+#endif
 
 #endif /* !CONFIG_HUGETLB_PAGE */
 
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 48b65be..71c6533 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -686,19 +686,17 @@ int gup_hugepd(hugepd_t *hugepd, unsigned pdshift,
 	return 1;
 }
 
+#ifdef CONFIG_PPC_MM_SLICES
 unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 					unsigned long len, unsigned long pgoff,
 					unsigned long flags)
 {
-#ifdef CONFIG_PPC_MM_SLICES
 	struct hstate *hstate = hstate_file(file);
 	int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));
 
 	return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0);
-#else
-	return get_unmapped_area(file, addr, len, pgoff, flags);
-#endif
 }
+#endif
 
 unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
 {
-- 
1.5.6.5

^ permalink raw reply related

* [PATCH 02/13] powerpc: hugetlb: fix huge_ptep_set_access_flags return value
From: Becky Bruce @ 2011-10-10 20:50 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: david
In-Reply-To: <13182798624083-git-send-email-beckyb@kernel.crashing.org>

From: Becky Bruce <beckyb@kernel.crashing.org>

There was an unconditional return of "1" in the original code
from David Gibson, and I dropped it because it wasn't needed
for FSL BOOKE 32-bit.  However, not all systems (including 64-bit
FSL BOOKE) do loading of the hpte from the fault handler asm
and depend on this function returning 1, which causes a call
to update_mmu_cache() that writes an entry into the tlb.

Signed-off-by: Becky Bruce <beckyb@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 arch/powerpc/include/asm/hugetlb.h |   11 +++++++++++
 1 files changed, 11 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 8600493..70f9885 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -124,7 +124,18 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 					     unsigned long addr, pte_t *ptep,
 					     pte_t pte, int dirty)
 {
+#if defined(CONFIG_PPC_MMU_NOHASH) && \
+	!(defined(CONFIG_PPC_FSL_BOOK3E) && defined(CONFIG_PPC32))
+	/*
+	 * The "return 1" forces a call of update_mmu_cache, which will write a
+	 * TLB entry.  Without this, platforms that don't do a write of the TLB
+	 * entry in the TLB miss handler asm will fault ad infinitum.
+	 */
+	ptep_set_access_flags(vma, addr, ptep, pte, dirty);
+	return 1;
+#else
 	return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
+#endif
 }
 
 static inline pte_t huge_ptep_get(pte_t *ptep)
-- 
1.5.6.5

^ permalink raw reply related

* [PATCH 03/13] powerpc: Fix booke hugetlb preload code for PPC_MM_SLICES and 64-bit
From: Becky Bruce @ 2011-10-10 20:50 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: david
In-Reply-To: <13182798643553-git-send-email-beckyb@kernel.crashing.org>

From: Becky Bruce <beckyb@kernel.crashing.org>

This patch does 2 things: It corrects the code that determines the
size to write into MAS1 for the PPC_MM_SLICES case (this originally
came from David Gibson and I had incorrectly altered it), and it
changes the methodology used to calculate the size for !PPC_MM_SLICES
to work for 64-bit as well as 32-bit.

Signed-off-by: Becky Bruce <beckyb@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 arch/powerpc/mm/hugetlbpage-book3e.c |   15 ++++++---------
 1 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/hugetlbpage-book3e.c
index 343ad0b..4d6d849 100644
--- a/arch/powerpc/mm/hugetlbpage-book3e.c
+++ b/arch/powerpc/mm/hugetlbpage-book3e.c
@@ -45,23 +45,20 @@ void book3e_hugetlb_preload(struct mm_struct *mm, unsigned long ea, pte_t pte)
 	unsigned long flags;
 
 #ifdef CONFIG_PPC_FSL_BOOK3E
-	int index, lz, ncams;
-	struct vm_area_struct *vma;
+	int index, ncams;
 #endif
 
 	if (unlikely(is_kernel_addr(ea)))
 		return;
 
 #ifdef CONFIG_PPC_MM_SLICES
-	psize = mmu_get_tsize(get_slice_psize(mm, ea));
-	tsize = mmu_get_psize(psize);
+	psize = get_slice_psize(mm, ea);
+	tsize = mmu_get_tsize(psize);
 	shift = mmu_psize_defs[psize].shift;
 #else
-	vma = find_vma(mm, ea);
-	psize = vma_mmu_pagesize(vma);	/* returns actual size in bytes */
-	asm (PPC_CNTLZL "%0,%1" : "=r" (lz) : "r" (psize));
-	shift = 31 - lz;
-	tsize = 21 - lz;
+	psize = vma_mmu_pagesize(find_vma(mm, ea));
+	shift = __ilog2(psize);
+	tsize = shift - 10;
 #endif
 
 	/*
-- 
1.5.6.5

^ permalink raw reply related

* [PATCH 04/13] powerpc: Update hugetlb huge_pte_alloc and tablewalk code for FSL BOOKE
From: Becky Bruce @ 2011-10-10 20:50 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: david
In-Reply-To: <13182798681100-git-send-email-beckyb@kernel.crashing.org>

From: Becky Bruce <beckyb@kernel.crashing.org>

This updates the hugetlb page table code to handle 64-bit FSL_BOOKE.
The previous 32-bit work counted on the inner levels of the page table
collapsing.

Signed-off-by: Becky Bruce <beckyb@kernel.crashing.org>
---
 arch/powerpc/mm/hugetlbpage.c |   48 +++++++++++++++++++++++++++++++++++-----
 1 files changed, 42 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 71c6533..b4a4884 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -155,11 +155,28 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
 			hpdp->pd = 0;
 		kmem_cache_free(cachep, new);
 	}
+#else
+	if (!hugepd_none(*hpdp))
+		kmem_cache_free(cachep, new);
+	else
+		hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
 #endif
 	spin_unlock(&mm->page_table_lock);
 	return 0;
 }
 
+/*
+ * These macros define how to determine which level of the page table holds
+ * the hpdp.
+ */
+#ifdef CONFIG_PPC_FSL_BOOK3E
+#define HUGEPD_PGD_SHIFT PGDIR_SHIFT
+#define HUGEPD_PUD_SHIFT PUD_SHIFT
+#else
+#define HUGEPD_PGD_SHIFT PUD_SHIFT
+#define HUGEPD_PUD_SHIFT PMD_SHIFT
+#endif
+
 pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
 {
 	pgd_t *pg;
@@ -172,12 +189,13 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
 	addr &= ~(sz-1);
 
 	pg = pgd_offset(mm, addr);
-	if (pshift >= PUD_SHIFT) {
+
+	if (pshift >= HUGEPD_PGD_SHIFT) {
 		hpdp = (hugepd_t *)pg;
 	} else {
 		pdshift = PUD_SHIFT;
 		pu = pud_alloc(mm, pg, addr);
-		if (pshift >= PMD_SHIFT) {
+		if (pshift >= HUGEPD_PUD_SHIFT) {
 			hpdp = (hugepd_t *)pu;
 		} else {
 			pdshift = PMD_SHIFT;
@@ -453,14 +471,23 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
 	unsigned long start;
 
 	start = addr;
-	pmd = pmd_offset(pud, addr);
 	do {
+		pmd = pmd_offset(pud, addr);
 		next = pmd_addr_end(addr, end);
 		if (pmd_none(*pmd))
 			continue;
+#ifdef CONFIG_PPC_FSL_BOOK3E
+		/*
+		 * Increment next by the size of the huge mapping since
+		 * there may be more than one entry at this level for a
+		 * single hugepage, but all of them point to
+		 * the same kmem cache that holds the hugepte.
+		 */
+		next = addr + (1 << hugepd_shift(*(hugepd_t *)pmd));
+#endif
 		free_hugepd_range(tlb, (hugepd_t *)pmd, PMD_SHIFT,
 				  addr, next, floor, ceiling);
-	} while (pmd++, addr = next, addr != end);
+	} while (addr = next, addr != end);
 
 	start &= PUD_MASK;
 	if (start < floor)
@@ -487,8 +514,8 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
 	unsigned long start;
 
 	start = addr;
-	pud = pud_offset(pgd, addr);
 	do {
+		pud = pud_offset(pgd, addr);
 		next = pud_addr_end(addr, end);
 		if (!is_hugepd(pud)) {
 			if (pud_none_or_clear_bad(pud))
@@ -496,10 +523,19 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
 			hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
 					       ceiling);
 		} else {
+#ifdef CONFIG_PPC_FSL_BOOK3E
+			/*
+			 * Increment next by the size of the huge mapping since
+			 * there may be more than one entry at this level for a
+			 * single hugepage, but all of them point to
+			 * the same kmem cache that holds the hugepte.
+			 */
+			next = addr + (1 << hugepd_shift(*(hugepd_t *)pud));
+#endif
 			free_hugepd_range(tlb, (hugepd_t *)pud, PUD_SHIFT,
 					  addr, next, floor, ceiling);
 		}
-	} while (pud++, addr = next, addr != end);
+	} while (addr = next, addr != end);
 
 	start &= PGDIR_MASK;
 	if (start < floor)
-- 
1.5.6.5

^ permalink raw reply related

* [PATCH 05/13] powerpc: hugetlb: modify include usage for FSL BookE code
From: Becky Bruce @ 2011-10-10 20:50 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: david
In-Reply-To: <1318279870278-git-send-email-beckyb@kernel.crashing.org>

From: Becky Bruce <beckyb@kernel.crashing.org>

The original 32-bit hugetlb implementation used PPC64 vs PPC32 to
determine which code path to take.  However, the final hugetlb
implementation for 64-bit FSL ended up shared with the FSL
32-bit code so the actual check needs to be FSL_BOOK3E vs
everything else.  This patch changes the include protections to
reflect this.

There are also a couple of related comment fixes.

Signed-off-by: Becky Bruce <beckyb@kernel.crashing.org>
---
 arch/powerpc/include/asm/hugetlb.h |    6 ++--
 arch/powerpc/mm/hugetlbpage.c      |   54 ++++++++++++++++-------------------
 arch/powerpc/mm/tlb_nohash.c       |    2 +-
 3 files changed, 29 insertions(+), 33 deletions(-)

diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 70f9885..273acfa 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -22,14 +22,14 @@ static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr,
 				    unsigned pdshift)
 {
 	/*
-	 * On 32-bit, we have multiple higher-level table entries that point to
-	 * the same hugepte.  Just use the first one since they're all
+	 * On FSL BookE, we have multiple higher-level table entries that
+	 * point to the same hugepte.  Just use the first one since they're all
 	 * identical.  So for that case, idx=0.
 	 */
 	unsigned long idx = 0;
 
 	pte_t *dir = hugepd_page(*hpdp);
-#ifdef CONFIG_PPC64
+#ifndef CONFIG_PPC_FSL_BOOK3E
 	idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(*hpdp);
 #endif
 
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index b4a4884..9a34606 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -33,17 +33,17 @@ unsigned int HPAGE_SHIFT;
  * implementations may have more than one gpage size due to limitations
  * of the memory allocators, so we need multiple arrays
  */
-#ifdef CONFIG_PPC64
-#define MAX_NUMBER_GPAGES	1024
-static u64 gpage_freearray[MAX_NUMBER_GPAGES];
-static unsigned nr_gpages;
-#else
+#ifdef CONFIG_PPC_FSL_BOOK3E
 #define MAX_NUMBER_GPAGES	128
 struct psize_gpages {
 	u64 gpage_list[MAX_NUMBER_GPAGES];
 	unsigned int nr_gpages;
 };
 static struct psize_gpages gpage_freearray[MMU_PAGE_COUNT];
+#else
+#define MAX_NUMBER_GPAGES	1024
+static u64 gpage_freearray[MAX_NUMBER_GPAGES];
+static unsigned nr_gpages;
 #endif
 
 static inline int shift_to_mmu_psize(unsigned int shift)
@@ -114,12 +114,12 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
 	struct kmem_cache *cachep;
 	pte_t *new;
 
-#ifdef CONFIG_PPC64
-	cachep = PGT_CACHE(pdshift - pshift);
-#else
+#ifdef CONFIG_PPC_FSL_BOOK3E
 	int i;
 	int num_hugepd = 1 << (pshift - pdshift);
 	cachep = hugepte_cache;
+#else
+	cachep = PGT_CACHE(pdshift - pshift);
 #endif
 
 	new = kmem_cache_zalloc(cachep, GFP_KERNEL|__GFP_REPEAT);
@@ -131,12 +131,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
 		return -ENOMEM;
 
 	spin_lock(&mm->page_table_lock);
-#ifdef CONFIG_PPC64
-	if (!hugepd_none(*hpdp))
-		kmem_cache_free(cachep, new);
-	else
-		hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
-#else
+#ifdef CONFIG_PPC_FSL_BOOK3E
 	/*
 	 * We have multiple higher-level entries that point to the same
 	 * actual pte location.  Fill in each as we go and backtrack on error.
@@ -215,7 +210,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
 	return hugepte_offset(hpdp, addr, pdshift);
 }
 
-#ifdef CONFIG_PPC32
+#ifdef CONFIG_PPC_FSL_BOOK3E
 /* Build list of addresses of gigantic pages.  This function is used in early
  * boot before the buddy or bootmem allocator is setup.
  */
@@ -335,7 +330,7 @@ void __init reserve_hugetlb_gpages(void)
 	}
 }
 
-#else /* PPC64 */
+#else /* !PPC_FSL_BOOK3E */
 
 /* Build list of addresses of gigantic pages.  This function is used in early
  * boot before the buddy or bootmem allocator is setup.
@@ -373,7 +368,7 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
 	return 0;
 }
 
-#ifdef CONFIG_PPC32
+#ifdef CONFIG_PPC_FSL_BOOK3E
 #define HUGEPD_FREELIST_SIZE \
 	((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t))
 
@@ -433,11 +428,11 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif
 	unsigned long pdmask = ~((1UL << pdshift) - 1);
 	unsigned int num_hugepd = 1;
 
-#ifdef CONFIG_PPC64
-	unsigned int shift = hugepd_shift(*hpdp);
-#else
-	/* Note: On 32-bit the hpdp may be the first of several */
+#ifdef CONFIG_PPC_FSL_BOOK3E
+	/* Note: On fsl the hpdp may be the first of several */
 	num_hugepd = (1 << (hugepd_shift(*hpdp) - pdshift));
+#else
+	unsigned int shift = hugepd_shift(*hpdp);
 #endif
 
 	start &= pdmask;
@@ -455,10 +450,11 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif
 		hpdp->pd = 0;
 
 	tlb->need_flush = 1;
-#ifdef CONFIG_PPC64
-	pgtable_free_tlb(tlb, hugepte, pdshift - shift);
-#else
+
+#ifdef CONFIG_PPC_FSL_BOOK3E
 	hugepd_free(tlb, hugepte);
+#else
+	pgtable_free_tlb(tlb, hugepte, pdshift - shift);
 #endif
 }
 
@@ -590,12 +586,12 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 				continue;
 			hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
 		} else {
-#ifdef CONFIG_PPC32
+#ifdef CONFIG_PPC_FSL_BOOK3E
 			/*
 			 * Increment next by the size of the huge mapping since
-			 * on 32-bit there may be more than one entry at the pgd
-			 * level for a single hugepage, but all of them point to
-			 * the same kmem cache that holds the hugepte.
+			 * there may be more than one entry at the pgd level
+			 * for a single hugepage, but all of them point to the
+			 * same kmem cache that holds the hugepte.
 			 */
 			next = addr + (1 << hugepd_shift(*(hugepd_t *)pgd));
 #endif
@@ -806,7 +802,7 @@ static int __init hugepage_setup_sz(char *str)
 }
 __setup("hugepagesz=", hugepage_setup_sz);
 
-#ifdef CONFIG_FSL_BOOKE
+#ifdef CONFIG_PPC_FSL_BOOK3E
 struct kmem_cache *hugepte_cache;
 static int __init hugetlbpage_init(void)
 {
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index afc95c7..c02901f 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -51,7 +51,7 @@
  * indirect page table entries.
  */
 #ifdef CONFIG_PPC_BOOK3E_MMU
-#ifdef CONFIG_FSL_BOOKE
+#ifdef CONFIG_PPC_FSL_BOOK3E
 struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
 	[MMU_PAGE_4K] = {
 		.shift	= 12,
-- 
1.5.6.5

^ permalink raw reply related

* [PATCH 06/13] powerpc: Whitespace/comment changes to tlb_low_64e.S
From: Becky Bruce @ 2011-10-10 20:50 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: david
In-Reply-To: <13182798731357-git-send-email-beckyb@kernel.crashing.org>

From: Becky Bruce <beckyb@kernel.crashing.org>

I happened to comment this code while I was digging through it;
we might as well commit that.  I also made some whitespace
changes - the existing code had a lot of unnecessary newlines
that I found annoying when I was working on my tiny laptop.

No functional changes.

Signed-off-by: Becky Bruce <beckyb@kernel.crashing.org>
---
 arch/powerpc/mm/tlb_low_64e.S |   28 +++++++++++-----------------
 1 files changed, 11 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S
index dc4a5f3..71d5d9a 100644
--- a/arch/powerpc/mm/tlb_low_64e.S
+++ b/arch/powerpc/mm/tlb_low_64e.S
@@ -94,11 +94,11 @@
 
 	srdi	r15,r16,60		/* get region */
 	rldicl.	r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4
-	bne-	dtlb_miss_fault_bolted
+	bne-	dtlb_miss_fault_bolted	/* Bail if fault addr is invalid */
 
 	rlwinm	r10,r11,32-19,27,27
 	rlwimi	r10,r11,32-16,19,19
-	cmpwi	r15,0
+	cmpwi	r15,0			/* user vs kernel check */
 	ori	r10,r10,_PAGE_PRESENT
 	oris	r11,r10,_PAGE_ACCESSED@h
 
@@ -120,44 +120,38 @@ tlb_miss_common_bolted:
 	rldicl	r15,r16,64-PGDIR_SHIFT+3,64-PGD_INDEX_SIZE-3
 	cmpldi	cr0,r14,0
 	clrrdi	r15,r15,3
-	beq	tlb_miss_fault_bolted
+	beq	tlb_miss_fault_bolted	/* No PGDIR, bail */
 
 BEGIN_MMU_FTR_SECTION
 	/* Set the TLB reservation and search for existing entry. Then load
 	 * the entry.
 	 */
 	PPC_TLBSRX_DOT(0,r16)
-	ldx	r14,r14,r15
-	beq	normal_tlb_miss_done
+	ldx	r14,r14,r15		/* grab pgd entry */
+	beq	normal_tlb_miss_done	/* tlb exists already, bail */
 MMU_FTR_SECTION_ELSE
-	ldx	r14,r14,r15
+	ldx	r14,r14,r15		/* grab pgd entry */
 ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV)
 
 #ifndef CONFIG_PPC_64K_PAGES
 	rldicl	r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3
 	clrrdi	r15,r15,3
-
-	cmpldi	cr0,r14,0
-	beq	tlb_miss_fault_bolted
-
-	ldx	r14,r14,r15
+	cmlpdi	cr0,r14,0
+	beq	tlb_miss_fault_bolted	/* Bad pgd entry */
+	ldx	r14,r14,r15		/* grab pud entry */
 #endif /* CONFIG_PPC_64K_PAGES */
 
 	rldicl	r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3
 	clrrdi	r15,r15,3
-
 	cmpldi	cr0,r14,0
 	beq	tlb_miss_fault_bolted
-
-	ldx	r14,r14,r15
+	ldx	r14,r14,r15		/* Grab pmd entry */
 
 	rldicl	r15,r16,64-PAGE_SHIFT+3,64-PTE_INDEX_SIZE-3
 	clrrdi	r15,r15,3
-
 	cmpldi	cr0,r14,0
 	beq	tlb_miss_fault_bolted
-
-	ldx	r14,r14,r15
+	ldx	r14,r14,r15		/* Grab PTE */
 
 	/* Check if required permissions are met */
 	andc.	r15,r11,r14
-- 
1.5.6.5

^ permalink raw reply related

* [PATCH 07/13] powerpc: Add hugepage support to 64-bit tablewalk code for FSL_BOOK3E
From: Becky Bruce @ 2011-10-10 20:50 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: david
In-Reply-To: <13182798761457-git-send-email-beckyb@kernel.crashing.org>

From: Becky Bruce <beckyb@kernel.crashing.org>

Before hugetlb, at each level of the table, we test for
!0 to determine if we have a valid table entry.  With hugetlb, this
compare becomes:
        < 0 is a normal entry
        0 is an invalid entry
        > 0 is huge

This works because the hugepage code clears the top bit of the entry
(which is always set for non-huge entries) as an indicator that we
have a hugepage.

Signed-off-by: Becky Bruce <beckyb@kernel.crashing.org>
---
 arch/powerpc/mm/tlb_low_64e.S |   14 +++++++-------
 1 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S
index 71d5d9a..ff672bd 100644
--- a/arch/powerpc/mm/tlb_low_64e.S
+++ b/arch/powerpc/mm/tlb_low_64e.S
@@ -136,22 +136,22 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV)
 #ifndef CONFIG_PPC_64K_PAGES
 	rldicl	r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3
 	clrrdi	r15,r15,3
-	cmlpdi	cr0,r14,0
-	beq	tlb_miss_fault_bolted	/* Bad pgd entry */
+	cmpdi	cr0,r14,0
+	bge	tlb_miss_fault_bolted	/* Bad pgd entry or hugepage; bail */
 	ldx	r14,r14,r15		/* grab pud entry */
 #endif /* CONFIG_PPC_64K_PAGES */
 
 	rldicl	r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3
 	clrrdi	r15,r15,3
-	cmpldi	cr0,r14,0
-	beq	tlb_miss_fault_bolted
+	cmpdi	cr0,r14,0
+	bge	tlb_miss_fault_bolted
 	ldx	r14,r14,r15		/* Grab pmd entry */
 
 	rldicl	r15,r16,64-PAGE_SHIFT+3,64-PTE_INDEX_SIZE-3
 	clrrdi	r15,r15,3
-	cmpldi	cr0,r14,0
-	beq	tlb_miss_fault_bolted
-	ldx	r14,r14,r15		/* Grab PTE */
+	cmpdi	cr0,r14,0
+	bge	tlb_miss_fault_bolted
+	ldx	r14,r14,r15		/* Grab PTE, normal (!huge) page */
 
 	/* Check if required permissions are met */
 	andc.	r15,r11,r14
-- 
1.5.6.5

^ permalink raw reply related

* [PATCH 08/13] powerpc: Add gpages reservation code for 64-bit FSL BOOKE
From: Becky Bruce @ 2011-10-10 20:50 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: david
In-Reply-To: <13182798791090-git-send-email-beckyb@kernel.crashing.org>

From: Becky Bruce <beckyb@kernel.crashing.org>

For 64-bit FSL_BOOKE implementations, gigantic pages need to be
reserved at boot time by the memblock code based on the command line.
This adds the call that handles the reservation, and fixes some code
comments.

It also removes the pr_err that was previously printed when
reserve_hugetlb_gpages is called on a system without hugetlb enabled -
the way the code is structured, the call is unconditional, so the
resulting error message is spurious and confusing.

Signed-off-by: Becky Bruce <beckyb@kernel.crashing.org>
---
 arch/powerpc/include/asm/hugetlb.h |   19 ++++++++++++++-----
 arch/powerpc/kernel/setup_64.c     |   10 ++++++++++
 arch/powerpc/mm/hugetlbpage.c      |    8 ++++----
 3 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 273acfa..555044c 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -5,7 +5,6 @@
 #include <asm/page.h>
 
 extern struct kmem_cache *hugepte_cache;
-extern void __init reserve_hugetlb_gpages(void);
 
 static inline pte_t *hugepd_page(hugepd_t hpd)
 {
@@ -153,14 +152,24 @@ static inline void arch_release_hugepage(struct page *page)
 }
 
 #else /* ! CONFIG_HUGETLB_PAGE */
-static inline void reserve_hugetlb_gpages(void)
-{
-	pr_err("Cannot reserve gpages without hugetlb enabled\n");
-}
 static inline void flush_hugetlb_page(struct vm_area_struct *vma,
 				      unsigned long vmaddr)
 {
 }
+#endif /* CONFIG_HUGETLB_PAGE */
+
+
+/*
+ * FSL Book3E platforms require special gpage handling - the gpages
+ * are reserved early in the boot process by memblock instead of via
+ * the .dts as on IBM platforms.
+ */
+#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_PPC_FSL_BOOK3E)
+extern void __init reserve_hugetlb_gpages(void);
+#else
+static inline void reserve_hugetlb_gpages(void)
+{
+}
 #endif
 
 #endif /* _ASM_POWERPC_HUGETLB_H */
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index d4168c9..2e334d4 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -35,6 +35,8 @@
 #include <linux/pci.h>
 #include <linux/lockdep.h>
 #include <linux/memblock.h>
+#include <linux/hugetlb.h>
+
 #include <asm/io.h>
 #include <asm/kdump.h>
 #include <asm/prom.h>
@@ -64,6 +66,7 @@
 #include <asm/mmu_context.h>
 #include <asm/code-patching.h>
 #include <asm/kvm_ppc.h>
+#include <asm/hugetlb.h>
 
 #include "setup.h"
 
@@ -217,6 +220,13 @@ void __init early_setup(unsigned long dt_ptr)
 	/* Initialize the hash table or TLB handling */
 	early_init_mmu();
 
+	/*
+	 * Reserve any gigantic pages requested on the command line.
+	 * memblock needs to have been initialized by the time this is
+	 * called since this will reserve memory.
+	 */
+	reserve_hugetlb_gpages();
+
 	DBG(" <- early_setup()\n");
 }
 
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 9a34606..51855a0 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -28,10 +28,10 @@ unsigned int HPAGE_SHIFT;
 
 /*
  * Tracks gpages after the device tree is scanned and before the
- * huge_boot_pages list is ready.  On 64-bit implementations, this is
- * just used to track 16G pages and so is a single array.  32-bit
- * implementations may have more than one gpage size due to limitations
- * of the memory allocators, so we need multiple arrays
+ * huge_boot_pages list is ready.  On non-Freescale implementations, this is
+ * just used to track 16G pages and so is a single array.  FSL-based
+ * implementations may have more than one gpage size, so we need multiple
+ * arrays
  */
 #ifdef CONFIG_PPC_FSL_BOOK3E
 #define MAX_NUMBER_GPAGES	128
-- 
1.5.6.5

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox