linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 0/3] Optimisation on 8xx prior to hugepage implementation
@ 2016-09-16  6:42 Christophe Leroy
  2016-09-16  6:42 ` [PATCH v2 1/3] powerpc/8xx: use r3 to scratch CR in ITLBmiss Christophe Leroy
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Christophe Leroy @ 2016-09-16  6:42 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Scott Wood
  Cc: linux-kernel, linuxppc-dev

This serie is a prologue of hugepage implementation on the 8xx.
It some how optimises the DTLBMiss handler while allowing at the
same time to hook the hugepage handling that will be introduced in
a subsequent patch serie.

v1 of those patches was part of a serie identified
"powerpc/8xx: implementation of huge pages"

Christophe Leroy (3):
  powerpc/8xx: use r3 to scratch CR in ITLBmiss
  powerpc/8xx: Move additional DTLBMiss handlers out of exception area
  powerpc/8xx: make user addr DTLB miss the short path

 arch/powerpc/kernel/head_8xx.S | 134 +++++++++++++++++++----------------------
 1 file changed, 62 insertions(+), 72 deletions(-)

-- 
2.1.0

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH v2 1/3] powerpc/8xx: use r3 to scratch CR in ITLBmiss
  2016-09-16  6:42 [PATCH v2 0/3] Optimisation on 8xx prior to hugepage implementation Christophe Leroy
@ 2016-09-16  6:42 ` Christophe Leroy
  2016-09-16  6:42 ` [PATCH v2 2/3] powerpc/8xx: Move additional DTLBMiss handlers out of exception area Christophe Leroy
  2016-09-16  6:42 ` [PATCH v2 3/3] powerpc/8xx: make user addr DTLB miss the short path Christophe Leroy
  2 siblings, 0 replies; 4+ messages in thread
From: Christophe Leroy @ 2016-09-16  6:42 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Scott Wood
  Cc: linux-kernel, linuxppc-dev

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
 arch/powerpc/kernel/head_8xx.S | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 8632515..fd5b53d 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -323,7 +323,7 @@ SystemCall:
 #endif
 
 InstructionTLBMiss:
-#ifdef CONFIG_8xx_CPU6
+#if defined(CONFIG_8xx_CPU6) || defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC)
 	mtspr	SPRN_SPRG_SCRATCH2, r3
 #endif
 	EXCEPTION_PROLOG_0
@@ -331,23 +331,20 @@ InstructionTLBMiss:
 	/* If we are faulting a kernel address, we have to use the
 	 * kernel page tables.
 	 */
+	mfspr	r10, SPRN_SRR0	/* Get effective address of fault */
+	INVALIDATE_ADJACENT_PAGES_CPU15(r11, r10)
 #if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC)
 	/* Only modules will cause ITLB Misses as we always
 	 * pin the first 8MB of kernel memory */
-	mfspr	r11, SPRN_SRR0	/* Get effective address of fault */
-	INVALIDATE_ADJACENT_PAGES_CPU15(r10, r11)
-	mfcr	r10
-	IS_KERNEL(r11, r11)
+	mfcr	r3
+	IS_KERNEL(r11, r10)
+#endif
 	mfspr	r11, SPRN_M_TW	/* Get level 1 table */
+#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC)
 	BRANCH_UNLESS_KERNEL(3f)
 	lis	r11, (swapper_pg_dir-PAGE_OFFSET)@ha
 3:
-	mtcr	r10
-	mfspr	r10, SPRN_SRR0	/* Get effective address of fault */
-#else
-	mfspr	r10, SPRN_SRR0	/* Get effective address of fault */
-	INVALIDATE_ADJACENT_PAGES_CPU15(r11, r10)
-	mfspr	r11, SPRN_M_TW	/* Get level 1 table base address */
+	mtcr	r3
 #endif
 	/* Insert level 1 index */
 	rlwimi	r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29
@@ -379,7 +376,7 @@ InstructionTLBMiss:
 	MTSPR_CPU6(SPRN_MI_RPN, r10, r3)	/* Update TLB entry */
 
 	/* Restore registers */
-#ifdef CONFIG_8xx_CPU6
+#if defined(CONFIG_8xx_CPU6) || defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC)
 	mfspr	r3, SPRN_SPRG_SCRATCH2
 #endif
 	EXCEPTION_EPILOG_0
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH v2 2/3] powerpc/8xx: Move additional DTLBMiss handlers out of exception area
  2016-09-16  6:42 [PATCH v2 0/3] Optimisation on 8xx prior to hugepage implementation Christophe Leroy
  2016-09-16  6:42 ` [PATCH v2 1/3] powerpc/8xx: use r3 to scratch CR in ITLBmiss Christophe Leroy
@ 2016-09-16  6:42 ` Christophe Leroy
  2016-09-16  6:42 ` [PATCH v2 3/3] powerpc/8xx: make user addr DTLB miss the short path Christophe Leroy
  2 siblings, 0 replies; 4+ messages in thread
From: Christophe Leroy @ 2016-09-16  6:42 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Scott Wood
  Cc: linux-kernel, linuxppc-dev

When all options are activated, there is not enough space for the
DTLBMiss handlers that handles IMMR area and linear RAM pages in
the exception area once we have added hugepage handling.
So lets move them after .0x2000

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
 arch/powerpc/kernel/head_8xx.S | 84 +++++++++++++++++++++---------------------
 1 file changed, 42 insertions(+), 42 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index fd5b53d..9cc240d 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -382,26 +382,6 @@ InstructionTLBMiss:
 	EXCEPTION_EPILOG_0
 	rfi
 
-/*
- * Bottom part of DataStoreTLBMiss handler for IMMR area
- * not enough space in the DataStoreTLBMiss area
- */
-DTLBMissIMMR:
-	mtcr	r10
-	/* Set 512k byte guarded page and mark it valid */
-	li	r10, MD_PS512K | MD_GUARDED | MD_SVALID
-	MTSPR_CPU6(SPRN_MD_TWC, r10, r11)
-	mfspr	r10, SPRN_IMMR			/* Get current IMMR */
-	rlwinm	r10, r10, 0, 0xfff80000		/* Get 512 kbytes boundary */
-	ori	r10, r10, 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY	| \
-			  _PAGE_PRESENT | _PAGE_NO_CACHE
-	MTSPR_CPU6(SPRN_MD_RPN, r10, r11)	/* Update TLB entry */
-
-	li	r11, RPN_PATTERN
-	mtspr	SPRN_DAR, r11	/* Tag DAR */
-	EXCEPTION_EPILOG_0
-	rfi
-
 	. = 0x1200
 DataStoreTLBMiss:
 	EXCEPTION_PROLOG_0
@@ -420,7 +400,7 @@ DataStoreTLBMiss:
 _ENTRY(DTLBMiss_jmp)
 	beq-	DTLBMissIMMR
 #endif
-	bge-	cr7, 4f
+	bge-	cr7, DTLBMissLinear
 
 	mfspr	r11, SPRN_M_TW	/* Get level 1 table */
 3:
@@ -487,27 +467,6 @@ _ENTRY(DTLBMiss_jmp)
 	EXCEPTION_EPILOG_0
 	rfi
 
-4:
-_ENTRY(DTLBMiss_cmp)
-	cmpli	cr0, r11, (PAGE_OFFSET + 0x1800000)@h
-	lis	r11, (swapper_pg_dir-PAGE_OFFSET)@ha
-	bge-	3b
-
-	mtcr	r10
-	/* Set 8M byte page and mark it valid */
-	li	r10, MD_PS8MEG | MD_SVALID
-	MTSPR_CPU6(SPRN_MD_TWC, r10, r11)
-	mfspr	r10, SPRN_MD_EPN
-	rlwinm	r10, r10, 0, 0x0f800000		/* 8xx supports max 256Mb RAM */
-	ori	r10, r10, 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY	| \
-			  _PAGE_PRESENT
-	MTSPR_CPU6(SPRN_MD_RPN, r10, r11)	/* Update TLB entry */
-
-	li	r11, RPN_PATTERN
-	mtspr	SPRN_DAR, r11	/* Tag DAR */
-	EXCEPTION_EPILOG_0
-	rfi
-
 
 /* This is an instruction TLB error on the MPC8xx.  This could be due
  * to many reasons, such as executing guarded memory or illegal instruction
@@ -569,6 +528,47 @@ DARFixed:/* Return from dcbx instruction bug workaround */
 
 	. = 0x2000
 
+/*
+ * Bottom part of DataStoreTLBMiss handlers for IMMR area and linear RAM.
+ * not enough space in the DataStoreTLBMiss area.
+ */
+DTLBMissIMMR:
+	mtcr	r10
+	/* Set 512k byte guarded page and mark it valid */
+	li	r10, MD_PS512K | MD_GUARDED | MD_SVALID
+	MTSPR_CPU6(SPRN_MD_TWC, r10, r11)
+	mfspr	r10, SPRN_IMMR			/* Get current IMMR */
+	rlwinm	r10, r10, 0, 0xfff80000		/* Get 512 kbytes boundary */
+	ori	r10, r10, 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY	| \
+			  _PAGE_PRESENT | _PAGE_NO_CACHE
+	MTSPR_CPU6(SPRN_MD_RPN, r10, r11)	/* Update TLB entry */
+
+	li	r11, RPN_PATTERN
+	mtspr	SPRN_DAR, r11	/* Tag DAR */
+	EXCEPTION_EPILOG_0
+	rfi
+
+DTLBMissLinear:
+_ENTRY(DTLBMiss_cmp)
+	cmpli	cr0, r11, (PAGE_OFFSET + 0x1800000)@h
+	lis	r11, (swapper_pg_dir-PAGE_OFFSET)@ha
+	bge-	3b
+
+	mtcr	r10
+	/* Set 8M byte page and mark it valid */
+	li	r10, MD_PS8MEG | MD_SVALID
+	MTSPR_CPU6(SPRN_MD_TWC, r10, r11)
+	mfspr	r10, SPRN_MD_EPN
+	rlwinm	r10, r10, 0, 0x0f800000		/* 8xx supports max 256Mb RAM */
+	ori	r10, r10, 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY	| \
+			  _PAGE_PRESENT
+	MTSPR_CPU6(SPRN_MD_RPN, r10, r11)	/* Update TLB entry */
+
+	li	r11, RPN_PATTERN
+	mtspr	SPRN_DAR, r11	/* Tag DAR */
+	EXCEPTION_EPILOG_0
+	rfi
+
 /* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions
  * by decoding the registers used by the dcbx instruction and adding them.
  * DAR is set to the calculated address.
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH v2 3/3] powerpc/8xx: make user addr DTLB miss the short path
  2016-09-16  6:42 [PATCH v2 0/3] Optimisation on 8xx prior to hugepage implementation Christophe Leroy
  2016-09-16  6:42 ` [PATCH v2 1/3] powerpc/8xx: use r3 to scratch CR in ITLBmiss Christophe Leroy
  2016-09-16  6:42 ` [PATCH v2 2/3] powerpc/8xx: Move additional DTLBMiss handlers out of exception area Christophe Leroy
@ 2016-09-16  6:42 ` Christophe Leroy
  2 siblings, 0 replies; 4+ messages in thread
From: Christophe Leroy @ 2016-09-16  6:42 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Scott Wood
  Cc: linux-kernel, linuxppc-dev

User space DTLB miss represent approximatly 90% of TLB misses
so make it the shortest path.

Also remove an unneccessary double jump in FixupDAR

Before this patch, we spend 3.3 TB ticks in the handler for each
user address miss and 3.4 TB ticks for each kernel address miss
After this patch, we send 3.0 TB ticks in the handler for each
user address miss and 3.9 TB ticks for each kernel address miss
Taking into account that user misses represent 90% of the total,
this patch provides an improvement of approx. 9%

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
 arch/powerpc/kernel/head_8xx.S | 53 ++++++++++++++++++------------------------
 1 file changed, 23 insertions(+), 30 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 9cc240d..bfe4907 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -384,30 +384,31 @@ InstructionTLBMiss:
 
 	. = 0x1200
 DataStoreTLBMiss:
+	mtspr	SPRN_SPRG_SCRATCH2, r3
 	EXCEPTION_PROLOG_0
-	mfcr	r10
+	mfcr	r3
 
 	/* If we are faulting a kernel address, we have to use the
 	 * kernel page tables.
 	 */
-	mfspr	r11, SPRN_MD_EPN
-	rlwinm	r11, r11, 16, 0xfff8
+	mfspr	r10, SPRN_MD_EPN
+	rlwinm	r10, r10, 16, 0xfff8
+	cmpli	cr0, r10, PAGE_OFFSET@h
+	mfspr	r11, SPRN_M_TW	/* Get level 1 table */
+	blt+	3f
 #ifndef CONFIG_PIN_TLB_IMMR
-	cmpli	cr0, r11, VIRT_IMMR_BASE@h
+	cmpli	cr0, r10, VIRT_IMMR_BASE@h
 #endif
-	cmpli	cr7, r11, PAGE_OFFSET@h
+_ENTRY(DTLBMiss_cmp)
+	cmpli	cr7, r10, (PAGE_OFFSET + 0x1800000)@h
+	lis	r11, (swapper_pg_dir-PAGE_OFFSET)@ha
 #ifndef CONFIG_PIN_TLB_IMMR
 _ENTRY(DTLBMiss_jmp)
 	beq-	DTLBMissIMMR
 #endif
-	bge-	cr7, DTLBMissLinear
-
-	mfspr	r11, SPRN_M_TW	/* Get level 1 table */
+	blt	cr7, DTLBMissLinear
 3:
-	mtcr	r10
-#ifdef CONFIG_8xx_CPU6
-	mtspr	SPRN_SPRG_SCRATCH2, r3
-#endif
+	mtcr	r3
 	mfspr	r10, SPRN_MD_EPN
 
 	/* Insert level 1 index */
@@ -460,9 +461,7 @@ _ENTRY(DTLBMiss_jmp)
 	MTSPR_CPU6(SPRN_MD_RPN, r10, r3)	/* Update TLB entry */
 
 	/* Restore registers */
-#ifdef CONFIG_8xx_CPU6
 	mfspr	r3, SPRN_SPRG_SCRATCH2
-#endif
 	mtspr	SPRN_DAR, r11	/* Tag DAR */
 	EXCEPTION_EPILOG_0
 	rfi
@@ -533,7 +532,7 @@ DARFixed:/* Return from dcbx instruction bug workaround */
  * not enough space in the DataStoreTLBMiss area.
  */
 DTLBMissIMMR:
-	mtcr	r10
+	mtcr	r3
 	/* Set 512k byte guarded page and mark it valid */
 	li	r10, MD_PS512K | MD_GUARDED | MD_SVALID
 	MTSPR_CPU6(SPRN_MD_TWC, r10, r11)
@@ -545,27 +544,23 @@ DTLBMissIMMR:
 
 	li	r11, RPN_PATTERN
 	mtspr	SPRN_DAR, r11	/* Tag DAR */
+	mfspr	r3, SPRN_SPRG_SCRATCH2
 	EXCEPTION_EPILOG_0
 	rfi
 
 DTLBMissLinear:
-_ENTRY(DTLBMiss_cmp)
-	cmpli	cr0, r11, (PAGE_OFFSET + 0x1800000)@h
-	lis	r11, (swapper_pg_dir-PAGE_OFFSET)@ha
-	bge-	3b
-
-	mtcr	r10
+	mtcr	r3
 	/* Set 8M byte page and mark it valid */
-	li	r10, MD_PS8MEG | MD_SVALID
-	MTSPR_CPU6(SPRN_MD_TWC, r10, r11)
-	mfspr	r10, SPRN_MD_EPN
-	rlwinm	r10, r10, 0, 0x0f800000		/* 8xx supports max 256Mb RAM */
+	li	r11, MD_PS8MEG | MD_SVALID
+	MTSPR_CPU6(SPRN_MD_TWC, r11, r3)
+	rlwinm	r10, r10, 16, 0x0f800000	/* 8xx supports max 256Mb RAM */
 	ori	r10, r10, 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY	| \
 			  _PAGE_PRESENT
 	MTSPR_CPU6(SPRN_MD_RPN, r10, r11)	/* Update TLB entry */
 
 	li	r11, RPN_PATTERN
 	mtspr	SPRN_DAR, r11	/* Tag DAR */
+	mfspr	r3, SPRN_SPRG_SCRATCH2
 	EXCEPTION_EPILOG_0
 	rfi
 
@@ -585,7 +580,9 @@ FixupDAR:/* Entry point for dcbx workaround. */
 	rlwinm	r11, r10, 16, 0xfff8
 _ENTRY(FixupDAR_cmp)
 	cmpli	cr7, r11, (PAGE_OFFSET + 0x1800000)@h
-	blt-	cr7, 200f
+	/* create physical page address from effective address */
+	tophys(r11, r10)
+	blt-	cr7, 201f
 	lis	r11, (swapper_pg_dir-PAGE_OFFSET)@ha
 	/* Insert level 1 index */
 3:	rlwimi	r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29
@@ -615,10 +612,6 @@ _ENTRY(FixupDAR_cmp)
 141:	mfspr	r10,SPRN_SPRG_SCRATCH2
 	b	DARFixed	/* Nope, go back to normal TLB processing */
 
-	/* create physical page address from effective address */
-200:	tophys(r11, r10)
-	b	201b
-
 144:	mfspr	r10, SPRN_DSISR
 	rlwinm	r10, r10,0,7,5	/* Clear store bit for buggy dcbst insn */
 	mtspr	SPRN_DSISR, r10
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2016-09-16  7:02 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-09-16  6:42 [PATCH v2 0/3] Optimisation on 8xx prior to hugepage implementation Christophe Leroy
2016-09-16  6:42 ` [PATCH v2 1/3] powerpc/8xx: use r3 to scratch CR in ITLBmiss Christophe Leroy
2016-09-16  6:42 ` [PATCH v2 2/3] powerpc/8xx: Move additional DTLBMiss handlers out of exception area Christophe Leroy
2016-09-16  6:42 ` [PATCH v2 3/3] powerpc/8xx: make user addr DTLB miss the short path Christophe Leroy

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).