LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 2/2] powerpc: Use the new generic strncpy_from_user() and strnlen_user()
From: Paul Mackerras @ 2012-05-28  3:03 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linuxppc-dev, David Miller
In-Reply-To: <20120528025956.GA6822@bloggs.ozlabs.ibm.com>

This is much the same as for SPARC except that we can do the find_zero()
function more efficiently using the count-leading-zeroes instructions.
Tested on 32-bit and 64-bit PowerPC.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
I'm sending this in without Ben H's ack as he is away for a few weeks for
medical reasons.

 arch/powerpc/Kconfig                      |    2 ++
 arch/powerpc/include/asm/uaccess.h        |   41 ++++----------------------
 arch/powerpc/include/asm/word-at-a-time.h |   41 ++++++++++++++++++++++++++
 arch/powerpc/kernel/ppc_ksyms.c           |    2 --
 arch/powerpc/lib/string.S                 |   45 -----------------------------
 5 files changed, 48 insertions(+), 83 deletions(-)
 create mode 100644 arch/powerpc/include/asm/word-at-a-time.h

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 00b9874..050cb37 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -135,6 +135,8 @@ config PPC
 	select GENERIC_CMOS_UPDATE
 	select GENERIC_TIME_VSYSCALL
 	select GENERIC_CLOCKEVENTS
+	select GENERIC_STRNCPY_FROM_USER
+	select GENERIC_STRNLEN_USER
 
 config EARLY_PRINTK
 	bool
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index bd0fb84..17bb40c 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -40,6 +40,8 @@
 
 #define segment_eq(a, b)	((a).seg == (b).seg)
 
+#define user_addr_max()	(get_fs().seg)
+
 #ifdef __powerpc64__
 /*
  * This check is sufficient because there is a large enough
@@ -453,42 +455,9 @@ static inline unsigned long clear_user(void __user *addr, unsigned long size)
 	return size;
 }
 
-extern int __strncpy_from_user(char *dst, const char __user *src, long count);
-
-static inline long strncpy_from_user(char *dst, const char __user *src,
-		long count)
-{
-	might_sleep();
-	if (likely(access_ok(VERIFY_READ, src, 1)))
-		return __strncpy_from_user(dst, src, count);
-	return -EFAULT;
-}
-
-/*
- * Return the size of a string (including the ending 0)
- *
- * Return 0 for error
- */
-extern int __strnlen_user(const char __user *str, long len, unsigned long top);
-
-/*
- * Returns the length of the string at str (including the null byte),
- * or 0 if we hit a page we can't access,
- * or something > len if we didn't find a null byte.
- *
- * The `top' parameter to __strnlen_user is to make sure that
- * we can never overflow from the user area into kernel space.
- */
-static inline int strnlen_user(const char __user *str, long len)
-{
-	unsigned long top = current->thread.fs.seg;
-
-	if ((unsigned long)str > top)
-		return 0;
-	return __strnlen_user(str, len, top);
-}
-
-#define strlen_user(str)	strnlen_user((str), 0x7ffffffe)
+extern long strncpy_from_user(char *dst, const char __user *src, long count);
+extern __must_check long strlen_user(const char __user *str);
+extern __must_check long strnlen_user(const char __user *str, long n);
 
 #endif  /* __ASSEMBLY__ */
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/word-at-a-time.h b/arch/powerpc/include/asm/word-at-a-time.h
new file mode 100644
index 0000000..d0b6d4a
--- /dev/null
+++ b/arch/powerpc/include/asm/word-at-a-time.h
@@ -0,0 +1,41 @@
+#ifndef _ASM_WORD_AT_A_TIME_H
+#define _ASM_WORD_AT_A_TIME_H
+
+/*
+ * Word-at-a-time interfaces for PowerPC.
+ */
+
+#include <linux/kernel.h>
+#include <asm/asm-compat.h>
+
+struct word_at_a_time {
+	const unsigned long high_bits, low_bits;
+};
+
+#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0xfe) + 1, REPEAT_BYTE(0x7f) }
+
+/* Bit set in the bytes that have a zero */
+static inline long prep_zero_mask(unsigned long val, unsigned long rhs, const struct word_at_a_time *c)
+{
+	unsigned long mask = (val & c->low_bits) + c->low_bits;
+	return ~(mask | rhs);
+}
+
+#define create_zero_mask(mask) (mask)
+
+static inline long find_zero(unsigned long mask)
+{
+	long leading_zero_bits;
+
+	asm (PPC_CNTLZL "%0,%1" : "=r" (leading_zero_bits) : "r" (mask));
+	return leading_zero_bits >> 3;
+}
+
+static inline bool has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
+{
+	unsigned long rhs = val | c->low_bits;
+	*data = rhs;
+	return (val + c->high_bits) & ~rhs;
+}
+
+#endif /* _ASM_WORD_AT_A_TIME_H */
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index d1f2aaf..3e40315 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -85,8 +85,6 @@ EXPORT_SYMBOL(csum_tcpudp_magic);
 
 EXPORT_SYMBOL(__copy_tofrom_user);
 EXPORT_SYMBOL(__clear_user);
-EXPORT_SYMBOL(__strncpy_from_user);
-EXPORT_SYMBOL(__strnlen_user);
 EXPORT_SYMBOL(copy_page);
 
 #if defined(CONFIG_PCI) && defined(CONFIG_PPC32)
diff --git a/arch/powerpc/lib/string.S b/arch/powerpc/lib/string.S
index 455881a..093d631 100644
--- a/arch/powerpc/lib/string.S
+++ b/arch/powerpc/lib/string.S
@@ -160,48 +160,3 @@ _GLOBAL(__clear_user)
 	PPC_LONG	1b,91b
 	PPC_LONG	8b,92b
 	.text
-
-_GLOBAL(__strncpy_from_user)
-	addi	r6,r3,-1
-	addi	r4,r4,-1
-	cmpwi	0,r5,0
-	beq	2f
-	mtctr	r5
-1:	lbzu	r0,1(r4)
-	cmpwi	0,r0,0
-	stbu	r0,1(r6)
-	bdnzf	2,1b		/* dec ctr, branch if ctr != 0 && !cr0.eq */
-	beq	3f
-2:	addi	r6,r6,1
-3:	subf	r3,r3,r6
-	blr
-99:	li	r3,-EFAULT
-	blr
-
-	.section __ex_table,"a"
-	PPC_LONG	1b,99b
-	.text
-
-/* r3 = str, r4 = len (> 0), r5 = top (highest addr) */
-_GLOBAL(__strnlen_user)
-	addi	r7,r3,-1
-	subf	r6,r7,r5	/* top+1 - str */
-	cmplw	0,r4,r6
-	bge	0f
-	mr	r6,r4
-0:	mtctr	r6		/* ctr = min(len, top - str) */
-1:	lbzu	r0,1(r7)	/* get next byte */
-	cmpwi	0,r0,0
-	bdnzf	2,1b		/* loop if --ctr != 0 && byte != 0 */
-	addi	r7,r7,1
-	subf	r3,r3,r7	/* number of bytes we have looked at */
-	beqlr			/* return if we found a 0 byte */
-	cmpw	0,r3,r4		/* did we look at all len bytes? */
-	blt	99f		/* if not, must have hit top */
-	addi	r3,r4,1		/* return len + 1 to indicate no null found */
-	blr
-99:	li	r3,0		/* bad address, return 0 */
-	blr
-
-	.section __ex_table,"a"
-	PPC_LONG	1b,99b
-- 
1.7.10.rc3.219.g53414

^ permalink raw reply related

* Re: [PATCH 1/2] lib: Fix generic strnlen_user for 32-bit big-endian machines
From: David Miller @ 2012-05-28  3:55 UTC (permalink / raw)
  To: paulus; +Cc: linuxppc-dev, torvalds
In-Reply-To: <20120528025956.GA6822@bloggs.ozlabs.ibm.com>

From: Paul Mackerras <paulus@samba.org>
Date: Mon, 28 May 2012 12:59:56 +1000

> The aligned_byte_mask() definition is wrong for 32-bit big-endian
> machines: the "7-(n)" part of the definition assumes a long is 8
> bytes.  This fixes it by using BITS_PER_LONG - 8 instead of 8*7.
> Tested on 32-bit and 64-bit PowerPC.
> 
> Signed-off-by: Paul Mackerras <paulus@samba.org>

Acked-by: David S. Miller <davem@davemloft.net>

^ permalink raw reply

* Re: [PATCH 2/2] powerpc: Use the new generic strncpy_from_user() and strnlen_user()
From: David Miller @ 2012-05-28  3:56 UTC (permalink / raw)
  To: paulus; +Cc: linuxppc-dev, torvalds
In-Reply-To: <20120528030347.GB6822@bloggs.ozlabs.ibm.com>

From: Paul Mackerras <paulus@samba.org>
Date: Mon, 28 May 2012 13:03:47 +1000

> This is much the same as for SPARC except that we can do the find_zero()
> function more efficiently using the count-leading-zeroes instructions.
> Tested on 32-bit and 64-bit PowerPC.
> 
> Signed-off-by: Paul Mackerras <paulus@samba.org>

Looks great.

Acked-by: David S. Miller <davem@davemloft.net>

I might hack sparc64 to use 'lzd' or 'popc' on newer chips via code
patching at some point.

^ permalink raw reply

* [PATCH] powerpc: 64bit optimised __clear_user
From: Anton Blanchard @ 2012-05-28  5:54 UTC (permalink / raw)
  To: benh, paulus, michael, linuxppc-dev


I noticed __clear_user high up in a profile of one of my RAID stress
tests. The testcase was doing a dd from /dev/zero which ends up
calling __clear_user.

__clear_user is basically a loop with a single 4 byte store which
is horribly slow. We can do much better by aligning the destination
and doing 32 bytes of 8 byte stores in a loop.

The following testcase was used to verify the patch:

http://ozlabs.org/~anton/junkcode/stress_clear_user.c

To show the improvement in performance I ran a dd from /dev/zero
to /dev/null on a POWER7 box:

Before:

# dd if=/dev/zero of=/dev/null bs=1M count=10000
10485760000 bytes (10 GB) copied, 3.72379 s, 2.8 GB/s

After:

# time dd if=/dev/zero of=/dev/null bs=1M count=10000
10485760000 bytes (10 GB) copied, 0.728318 s, 14.4 GB/s

Over 5x faster.

Signed-off-by: Anton Blanchard <anton@samba.org>
---

Interestingly, it picked up an issue with the old clear_user which
fails when we are less than 4 bytes to the end of a page and the
next page is unmapped:

offset 4094 length 526 expected 2 got -1
expected 0x00 at offset 4094, got 0xff
expected 0x00 at offset 4095, got 0xff

We should fix that.

Index: linux-build/arch/powerpc/lib/Makefile
===================================================================
--- linux-build.orig/arch/powerpc/lib/Makefile	2012-05-28 10:59:09.281806751 +1000
+++ linux-build/arch/powerpc/lib/Makefile	2012-05-28 11:02:35.017452778 +1000
@@ -17,7 +17,7 @@ obj-$(CONFIG_HAS_IOMEM)	+= devres.o
 obj-$(CONFIG_PPC64)	+= copypage_64.o copyuser_64.o \
 			   memcpy_64.o usercopy_64.o mem_64.o string.o \
 			   checksum_wrappers_64.o hweight_64.o \
-			   copyuser_power7.o
+			   copyuser_power7.o string_64.o
 obj-$(CONFIG_XMON)	+= sstep.o ldstfp.o
 obj-$(CONFIG_KPROBES)	+= sstep.o ldstfp.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= sstep.o ldstfp.o
Index: linux-build/arch/powerpc/lib/string_64.S
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-build/arch/powerpc/lib/string_64.S	2012-05-28 14:56:03.937833406 +1000
@@ -0,0 +1,141 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+
+#include <asm/ppc_asm.h>
+
+/**
+ * __clear_user: - Zero a block of memory in user space, with less checking.
+ * @to:   Destination address, in user space.
+ * @n:    Number of bytes to zero.
+ *
+ * Zero a block of memory in user space.  Caller must check
+ * the specified block with access_ok() before calling this function.
+ *
+ * Returns number of bytes that could not be cleared.
+ * On success, this will be zero.
+ */
+
+	.macro err1
+100:
+	.section __ex_table,"a"
+	.align 3
+	.llong 100b,.Ldo_err1
+	.previous
+	.endm
+
+	.macro err2
+200:
+	.section __ex_table,"a"
+	.align 3
+	.llong 200b,.Ldo_err2
+	.previous
+	.endm
+
+	.macro err3
+300:
+	.section __ex_table,"a"
+	.align 3
+	.llong 300b,.Ldo_err3
+	.previous
+	.endm
+
+.Ldo_err1:
+	mr	r3,r8
+
+.Ldo_err2:
+	mtctr	r4
+1:
+err3;	stb	r0,0(r3)
+	addi	r3,r3,1
+	addi	r4,r4,-1
+	bdnz	1b
+
+.Ldo_err3:
+	mr	r3,r4
+	blr
+
+_GLOBAL(__clear_user)
+	cmpdi	r4,32
+	neg	r6,r3
+	li	r0,0
+	blt	.Lshort_clear
+	mr	r8,r3
+	mtocrf	0x01,r6
+	clrldi	r6,r6,(64-3)
+
+	/* Get the destination 8 byte aligned */
+	bf	cr7*4+3,1f
+err1;	stb	r0,0(r3)
+	addi	r3,r3,1
+
+1:	bf	cr7*4+2,2f
+err1;	sth	r0,0(r3)
+	addi	r3,r3,2
+
+2:	bf	cr7*4+1,3f
+err1;	stw	r0,0(r3)
+	addi	r3,r3,4
+
+3:	sub	r4,r4,r6
+	srdi	r6,r4,5
+	cmpdi	r4,32
+	blt	.Lshort_clear
+	mtctr	r6
+
+	/* Do 32 byte chunks */
+4:
+err2;	std	r0,0(r3)
+err2;	std	r0,8(r3)
+err2;	std	r0,16(r3)
+err2;	std	r0,24(r3)
+	addi	r3,r3,32
+	addi	r4,r4,-32
+	bdnz	4b
+
+.Lshort_clear:
+	/* up to 31 bytes to go */
+	cmpdi	r4,16
+	blt	6f
+err2;	std	r0,0(r3)
+err2;	std	r0,8(r3)
+	addi	r3,r3,16
+	addi	r4,r4,-16
+
+	/* Up to 15 bytes to go */
+6:	mr	r8,r3
+	clrldi	r4,r4,(64-4)
+	mtocrf	0x01,r4
+	bf	cr7*4+0,7f
+err1;	std	r0,0(r3)
+	addi	r3,r3,8
+
+7:	bf	cr7*4+1,8f
+err1;	stw	r0,0(r3)
+	addi	r3,r3,4
+
+8:	bf	cr7*4+2,9f
+err1;	sth	r0,0(r3)
+	addi	r3,r3,2
+
+9:	bf	cr7*4+3,10f
+err1;	stb	r0,0(r3)
+
+10:	li	r3,0
+	blr
Index: linux-build/arch/powerpc/lib/string.S
===================================================================
--- linux-build.orig/arch/powerpc/lib/string.S	2011-09-07 15:15:49.146459439 +1000
+++ linux-build/arch/powerpc/lib/string.S	2012-05-28 11:01:28.728249934 +1000
@@ -119,6 +119,7 @@ _GLOBAL(memchr)
 2:	li	r3,0
 	blr
 
+#ifdef CONFIG_PPC32
 _GLOBAL(__clear_user)
 	addi	r6,r3,-4
 	li	r3,0
@@ -160,6 +161,7 @@ _GLOBAL(__clear_user)
 	PPC_LONG	1b,91b
 	PPC_LONG	8b,92b
 	.text
+#endif
 
 _GLOBAL(__strncpy_from_user)
 	addi	r6,r3,-1

^ permalink raw reply

* [RFC PATCH 09/10] POWERPC: smp: remove call to ipi_call_lock()/ipi_call_unlock()
From: Yong Zhang @ 2012-05-29  7:16 UTC (permalink / raw)
  To: linux-kernel
  Cc: axboe, sshtylyov, nikunj, david.daney, peterz, akpm, ralf,
	Paul Mackerras, srivatsa.bhat, tglx, paulmck, linuxppc-dev, mingo
In-Reply-To: <1338275765-3217-1-git-send-email-yong.zhang0@gmail.com>

From: Yong Zhang <yong.zhang@windriver.com>

1) call_function.lock used in smp_call_function_many() is just to protect
   call_function.queue and &data->refs, cpu_online_mask is outside of the
   lock. And it's not necessary to protect cpu_online_mask,
   because data->cpumask is pre-calculated and even if a cpu is brought up
   when calling arch_send_call_function_ipi_mask(), it's harmless because
   validation test in generic_smp_call_function_interrupt() will take care
   of it.

2) For cpu down issue, stop_machine() will guarantee that no concurrent
   smp_call_function() is in progress.

Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: linuxppc-dev@lists.ozlabs.org
---
 arch/powerpc/kernel/smp.c |    2 --
 1 files changed, 0 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index e4cb343..e1417c4 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -571,7 +571,6 @@ void __devinit start_secondary(void *unused)
 	if (system_state == SYSTEM_RUNNING)
 		vdso_data->processorCount++;
 #endif
-	ipi_call_lock();
 	notify_cpu_starting(cpu);
 	set_cpu_online(cpu, true);
 	/* Update sibling maps */
@@ -601,7 +600,6 @@ void __devinit start_secondary(void *unused)
 		of_node_put(np);
 	}
 	of_node_put(l2_cache);
-	ipi_call_unlock();
 
 	local_irq_enable();
 
-- 
1.7.5.4

^ permalink raw reply related

* Re: [PATCH v5 1/5] powerpc/85xx: implement hardware timebase sync
From: Li Yang @ 2012-05-29  7:30 UTC (permalink / raw)
  To: Zhao Chenhui; +Cc: scottwood, linuxppc-dev, linux-kernel
In-Reply-To: <1336737235-15370-1-git-send-email-chenhui.zhao@freescale.com>

Hi Scott,

Thanks for the valuable comment raised before and we have updated the
patches accordingly.  Please review the updated patch set and ACK if
they are good to you.  We hope it can be applied in this window.

Leo

On Fri, May 11, 2012 at 7:53 PM, Zhao Chenhui
<chenhui.zhao@freescale.com> wrote:
> Do hardware timebase sync. Firstly, stop all timebases, and transfer
> the timebase value of the boot core to the other core. Finally,
> start all timebases.
>
> Only apply to dual-core chips, such as MPC8572, P2020, etc.
>
> Signed-off-by: Zhao Chenhui <chenhui.zhao@freescale.com>
> Signed-off-by: Li Yang <leoli@freescale.com>
> ---
> =C2=A0arch/powerpc/include/asm/fsl_guts.h | =C2=A0 =C2=A02 +
> =C2=A0arch/powerpc/platforms/85xx/smp.c =C2=A0 | =C2=A0 93 ++++++++++++++=
+++++++++++++++++++--
> =C2=A02 files changed, 91 insertions(+), 4 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/fsl_guts.h b/arch/powerpc/include/a=
sm/fsl_guts.h
> index aa4c488..dd5ba2c 100644
> --- a/arch/powerpc/include/asm/fsl_guts.h
> +++ b/arch/powerpc/include/asm/fsl_guts.h
> @@ -48,6 +48,8 @@ struct ccsr_guts {
> =C2=A0 =C2=A0 =C2=A0 =C2=A0 __be32 =C2=A0dmuxcr; =C2=A0 =C2=A0 =C2=A0 =C2=
=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0/* 0x.0068 - DMA Mux Control Register */
> =C2=A0 =C2=A0 =C2=A0 =C2=A0 u8 =C2=A0 =C2=A0 res06c[0x70 - 0x6c];
> =C2=A0 =C2=A0 =C2=A0 =C2=A0__be32 =C2=A0devdisr; =C2=A0 =C2=A0 =C2=A0 =C2=
=A0/* 0x.0070 - Device Disable Control */
> +#define CCSR_GUTS_DEVDISR_TB1 =C2=A00x00001000
> +#define CCSR_GUTS_DEVDISR_TB0 =C2=A00x00004000
> =C2=A0 =C2=A0 =C2=A0 =C2=A0__be32 =C2=A0devdisr2; =C2=A0 =C2=A0 =C2=A0 /*=
 0x.0074 - Device Disable Control 2 */
> =C2=A0 =C2=A0 =C2=A0 =C2=A0u8 =C2=A0 =C2=A0 =C2=A0res078[0x7c - 0x78];
> =C2=A0 =C2=A0 =C2=A0 =C2=A0__be32 =C2=A0pmjcr; =C2=A0 =C2=A0 =C2=A0 =C2=
=A0 =C2=A0/* 0x.007c - 4 Power Management Jog Control Register */
> diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/8=
5xx/smp.c
> index ff42490..6862dda 100644
> --- a/arch/powerpc/platforms/85xx/smp.c
> +++ b/arch/powerpc/platforms/85xx/smp.c
> @@ -24,6 +24,7 @@
> =C2=A0#include <asm/mpic.h>
> =C2=A0#include <asm/cacheflush.h>
> =C2=A0#include <asm/dbell.h>
> +#include <asm/fsl_guts.h>
>
> =C2=A0#include <sysdev/fsl_soc.h>
> =C2=A0#include <sysdev/mpic.h>
> @@ -115,13 +116,70 @@ smp_85xx_kick_cpu(int nr)
>
> =C2=A0struct smp_ops_t smp_85xx_ops =3D {
> =C2=A0 =C2=A0 =C2=A0 =C2=A0.kick_cpu =3D smp_85xx_kick_cpu,
> -#ifdef CONFIG_KEXEC
> - =C2=A0 =C2=A0 =C2=A0 .give_timebase =C2=A0=3D smp_generic_give_timebase=
,
> - =C2=A0 =C2=A0 =C2=A0 .take_timebase =C2=A0=3D smp_generic_take_timebase=
,
> -#endif
> =C2=A0};
>
> =C2=A0#ifdef CONFIG_KEXEC
> +static struct ccsr_guts __iomem *guts;
> +static u64 timebase;
> +static int tb_req;
> +static int tb_valid;
> +
> +static void mpc85xx_timebase_freeze(int freeze)
> +{
> + =C2=A0 =C2=A0 =C2=A0 unsigned int mask;
> +
> + =C2=A0 =C2=A0 =C2=A0 if (!guts)
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 return;
> +
> + =C2=A0 =C2=A0 =C2=A0 mask =3D CCSR_GUTS_DEVDISR_TB0 | CCSR_GUTS_DEVDISR=
_TB1;
> + =C2=A0 =C2=A0 =C2=A0 if (freeze)
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 setbits32(&guts->devdi=
sr, mask);
> + =C2=A0 =C2=A0 =C2=A0 else
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 clrbits32(&guts->devdi=
sr, mask);
> +
> + =C2=A0 =C2=A0 =C2=A0 in_be32(&guts->devdisr);
> +}
> +
> +static void mpc85xx_give_timebase(void)
> +{
> + =C2=A0 =C2=A0 =C2=A0 unsigned long flags;
> +
> + =C2=A0 =C2=A0 =C2=A0 local_irq_save(flags);
> +
> + =C2=A0 =C2=A0 =C2=A0 while (!tb_req)
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 barrier();
> + =C2=A0 =C2=A0 =C2=A0 tb_req =3D 0;
> +
> + =C2=A0 =C2=A0 =C2=A0 mpc85xx_timebase_freeze(1);
> + =C2=A0 =C2=A0 =C2=A0 timebase =3D get_tb();
> + =C2=A0 =C2=A0 =C2=A0 mb();
> + =C2=A0 =C2=A0 =C2=A0 tb_valid =3D 1;
> +
> + =C2=A0 =C2=A0 =C2=A0 while (tb_valid)
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 barrier();
> +
> + =C2=A0 =C2=A0 =C2=A0 mpc85xx_timebase_freeze(0);
> +
> + =C2=A0 =C2=A0 =C2=A0 local_irq_restore(flags);
> +}
> +
> +static void mpc85xx_take_timebase(void)
> +{
> + =C2=A0 =C2=A0 =C2=A0 unsigned long flags;
> +
> + =C2=A0 =C2=A0 =C2=A0 local_irq_save(flags);
> +
> + =C2=A0 =C2=A0 =C2=A0 tb_req =3D 1;
> + =C2=A0 =C2=A0 =C2=A0 while (!tb_valid)
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 barrier();
> +
> + =C2=A0 =C2=A0 =C2=A0 set_tb(timebase >> 32, timebase & 0xffffffff);
> + =C2=A0 =C2=A0 =C2=A0 mb();
> + =C2=A0 =C2=A0 =C2=A0 tb_valid =3D 0;
> +
> + =C2=A0 =C2=A0 =C2=A0 local_irq_restore(flags);
> +}
> +
> =C2=A0atomic_t kexec_down_cpus =3D ATOMIC_INIT(0);
>
> =C2=A0void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary)
> @@ -228,6 +286,20 @@ smp_85xx_setup_cpu(int cpu_nr)
> =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0doorbell_setup_thi=
s_cpu();
> =C2=A0}
>
> +#ifdef CONFIG_KEXEC
> +static const struct of_device_id guts_ids[] =3D {
> + =C2=A0 =C2=A0 =C2=A0 { .compatible =3D "fsl,mpc8572-guts", },
> + =C2=A0 =C2=A0 =C2=A0 { .compatible =3D "fsl,mpc8560-guts", },
> + =C2=A0 =C2=A0 =C2=A0 { .compatible =3D "fsl,mpc8536-guts", },
> + =C2=A0 =C2=A0 =C2=A0 { .compatible =3D "fsl,p1020-guts", },
> + =C2=A0 =C2=A0 =C2=A0 { .compatible =3D "fsl,p1021-guts", },
> + =C2=A0 =C2=A0 =C2=A0 { .compatible =3D "fsl,p1022-guts", },
> + =C2=A0 =C2=A0 =C2=A0 { .compatible =3D "fsl,p1023-guts", },
> + =C2=A0 =C2=A0 =C2=A0 { .compatible =3D "fsl,p2020-guts", },
> + =C2=A0 =C2=A0 =C2=A0 {},
> +};
> +#endif
> +
> =C2=A0void __init mpc85xx_smp_init(void)
> =C2=A0{
> =C2=A0 =C2=A0 =C2=A0 =C2=A0struct device_node *np;
> @@ -249,6 +321,19 @@ void __init mpc85xx_smp_init(void)
> =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0smp_85xx_ops.cause=
_ipi =3D doorbell_cause_ipi;
> =C2=A0 =C2=A0 =C2=A0 =C2=A0}
>
> +#ifdef CONFIG_KEXEC
> + =C2=A0 =C2=A0 =C2=A0 np =3D of_find_matching_node(NULL, guts_ids);
> + =C2=A0 =C2=A0 =C2=A0 if (np) {
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 guts =3D of_iomap(np, =
0);
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 smp_85xx_ops.give_time=
base =3D mpc85xx_give_timebase;
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 smp_85xx_ops.take_time=
base =3D mpc85xx_take_timebase;
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 of_node_put(np);
> + =C2=A0 =C2=A0 =C2=A0 } else {
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 smp_85xx_ops.give_time=
base =3D smp_generic_give_timebase;
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 smp_85xx_ops.take_time=
base =3D smp_generic_take_timebase;
> + =C2=A0 =C2=A0 =C2=A0 }
> +#endif
> +
> =C2=A0 =C2=A0 =C2=A0 =C2=A0smp_ops =3D &smp_85xx_ops;
>
> =C2=A0#ifdef CONFIG_KEXEC
> --
> 1.6.4.1
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/



--=20
- Leo

^ permalink raw reply

* [PATCH] powerpc: Use enhanced touch instructions in POWER7 copy_to_user/copy_from_user
From: Anton Blanchard @ 2012-05-29  8:14 UTC (permalink / raw)
  To: benh, paulus, michael, amodra; +Cc: linuxppc-dev


Version 2.06 of the POWER ISA introduced enhanced touch instructions,
allowing us to specify a number of attributes including the length of
a stream.

This patch adds a software stream for both loads and stores in the
POWER7 copy_tofrom_user loop. Since the setup is quite complicated
and we have to use an eieio to ensure correct ordering of the "GO"
command we only do this for copies above 4kB.

To quantify any performance improvements we need a working set
bigger than the caches so we operate on a 1GB file:

# dd if=/dev/zero of=/tmp/foo bs=1M count=1024

And we compare how fast we can read the file:

# dd if=/tmp/foo of=/dev/null bs=1M

before: 7.7 GB/s
after:  9.6 GB/s

A 25% improvement.

The worst case for this patch will be a completely L1 cache contained
copy of just over 4kB. We can test this with the copy_to_user
testcase we used to tune copy_tofrom_user originally:

http://ozlabs.org/~anton/junkcode/copy_to_user.c

# time ./copy_to_user2 -l 4224 -i 10000000

before: 6.807 s
after:  6.946 s

A 2% slowdown, which seems reasonable considering our data is unlikely
to be completely L1 contained.

Signed-off-by: Anton Blanchard <anton@samba.org>  
---

Index: linux-build/arch/powerpc/lib/copyuser_power7.S
===================================================================
--- linux-build.orig/arch/powerpc/lib/copyuser_power7.S	2012-03-01 16:15:31.073813523 +1100
+++ linux-build/arch/powerpc/lib/copyuser_power7.S	2012-05-29 16:14:44.129704887 +1000
@@ -298,6 +298,37 @@ err1;	stb	r0,0(r3)
 	ld	r5,STACKFRAMESIZE+64(r1)
 	mtlr	r0
 
+	/*
+	 * We prefetch both the source and destination using enhanced touch
+	 * instructions. We use a stream ID of 0 for the load side and
+	 * 1 for the store side.
+	 */
+	clrrdi	r6,r4,7
+	clrrdi	r9,r3,7
+	ori	r9,r9,1		/* stream=1 */
+
+	srdi	r7,r5,7		/* length in cachelines, capped at 0x3FF */
+	cmpldi	r7,0x3FF
+	ble	1f
+	li	r7,0x3FF
+1:	lis	r0,0x0E00	/* depth=7 */
+	sldi	r7,r7,7
+	or	r7,r7,r0
+	ori	r10,r7,1	/* stream=1 */
+
+	lis	r8,0x8000	/* GO=1 */
+	clrldi	r8,r8,32
+
+.machine push
+.machine "power4"
+	dcbt	r0,r6,0b01000
+	dcbt	r0,r7,0b01010
+	dcbtst	r0,r9,0b01000
+	dcbtst	r0,r10,0b01010
+	eieio
+	dcbt	r0,r8,0b01010	/* GO */
+.machine pop
+
 	beq	.Lunwind_stack_nonvmx_copy
 
 	/*

^ permalink raw reply

* RE: pread() and pwrite() system calls
From: David Laight @ 2012-05-29  8:28 UTC (permalink / raw)
  To: Gabriel Paubert; +Cc: linuxppc-dev
In-Reply-To: <20120525164550.GA32406@visitor2.iram.es>

> > A special pread/pwrite asm stub that just copies
> > r7 to r0 could be used.
> >=20
> > Would it be enough to do:
> > syscall_pread_pwrite:
> > 	mov 0,7
> > 	sc
> > 	blr
> > and handle the -ve -> errno in C?
>=20
> Huh? Won't fly, r0 is used for the system call number!

I was copying that from r7!
Actually I have a much better stub by copying the one
used for mmap().
The system call itself is fine.
Using the system call almost halved the time taken
for a 4-byte read.

> On the other hand, I believed PPC had no problems passing
> up to 8 32 bit arguments in registers (r3 to r10), but
> I may be confusing with the standard ABI for function calls.
>=20
> Hmm, a quick look at kernel/entry_32.s shows that it should=20
> be able to use at least r3 to r8, which should be sufficient.
>=20
> I think that it is an uClibc problem.

True, in that it isn't a kernel bug.
OTOH the kernel is likely to get blamed for non-atomic pread.
I've found the same user-space code in newlib as well.
glibc may be ok, some code I've found implies it only
uses the 'emulation' when the system call returns ENOSYS.

	David

^ permalink raw reply

* RE: pread() and pwrite() system calls
From: Michael Ellerman @ 2012-05-29  8:54 UTC (permalink / raw)
  To: David Laight; +Cc: linuxppc-dev
In-Reply-To: <AE90C24D6B3A694183C094C60CF0A2F6026B6F29@saturn3.aculab.com>

On Tue, 2012-05-29 at 09:28 +0100, David Laight wrote:
> > > A special pread/pwrite asm stub that just copies
> > > r7 to r0 could be used.
> > > 
> > > Would it be enough to do:
> > > syscall_pread_pwrite:
> > > 	mov 0,7
> > > 	sc
> > > 	blr
> > > and handle the -ve -> errno in C?
> > 
> > Huh? Won't fly, r0 is used for the system call number!
> 
> I was copying that from r7!

Using the non-existent 'mov' instruction!

or 0,7,7

is much clearer :)

cheers

^ permalink raw reply

* [PATCH] powerpc: Clear RI and EE at the same time in system call exit
From: Anton Blanchard @ 2012-05-29 11:20 UTC (permalink / raw)
  To: benh, paulus, michael; +Cc: linuxppc-dev


In system call exit we currently clear RI and EE separately. An
mtmsrd is a slow operation and we can save cycles by doing it all
in one go.

This does complicate things a bit - we have to be careful to restore
RI if we branch out before returning to userspace.

On a POWER7 with virtual cputime disabled this patch improves the
null system call by 7%.

Signed-off-by: Anton Blanchard <anton@samba.org>
---

Index: linux-build/arch/powerpc/kernel/entry_64.S
===================================================================
--- linux-build.orig/arch/powerpc/kernel/entry_64.S	2012-05-28 18:23:33.374451416 +1000
+++ linux-build/arch/powerpc/kernel/entry_64.S	2012-05-29 21:18:22.280934940 +1000
@@ -197,7 +197,16 @@ syscall_exit:
 	wrteei	0
 #else
 	ld	r10,PACAKMSR(r13)
-	mtmsrd	r10,1
+	/*
+	 * For performance reasons we clear RI the same time that we
+	 * clear EE. We only need to clear RI just before we restore r13
+	 * below, but batching it with EE saves us one expensive mtmsrd call.
+	 * We have to be careful to restore RI if we branch anywhere from
+	 * here (eg syscall_exit_work).
+	 */
+	li	r9,MSR_RI
+	andc	r11,r10,r9
+	mtmsrd	r11,1
 #endif /* CONFIG_PPC_BOOK3E */
 
 	ld	r9,TI_FLAGS(r12)
@@ -214,17 +223,6 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
 	andi.	r6,r8,MSR_PR
 	ld	r4,_LINK(r1)
-	/*
-	 * Clear RI before restoring r13.  If we are returning to
-	 * userspace and we take an exception after restoring r13,
-	 * we end up corrupting the userspace r13 value.
-	 */
-#ifdef CONFIG_PPC_BOOK3S
-	/* No MSR:RI on BookE */
-	li	r12,MSR_RI
-	andc	r11,r10,r12
-	mtmsrd	r11,1			/* clear MSR.RI */
-#endif /* CONFIG_PPC_BOOK3S */
 
 	beq-	1f
 	ACCOUNT_CPU_USER_EXIT(r11, r12)
@@ -271,6 +269,7 @@ syscall_enosys:
 	b	syscall_exit
 	
 syscall_exit_work:
+	mtmsrd	r10,1		/* Restore RI */
 	/* If TIF_RESTOREALL is set, don't scribble on either r3 or ccr.
 	 If TIF_NOERROR is set, just save r3 as it is. */
 

^ permalink raw reply

* MSR loses DE bit in 3.3, BDI2000 cant handle breakpoints?
From: Joakim Tjernlund @ 2012-05-29 12:00 UTC (permalink / raw)
  To: linuxppc-dev


I cannot make simple breakpoints work with the BDI2000 in 3.3. Abatron suggests that it
is caused by MSR[DE] being cleared by the kernel. With the emulator I can see that
MSR[DE] is off quite often by just stopping at random times and looking at MSR so
it seems like the kernel is turning MSR[DE] off most of the time.
Anyone else having success debugging 3.3 with BDI2000?

This is on a P2010(E500/BOOKE) CPU.

^ permalink raw reply

* RE: [linuxppc-release] [PATCH v5 1/5] powerpc/85xx: implement hardware timebase sync
From: Zhao Chenhui-B35336 @ 2012-05-29 12:20 UTC (permalink / raw)
  To: galak@kernel.crashing.org
  Cc: Wood Scott-B07421, linuxppc-dev@lists.ozlabs.org, Li Yang-R58472
In-Reply-To: <1336737235-15370-1-git-send-email-chenhui.zhao@freescale.com>

Hi Kumar,

There is no comment for these patches so far. Do you think these patches can
be merged?
We really want these patches to be merged in this merge window.

Thanks.

Best Regards,
Chenhui


> -----Original Message-----
> From: Zhao Chenhui-B35336
> Sent: Friday, May 25, 2012 3:09 PM
> To: Wood Scott-B07421; galak@kernel.crashing.org
> Cc: Li Yang-R58472
> Subject: RE: [linuxppc-release] [PATCH v5 1/5] powerpc/85xx: implement hardware timebase sync
>
> Hi Scott and Kumar,
>
> Do you have comments for these patches?
>
> http://patchwork.ozlabs.org/patch/158484/
> http://patchwork.ozlabs.org/patch/158485/
> http://patchwork.ozlabs.org/patch/158487/
> http://patchwork.ozlabs.org/patch/158486/
> http://patchwork.ozlabs.org/patch/158488/
>
> Thanks.
>
> Best Regards,
> Chenhui
>
> > -----Original Message-----
> > From: linuxppc-release-bounces@linux.freescale.net [mailto:linuxppc-release-
> > bounces@linux.freescale.net] On Behalf Of Zhao Chenhui-B35336
> > Sent: Friday, May 11, 2012 7:54 PM
> > To: linuxppc-dev@lists.ozlabs.org
> > Cc: Wood Scott-B07421; Li Yang-R58472; linux-kernel@vger.kernel.org; galak@kernel.crashing.org
> > Subject: [linuxppc-release] [PATCH v5 1/5] powerpc/85xx: implement hardware timebase sync
> >
> > Do hardware timebase sync. Firstly, stop all timebases, and transfer
> > the timebase value of the boot core to the other core. Finally,
> > start all timebases.
> >
> > Only apply to dual-core chips, such as MPC8572, P2020, etc.
> >
> > Signed-off-by: Zhao Chenhui <chenhui.zhao@freescale.com>
> > Signed-off-by: Li Yang <leoli@freescale.com>
> > ---
> >  arch/powerpc/include/asm/fsl_guts.h |    2 +
> >  arch/powerpc/platforms/85xx/smp.c   |   93 +++++++++++++++++++++++++++=
++++++--
> >  2 files changed, 91 insertions(+), 4 deletions(-)
> >
> > diff --git a/arch/powerpc/include/asm/fsl_guts.h b/arch/powerpc/include=
/asm/fsl_guts.h
> > index aa4c488..dd5ba2c 100644
> > --- a/arch/powerpc/include/asm/fsl_guts.h
> > +++ b/arch/powerpc/include/asm/fsl_guts.h
> > @@ -48,6 +48,8 @@ struct ccsr_guts {
> >          __be32  dmuxcr;		/* 0x.0068 - DMA Mux Control Register */
> >          u8	res06c[0x70 - 0x6c];
> >  	__be32	devdisr;	/* 0x.0070 - Device Disable Control */
> > +#define CCSR_GUTS_DEVDISR_TB1	0x00001000
> > +#define CCSR_GUTS_DEVDISR_TB0	0x00004000
> >  	__be32	devdisr2;	/* 0x.0074 - Device Disable Control 2 */
> >  	u8	res078[0x7c - 0x78];
> >  	__be32  pmjcr;		/* 0x.007c - 4 Power Management Jog Control Register =
*/
> > diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms=
/85xx/smp.c
> > index ff42490..6862dda 100644
> > --- a/arch/powerpc/platforms/85xx/smp.c
> > +++ b/arch/powerpc/platforms/85xx/smp.c
> > @@ -24,6 +24,7 @@
> >  #include <asm/mpic.h>
> >  #include <asm/cacheflush.h>
> >  #include <asm/dbell.h>
> > +#include <asm/fsl_guts.h>
> >
> >  #include <sysdev/fsl_soc.h>
> >  #include <sysdev/mpic.h>
> > @@ -115,13 +116,70 @@ smp_85xx_kick_cpu(int nr)
> >
> >  struct smp_ops_t smp_85xx_ops =3D {
> >  	.kick_cpu =3D smp_85xx_kick_cpu,
> > -#ifdef CONFIG_KEXEC
> > -	.give_timebase	=3D smp_generic_give_timebase,
> > -	.take_timebase	=3D smp_generic_take_timebase,
> > -#endif
> >  };
> >
> >  #ifdef CONFIG_KEXEC
> > +static struct ccsr_guts __iomem *guts;
> > +static u64 timebase;
> > +static int tb_req;
> > +static int tb_valid;
> > +
> > +static void mpc85xx_timebase_freeze(int freeze)
> > +{
> > +	unsigned int mask;
> > +
> > +	if (!guts)
> > +		return;
> > +
> > +	mask =3D CCSR_GUTS_DEVDISR_TB0 | CCSR_GUTS_DEVDISR_TB1;
> > +	if (freeze)
> > +		setbits32(&guts->devdisr, mask);
> > +	else
> > +		clrbits32(&guts->devdisr, mask);
> > +
> > +	in_be32(&guts->devdisr);
> > +}
> > +
> > +static void mpc85xx_give_timebase(void)
> > +{
> > +	unsigned long flags;
> > +
> > +	local_irq_save(flags);
> > +
> > +	while (!tb_req)
> > +		barrier();
> > +	tb_req =3D 0;
> > +
> > +	mpc85xx_timebase_freeze(1);
> > +	timebase =3D get_tb();
> > +	mb();
> > +	tb_valid =3D 1;
> > +
> > +	while (tb_valid)
> > +		barrier();
> > +
> > +	mpc85xx_timebase_freeze(0);
> > +
> > +	local_irq_restore(flags);
> > +}
> > +
> > +static void mpc85xx_take_timebase(void)
> > +{
> > +	unsigned long flags;
> > +
> > +	local_irq_save(flags);
> > +
> > +	tb_req =3D 1;
> > +	while (!tb_valid)
> > +		barrier();
> > +
> > +	set_tb(timebase >> 32, timebase & 0xffffffff);
> > +	mb();
> > +	tb_valid =3D 0;
> > +
> > +	local_irq_restore(flags);
> > +}
> > +
> >  atomic_t kexec_down_cpus =3D ATOMIC_INIT(0);
> >
> >  void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary)
> > @@ -228,6 +286,20 @@ smp_85xx_setup_cpu(int cpu_nr)
> >  		doorbell_setup_this_cpu();
> >  }
> >
> > +#ifdef CONFIG_KEXEC
> > +static const struct of_device_id guts_ids[] =3D {
> > +	{ .compatible =3D "fsl,mpc8572-guts", },
> > +	{ .compatible =3D "fsl,mpc8560-guts", },
> > +	{ .compatible =3D "fsl,mpc8536-guts", },
> > +	{ .compatible =3D "fsl,p1020-guts", },
> > +	{ .compatible =3D "fsl,p1021-guts", },
> > +	{ .compatible =3D "fsl,p1022-guts", },
> > +	{ .compatible =3D "fsl,p1023-guts", },
> > +	{ .compatible =3D "fsl,p2020-guts", },
> > +	{},
> > +};
> > +#endif
> > +
> >  void __init mpc85xx_smp_init(void)
> >  {
> >  	struct device_node *np;
> > @@ -249,6 +321,19 @@ void __init mpc85xx_smp_init(void)
> >  		smp_85xx_ops.cause_ipi =3D doorbell_cause_ipi;
> >  	}
> >
> > +#ifdef CONFIG_KEXEC
> > +	np =3D of_find_matching_node(NULL, guts_ids);
> > +	if (np) {
> > +		guts =3D of_iomap(np, 0);
> > +		smp_85xx_ops.give_timebase =3D mpc85xx_give_timebase;
> > +		smp_85xx_ops.take_timebase =3D mpc85xx_take_timebase;
> > +		of_node_put(np);
> > +	} else {
> > +		smp_85xx_ops.give_timebase =3D smp_generic_give_timebase;
> > +		smp_85xx_ops.take_timebase =3D smp_generic_take_timebase;
> > +	}
> > +#endif
> > +
> >  	smp_ops =3D &smp_85xx_ops;
> >
> >  #ifdef CONFIG_KEXEC
> > --
> > 1.6.4.1
> >
> > _______________________________________________
> > linuxppc-release mailing list
> > linuxppc-release@linux.freescale.net
> > http://linux.freescale.net/mailman/listinfo/linuxppc-release

^ permalink raw reply

* Re: ppc/sata-fsl: orphan config value: CONFIG_MPC8315_DS
From: Scott Wood @ 2012-05-29 18:02 UTC (permalink / raw)
  To: Anthony Foiani
  Cc: Robert P.J.Day, linuxppc-dev@lists.ozlabs.org, Li Yang-R58472,
	Jeff Garzik, Adrian Bunk
In-Reply-To: <g7gvzbfhb.fsf@dworkin.scrye.com>

On 05/26/2012 01:53 AM, Anthony Foiani wrote:
> Li Yang-R58472 <r58472@freescale.com> writes:
> 
>> Thanks for bringing [CONFIG_MPC8315_DS] up again.  Looks like we do
>> have a problem here.
> 
> My impression is that the simplest fix is Adrian's patch, which simply
> keys off CONFIG_MPC831x_RDB.  It's not very satisfying, but I'll take
> "working" vs. "rare lockups at boot".

CONFIG_MPC831x_RDB doesn't mean that you're running on such a board,
only that the kernel supports those boards.  It should be a runtime test.

-Scott

^ permalink raw reply

* Re: [PATCH] powerpc: Clear RI and EE at the same time in system call exit
From: Scott Wood @ 2012-05-29 18:07 UTC (permalink / raw)
  To: Anton Blanchard; +Cc: michael, paulus, linuxppc-dev
In-Reply-To: <20120529212010.0152a083@kryten>

On 05/29/2012 06:20 AM, Anton Blanchard wrote:
> 
> In system call exit we currently clear RI and EE separately. An
> mtmsrd is a slow operation and we can save cycles by doing it all
> in one go.
> 
> This does complicate things a bit - we have to be careful to restore
> RI if we branch out before returning to userspace.
> 
> On a POWER7 with virtual cputime disabled this patch improves the
> null system call by 7%.
> 
> Signed-off-by: Anton Blanchard <anton@samba.org>
> ---
> 
> Index: linux-build/arch/powerpc/kernel/entry_64.S
> ===================================================================
> --- linux-build.orig/arch/powerpc/kernel/entry_64.S	2012-05-28 18:23:33.374451416 +1000
> +++ linux-build/arch/powerpc/kernel/entry_64.S	2012-05-29 21:18:22.280934940 +1000
> @@ -197,7 +197,16 @@ syscall_exit:
>  	wrteei	0
>  #else
>  	ld	r10,PACAKMSR(r13)
> -	mtmsrd	r10,1
> +	/*
> +	 * For performance reasons we clear RI the same time that we
> +	 * clear EE. We only need to clear RI just before we restore r13
> +	 * below, but batching it with EE saves us one expensive mtmsrd call.
> +	 * We have to be careful to restore RI if we branch anywhere from
> +	 * here (eg syscall_exit_work).
> +	 */
> +	li	r9,MSR_RI
> +	andc	r11,r10,r9
> +	mtmsrd	r11,1
>  #endif /* CONFIG_PPC_BOOK3E */
>  
>  	ld	r9,TI_FLAGS(r12)
> @@ -214,17 +223,6 @@ BEGIN_FTR_SECTION
>  END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
>  	andi.	r6,r8,MSR_PR
>  	ld	r4,_LINK(r1)
> -	/*
> -	 * Clear RI before restoring r13.  If we are returning to
> -	 * userspace and we take an exception after restoring r13,
> -	 * we end up corrupting the userspace r13 value.
> -	 */
> -#ifdef CONFIG_PPC_BOOK3S
> -	/* No MSR:RI on BookE */
> -	li	r12,MSR_RI
> -	andc	r11,r10,r12
> -	mtmsrd	r11,1			/* clear MSR.RI */
> -#endif /* CONFIG_PPC_BOOK3S */
>  
>  	beq-	1f
>  	ACCOUNT_CPU_USER_EXIT(r11, r12)
> @@ -271,6 +269,7 @@ syscall_enosys:
>  	b	syscall_exit
>  	
>  syscall_exit_work:
> +	mtmsrd	r10,1		/* Restore RI */

That mtmsrd needs an #ifdef CONFIG_PPC_BOOK3S

-Scott

^ permalink raw reply

* Re: MSR loses DE bit in 3.3, BDI2000 cant handle breakpoints?
From: Joakim Tjernlund @ 2012-05-29 18:29 UTC (permalink / raw)
  To: Bob Cochran; +Cc: linuxppc-dev, support
In-Reply-To: <4FC511C1.4050007@mindchasers.com>

Bob Cochran <ppc@mindchasers.com> wrote on 2012/05/29 20:13:21:
>
> On 05/29/2012 08:00 AM, Joakim Tjernlund wrote:
> >
> > I cannot make simple break points using BDI2000 work in 3.3, abatro suggests that it
> > depends on MSR[DE] is cleared by the kernel. With the emulator I can see that
> > MSR[DE] is off quite often by just stopping at random times and looking at MSR so
> > it seems like the kernel is turning MSR[DE] off most of the time.
> > Anyone else having success debugging 3.3 with BDI2000?
> >
> > This is on a P2010(E500/BOOKE) CPU.
> >
> > _______________________________________________
> > Linuxppc-dev mailing list
> > Linuxppc-dev@lists.ozlabs.org
> > https://lists.ozlabs.org/listinfo/linuxppc-dev
> >
>
> I debug using Freescale CodeWarrior and a USB tap, which also rely on
> MSR[DE] being set.  I develop from the mainline & have a patch set that
> I just recently re-tweaked to support kernel debugging.
>
> If you want, I'll send you my set of patches for the kernel.  They might
> be useful (not sure since I don't use BDI).

Thanks, that could be useful, however I just figured something out.
Changing
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index 500fe1d..0cb259b 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -37,7 +37,7 @@
 #define MSR_KERNEL     (MSR_ME|MSR_RI|MSR_IR|MSR_DR|MSR_CE)
 #define MSR_USER       (MSR_KERNEL|MSR_PR|MSR_EE)
 #else
-#define MSR_KERNEL     (MSR_ME|MSR_RI|MSR_CE)
+#define MSR_KERNEL     (MSR_ME|MSR_RI|MSR_CE|MSR_DE)
 #define MSR_USER       (MSR_KERNEL|MSR_PR|MSR_EE)
 #endif

Made it work (possibly one should change MSR_USER too?)

The question now is why MSR_DE is not on by default, especially since
BDI2000 is supported by the kernel (CONFIG_BDI_SWITCH=y is set in my kernel)?

 Jocke

^ permalink raw reply related

* Re: MSR loses DE bit in 3.3, BDI2000 cant handle breakpoints?
From: Bob Cochran @ 2012-05-29 18:13 UTC (permalink / raw)
  To: Joakim Tjernlund; +Cc: linuxppc-dev
In-Reply-To: <OF00FC5182.FED0E75C-ONC1257A0D.003AA532-C1257A0D.0041F687@transmode.se>

On 05/29/2012 08:00 AM, Joakim Tjernlund wrote:
>
> I cannot make simple break points using BDI2000 work in 3.3, abatro suggests that it
> depends on MSR[DE] is cleared by the kernel. With the emulator I can see that
> MSR[DE] is off quite often by just stopping at random times and looking at MSR so
> it seems like the kernel is turning MSR[DE] off most of the time.
> Anyone else having success debugging 3.3 with BDI2000?
>
> This is on a P2010(E500/BOOKE) CPU.
>
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev
>

I debug using Freescale CodeWarrior and a USB tap, which also rely on 
MSR[DE] being set.  I develop from the mainline & have a patch set that 
I just recently re-tweaked to support kernel debugging.

If you want, I'll send you my set of patches for the kernel.  They might 
be useful (not sure since I don't use BDI).

^ permalink raw reply

* Re: MSR loses DE bit in 3.3, BDI2000 cant handle breakpoints?
From: Wolfgang Denk @ 2012-05-29 20:13 UTC (permalink / raw)
  To: Bob Cochran; +Cc: linuxppc-dev
In-Reply-To: <4FC511C1.4050007@mindchasers.com>

Dear Bob,

In message <4FC511C1.4050007@mindchasers.com> you wrote:
>
> I debug using Freescale CodeWarrior and a USB tap, which also rely on 
> MSR[DE] being set.  I develop from the mainline & have a patch set that 
> I just recently re-tweaked to support kernel debugging.
> 
> If you want, I'll send you my set of patches for the kernel.  They might 
> be useful (not sure since I don't use BDI).

Please rather submit as a patch on the mailing list.

Thanks.

Best regards,

Wolfgang Denk

-- 
DENX Software Engineering GmbH,     MD: Wolfgang Denk & Detlev Zundel
HRB 165235 Munich, Office: Kirchenstr.5, D-82194 Groebenzell, Germany
Phone: (+49)-8142-66989-10 Fax: (+49)-8142-66989-80 Email: wd@denx.de
egrep patterns are full regular expressions; it uses a fast  determi-
nistic algorithm that sometimes needs exponential space.
- unix manuals

^ permalink raw reply

* Re: ppc/sata-fsl: orphan config value: CONFIG_MPC8315_DS
From: Anthony Foiani @ 2012-05-29 22:07 UTC (permalink / raw)
  To: Scott Wood
  Cc: Robert P.J.Day, linuxppc-dev@lists.ozlabs.org, Li Yang-R58472,
	Jeff Garzik, Adrian Bunk
In-Reply-To: <4FC50F22.2040404@freescale.com>

Scott Wood <scottwood@freescale.com> writes:

> CONFIG_MPC831x_RDB doesn't mean that you're running on such a board,
> only that the kernel supports those boards.  It should be a runtime
> test.

Point taken.

If that SATA check is CPU/SOC-based, then it should be easy enough to
test.  The cpuinfo for my board is:

  # cat /proc/cpuinfo
  processor       : 0
  cpu             : e300c3
  clock           : 266.666664MHz
  revision        : 2.0 (pvr 8085 0020)
  bogomips        : 66.66
  timebase        : 33333333

On the other hand, if the problem is actually caused by board trace
routing (or other hardware that's outside the control of the CPU/SOC),
then I don't know how possible a runtime check will be.

Do you know if there is a specific erratum that the MPC8315_DS ran
across that required this fix, or was it a band-aid in the first
place?

Either way, thanks for looking into this.

Thanks,
Tony

^ permalink raw reply

* [PATCH] powerpc: Clear RI and EE at the same time in system call exit
From: Anton Blanchard @ 2012-05-29 22:22 UTC (permalink / raw)
  To: benh, paulus, michael, scottwood; +Cc: linuxppc-dev
In-Reply-To: <4FC5104E.1000504@freescale.com>


Hi Scott,

> >  syscall_exit_work:
> > +	mtmsrd	r10,1		/* Restore RI */
> 
> That mtmsrd needs an #ifdef CONFIG_PPC_BOOK3S

Thanks!
Anton
--

In system call exit we currently clear RI and EE separately. An
mtmsrd is a slow operation and we can save cycles by doing it all
in one go.

This does complicate things a bit - we have to be careful to restore
RI if we branch out before returning to userspace.

On a POWER7 with virtual cputime disabled this patch improves the
null system call by 7%.

Signed-off-by: Anton Blanchard <anton@samba.org>
---

Index: linux-build/arch/powerpc/kernel/entry_64.S
===================================================================
--- linux-build.orig/arch/powerpc/kernel/entry_64.S	2012-05-29 21:22:40.293549055 +1000
+++ linux-build/arch/powerpc/kernel/entry_64.S	2012-05-30 07:47:31.380737406 +1000
@@ -197,7 +197,16 @@ syscall_exit:
 	wrteei	0
 #else
 	ld	r10,PACAKMSR(r13)
-	mtmsrd	r10,1
+	/*
+	 * For performance reasons we clear RI the same time that we
+	 * clear EE. We only need to clear RI just before we restore r13
+	 * below, but batching it with EE saves us one expensive mtmsrd call.
+	 * We have to be careful to restore RI if we branch anywhere from
+	 * here (eg syscall_exit_work).
+	 */
+	li	r9,MSR_RI
+	andc	r11,r10,r9
+	mtmsrd	r11,1
 #endif /* CONFIG_PPC_BOOK3E */
 
 	ld	r9,TI_FLAGS(r12)
@@ -214,17 +223,6 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
 	andi.	r6,r8,MSR_PR
 	ld	r4,_LINK(r1)
-	/*
-	 * Clear RI before restoring r13.  If we are returning to
-	 * userspace and we take an exception after restoring r13,
-	 * we end up corrupting the userspace r13 value.
-	 */
-#ifdef CONFIG_PPC_BOOK3S
-	/* No MSR:RI on BookE */
-	li	r12,MSR_RI
-	andc	r11,r10,r12
-	mtmsrd	r11,1			/* clear MSR.RI */
-#endif /* CONFIG_PPC_BOOK3S */
 
 	beq-	1f
 	ACCOUNT_CPU_USER_EXIT(r11, r12)
@@ -271,6 +269,9 @@ syscall_enosys:
 	b	syscall_exit
 	
 syscall_exit_work:
+#ifdef CONFIG_PPC_BOOK3S
+	mtmsrd	r10,1		/* Restore RI */
+#endif
 	/* If TIF_RESTOREALL is set, don't scribble on either r3 or ccr.
 	 If TIF_NOERROR is set, just save r3 as it is. */
 

^ permalink raw reply

* Re: ppc/sata-fsl: orphan config value: CONFIG_MPC8315_DS
From: Scott Wood @ 2012-05-29 22:57 UTC (permalink / raw)
  To: Anthony Foiani
  Cc: Robert P.J.Day, linuxppc-dev@lists.ozlabs.org, Li Yang-R58472,
	Jeff Garzik, Adrian Bunk
In-Reply-To: <gvcjetze3.fsf@dworkin.scrye.com>

On 05/29/2012 05:07 PM, Anthony Foiani wrote:
> Scott Wood <scottwood@freescale.com> writes:
> 
>> CONFIG_MPC831x_RDB doesn't mean that you're running on such a board,
>> only that the kernel supports those boards.  It should be a runtime
>> test.
> 
> Point taken.
> 
> If that SATA check is CPU/SOC-based, then it should be easy enough to
> test.  The cpuinfo for my board is:
> 
>   # cat /proc/cpuinfo
>   processor       : 0
>   cpu             : e300c3
>   clock           : 266.666664MHz
>   revision        : 2.0 (pvr 8085 0020)
>   bogomips        : 66.66
>   timebase        : 33333333
> 
> On the other hand, if the problem is actually caused by board trace
> routing (or other hardware that's outside the control of the CPU/SOC),
> then I don't know how possible a runtime check will be.

Board information is available from the device tree, and from platform
code that was selected based on the device tree.

> Do you know if there is a specific errata that the MPC8315_DS ran
> across that required this fix, or was it a band-aid in the first
> place?

I don't know the history of this, sorry.  It looks like Yang Li added
this code -- Yang, can you answer this?

-Scott

^ permalink raw reply

* [PATCH 1/2] powerpc: Rename copyuser_power7_vmx.c to vmx-helper.c
From: Anton Blanchard @ 2012-05-30  5:31 UTC (permalink / raw)
  To: benh, paulus, michael, linuxppc-dev


Subsequent patches will add more VMX library functions and it makes
sense to keep all the c-code helper functions in the one file.

Signed-off-by: Anton Blanchard <anton@samba.org>
---

Index: linux-build/arch/powerpc/lib/Makefile
===================================================================
--- linux-build.orig/arch/powerpc/lib/Makefile	2012-05-30 09:39:59.084233436 +1000
+++ linux-build/arch/powerpc/lib/Makefile	2012-05-30 10:22:32.565764322 +1000
@@ -24,7 +24,7 @@ obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= sste
 
 ifeq ($(CONFIG_PPC64),y)
 obj-$(CONFIG_SMP)	+= locks.o
-obj-$(CONFIG_ALTIVEC)	+= copyuser_power7_vmx.o
+obj-$(CONFIG_ALTIVEC)	+= vmx-helper.o
 endif
 
 obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o
Index: linux-build/arch/powerpc/lib/vmx-helper.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-build/arch/powerpc/lib/vmx-helper.c	2012-05-30 10:22:32.577764541 +1000
@@ -0,0 +1,51 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2011
+ *
+ * Authors: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
+ *          Anton Blanchard <anton@au.ibm.com>
+ */
+#include <linux/uaccess.h>
+#include <linux/hardirq.h>
+#include <asm/switch_to.h>
+
+int enter_vmx_usercopy(void)
+{
+	if (in_interrupt())
+		return 0;
+
+	/* This acts as preempt_disable() as well and will make
+	 * enable_kernel_altivec(). We need to disable page faults
+	 * as they can call schedule and thus make us lose the VMX
+	 * context. So on page faults, we just fail which will cause
+	 * a fallback to the normal non-vmx copy.
+	 */
+	pagefault_disable();
+
+	enable_kernel_altivec();
+
+	return 1;
+}
+
+/*
+ * This function must return 0 because we tail call optimise when calling
+ * from __copy_tofrom_user_power7 which returns 0 on success.
+ */
+int exit_vmx_usercopy(void)
+{
+	pagefault_enable();
+	return 0;
+}
Index: linux-build/arch/powerpc/lib/copyuser_power7_vmx.c
===================================================================
--- linux-build.orig/arch/powerpc/lib/copyuser_power7_vmx.c	2012-05-28 17:18:38.213091662 +1000
+++ /dev/null	1970-01-01 00:00:00.000000000 +0000
@@ -1,51 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright (C) IBM Corporation, 2011
- *
- * Authors: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
- *          Anton Blanchard <anton@au.ibm.com>
- */
-#include <linux/uaccess.h>
-#include <linux/hardirq.h>
-#include <asm/switch_to.h>
-
-int enter_vmx_copy(void)
-{
-	if (in_interrupt())
-		return 0;
-
-	/* This acts as preempt_disable() as well and will make
-	 * enable_kernel_altivec(). We need to disable page faults
-	 * as they can call schedule and thus make us lose the VMX
-	 * context. So on page faults, we just fail which will cause
-	 * a fallback to the normal non-vmx copy.
-	 */
-	pagefault_disable();
-
-	enable_kernel_altivec();
-
-	return 1;
-}
-
-/*
- * This function must return 0 because we tail call optimise when calling
- * from __copy_tofrom_user_power7 which returns 0 on success.
- */
-int exit_vmx_copy(void)
-{
-	pagefault_enable();
-	return 0;
-}
Index: linux-build/arch/powerpc/lib/copyuser_power7.S
===================================================================
--- linux-build.orig/arch/powerpc/lib/copyuser_power7.S	2012-05-29 21:22:43.725611809 +1000
+++ linux-build/arch/powerpc/lib/copyuser_power7.S	2012-05-30 10:23:29.198797007 +1000
@@ -61,7 +61,7 @@
 	ld	r15,STK_REG(r15)(r1)
 	ld	r14,STK_REG(r14)(r1)
 .Ldo_err3:
-	bl	.exit_vmx_copy
+	bl	.exit_vmx_usercopy
 	ld	r0,STACKFRAMESIZE+16(r1)
 	mtlr	r0
 	b	.Lexit
@@ -290,7 +290,7 @@ err1;	stb	r0,0(r3)
 	mflr	r0
 	std	r0,16(r1)
 	stdu	r1,-STACKFRAMESIZE(r1)
-	bl	.enter_vmx_copy
+	bl	.enter_vmx_usercopy
 	cmpwi	r3,0
 	ld	r0,STACKFRAMESIZE+16(r1)
 	ld	r3,STACKFRAMESIZE+48(r1)
@@ -507,7 +507,7 @@ err3;	lbz	r0,0(r4)
 err3;	stb	r0,0(r3)
 
 15:	addi	r1,r1,STACKFRAMESIZE
-	b	.exit_vmx_copy		/* tail call optimise */
+	b	.exit_vmx_usercopy	/* tail call optimise */
 
 .Lvmx_unaligned_copy:
 	/* Get the destination 16B aligned */
@@ -710,5 +710,5 @@ err3;	lbz	r0,0(r4)
 err3;	stb	r0,0(r3)
 
 15:	addi	r1,r1,STACKFRAMESIZE
-	b	.exit_vmx_copy		/* tail call optimise */
+	b	.exit_vmx_usercopy	/* tail call optimise */
 #endif /* CONFiG_ALTIVEC */

^ permalink raw reply

* [PATCH 2/2] powerpc: POWER7 optimised copy_page using VMX and enhanced prefetch
From: Anton Blanchard @ 2012-05-30  5:33 UTC (permalink / raw)
  To: benh, paulus, michael, linuxppc-dev
In-Reply-To: <20120530153124.6a27d10d@kryten>


Implement a POWER7 optimised copy_page using VMX and enhanced
prefetch instructions. We use enhanced prefetch hints to prefetch
both the load and store side. We copy a cacheline at a time and
fall back to regular loads and stores if we are unable to use VMX
(eg we are in an interrupt).

The following microbenchmark was used to assess the impact of
the patch:

http://ozlabs.org/~anton/junkcode/page_fault_file.c

We test MAP_PRIVATE page faults across a 1GB file, 100 times:

# time ./page_fault_file -p -l 1G -i 100

Before: 22.25s
After:  18.89s

17% faster

Signed-off-by: Anton Blanchard <anton@samba.org>
---

Index: linux-build/arch/powerpc/lib/copypage_power7.S
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-build/arch/powerpc/lib/copypage_power7.S	2012-05-30 14:20:32.457035092 +1000
@@ -0,0 +1,168 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#include <asm/page.h>
+#include <asm/ppc_asm.h>
+
+#define STACKFRAMESIZE	256
+#define STK_REG(i)	(112 + ((i)-14)*8)
+
+_GLOBAL(copypage_power7)
+	/*
+	 * We prefetch both the source and destination using enhanced touch
+	 * instructions. We use a stream ID of 0 for the load side and
+	 * 1 for the store side. Since source and destination are page
+	 * aligned we don't need to clear the bottom 7 bits of either
+	 * address.
+	 */
+	ori	r9,r3,1		/* stream=1 */
+
+#ifdef CONFIG_PPC_64K_PAGES
+	lis	r7,0x0E01	/* depth=7, units=512 */
+#else
+	lis	r7,0x0E00	/* depth=7 */
+	ori	r7,r7,0x1000	/* units=32 */
+#endif
+	ori	r10,r7,1	/* stream=1 */
+
+	lis	r8,0x8000	/* GO=1 */
+	clrldi	r8,r8,32
+
+.machine push
+.machine "power4"
+	dcbt	r0,r4,0b01000
+	dcbt	r0,r7,0b01010
+	dcbtst	r0,r9,0b01000
+	dcbtst	r0,r10,0b01010
+	eieio
+	dcbt	r0,r8,0b01010	/* GO */
+.machine pop
+
+#ifdef CONFIG_ALTIVEC
+	mflr	r0
+	std	r3,48(r1)
+	std	r4,56(r1)
+	std	r0,16(r1)
+	stdu	r1,-STACKFRAMESIZE(r1)
+	bl	.enter_vmx_copy
+	cmpwi	r3,0
+	ld	r0,STACKFRAMESIZE+16(r1)
+	ld	r3,STACKFRAMESIZE+48(r1)
+	ld	r4,STACKFRAMESIZE+56(r1)
+	mtlr	r0
+
+	li	r0,(PAGE_SIZE/128)
+	mtctr	r0
+
+	beq	.Lnonvmx_copy
+
+	addi	r1,r1,STACKFRAMESIZE
+
+	li	r6,16
+	li	r7,32
+	li	r8,48
+	li	r9,64
+	li	r10,80
+	li	r11,96
+	li	r12,112
+
+	.align	5
+1:	lvx	vr7,r0,r4
+	lvx	vr6,r4,r6
+	lvx	vr5,r4,r7
+	lvx	vr4,r4,r8
+	lvx	vr3,r4,r9
+	lvx	vr2,r4,r10
+	lvx	vr1,r4,r11
+	lvx	vr0,r4,r12
+	addi	r4,r4,128
+	stvx	vr7,r0,r3
+	stvx	vr6,r3,r6
+	stvx	vr5,r3,r7
+	stvx	vr4,r3,r8
+	stvx	vr3,r3,r9
+	stvx	vr2,r3,r10
+	stvx	vr1,r3,r11
+	stvx	vr0,r3,r12
+	addi	r3,r3,128
+	bdnz	1b
+
+	b	.exit_vmx_copy		/* tail call optimise */
+
+#else
+	li	r0,(PAGE_SIZE/128)
+	mtctr	r0
+
+	stdu	r1,-STACKFRAMESIZE(r1)
+#endif
+
+.Lnonvmx_copy:
+	std	r14,STK_REG(r14)(r1)
+	std	r15,STK_REG(r15)(r1)
+	std	r16,STK_REG(r16)(r1)
+	std	r17,STK_REG(r17)(r1)
+	std	r18,STK_REG(r18)(r1)
+	std	r19,STK_REG(r19)(r1)
+	std	r20,STK_REG(r20)(r1)
+
+1:	ld	r0,0(r4)
+	ld	r5,8(r4)
+	ld	r6,16(r4)
+	ld	r7,24(r4)
+	ld	r8,32(r4)
+	ld	r9,40(r4)
+	ld	r10,48(r4)
+	ld	r11,56(r4)
+	ld	r12,64(r4)
+	ld	r14,72(r4)
+	ld	r15,80(r4)
+	ld	r16,88(r4)
+	ld	r17,96(r4)
+	ld	r18,104(r4)
+	ld	r19,112(r4)
+	ld	r20,120(r4)
+	addi	r4,r4,128
+	std	r0,0(r3)
+	std	r5,8(r3)
+	std	r6,16(r3)
+	std	r7,24(r3)
+	std	r8,32(r3)
+	std	r9,40(r3)
+	std	r10,48(r3)
+	std	r11,56(r3)
+	std	r12,64(r3)
+	std	r14,72(r3)
+	std	r15,80(r3)
+	std	r16,88(r3)
+	std	r17,96(r3)
+	std	r18,104(r3)
+	std	r19,112(r3)
+	std	r20,120(r3)
+	addi	r3,r3,128
+	bdnz	1b
+
+	ld	r14,STK_REG(r14)(r1)
+	ld	r15,STK_REG(r15)(r1)
+	ld	r16,STK_REG(r16)(r1)
+	ld	r17,STK_REG(r17)(r1)
+	ld	r18,STK_REG(r18)(r1)
+	ld	r19,STK_REG(r19)(r1)
+	ld	r20,STK_REG(r20)(r1)
+	addi	r1,r1,STACKFRAMESIZE
+	blr
Index: linux-build/arch/powerpc/lib/Makefile
===================================================================
--- linux-build.orig/arch/powerpc/lib/Makefile	2012-05-30 10:53:03.442309322 +1000
+++ linux-build/arch/powerpc/lib/Makefile	2012-05-30 11:07:54.361827650 +1000
@@ -17,7 +17,7 @@ obj-$(CONFIG_HAS_IOMEM)	+= devres.o
 obj-$(CONFIG_PPC64)	+= copypage_64.o copyuser_64.o \
 			   memcpy_64.o usercopy_64.o mem_64.o string.o \
 			   checksum_wrappers_64.o hweight_64.o \
-			   copyuser_power7.o string_64.o
+			   copyuser_power7.o string_64.o copypage_power7.o
 obj-$(CONFIG_XMON)	+= sstep.o ldstfp.o
 obj-$(CONFIG_KPROBES)	+= sstep.o ldstfp.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= sstep.o ldstfp.o
Index: linux-build/arch/powerpc/lib/copypage_64.S
===================================================================
--- linux-build.orig/arch/powerpc/lib/copypage_64.S	2012-05-30 10:53:03.430309112 +1000
+++ linux-build/arch/powerpc/lib/copypage_64.S	2012-05-30 11:07:54.361827650 +1000
@@ -17,7 +17,11 @@ PPC64_CACHES:
         .section        ".text"
 
 _GLOBAL(copy_page)
+BEGIN_FTR_SECTION
 	lis	r5,PAGE_SIZE@h
+FTR_SECTION_ELSE
+	b	.copypage_power7
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
 	ori	r5,r5,PAGE_SIZE@l
 BEGIN_FTR_SECTION
 	ld      r10,PPC64_CACHES@toc(r2)
Index: linux-build/arch/powerpc/lib/vmx-helper.c
===================================================================
--- linux-build.orig/arch/powerpc/lib/vmx-helper.c	2012-05-30 10:53:03.454309531 +1000
+++ linux-build/arch/powerpc/lib/vmx-helper.c	2012-05-30 14:20:38.637144276 +1000
@@ -49,3 +49,26 @@ int exit_vmx_usercopy(void)
 	pagefault_enable();
 	return 0;
 }
+
+int enter_vmx_copy(void)
+{
+	if (in_interrupt())
+		return 0;
+
+	preempt_disable();
+
+	enable_kernel_altivec();
+
+	return 1;
+}
+
+/*
+ * All calls to this function will be optimised into tail calls. We are
+ * passed a pointer to the destination which we return as required by a
+ * memcpy implementation.
+ */
+void *exit_vmx_copy(void *dest)
+{
+	preempt_enable();
+	return dest;
+}

^ permalink raw reply

* [RFC] [PATCH] powerpc: Add MSR_DE to MSR_KERNEL
From: Joakim Tjernlund @ 2012-05-30  7:43 UTC (permalink / raw)
  To: linuxppc-dev, support, Bob Cochran

Emulators such as BDI2000 and CodeWarrior need to have MSR_DE set
in order to support breakpoints.
This adds MSR_DE for kernel space only.
---

I have tested this briefly with BDI2000 on P2010(e500) and
it works for me. I don't know if there are any bad side effects, therefore
this RFC.

 arch/powerpc/include/asm/reg.h       |    2 +-
 arch/powerpc/include/asm/reg_booke.h |    2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 7fdc2c0..25c8554 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -108,7 +108,7 @@
 #define MSR_USER64	MSR_USER32 | MSR_64BIT
 #elif defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_8xx)
 /* Default MSR for kernel mode. */
-#define MSR_KERNEL	(MSR_ME|MSR_RI|MSR_IR|MSR_DR)
+#define MSR_KERNEL	(MSR_ME|MSR_RI|MSR_IR|MSR_DR|MSR_DE)
 #define MSR_USER	(MSR_KERNEL|MSR_PR|MSR_EE)
 #endif
 
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index 500fe1d..0cb259b 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -37,7 +37,7 @@
 #define MSR_KERNEL	(MSR_ME|MSR_RI|MSR_IR|MSR_DR|MSR_CE)
 #define MSR_USER	(MSR_KERNEL|MSR_PR|MSR_EE)
 #else
-#define MSR_KERNEL	(MSR_ME|MSR_RI|MSR_CE)
+#define MSR_KERNEL	(MSR_ME|MSR_RI|MSR_CE|MSR_DE)
 #define MSR_USER	(MSR_KERNEL|MSR_PR|MSR_EE)
 #endif
 
-- 
1.7.3.4

^ permalink raw reply related

* Re: [RFC] [PATCH] powerpc: Add MSR_DE to MSR_KERNEL
From: Dan Malek @ 2012-05-30  7:59 UTC (permalink / raw)
  To: Joakim Tjernlund; +Cc: linuxppc-dev, Bob Cochran, support
In-Reply-To: <1338363814-19565-1-git-send-email-Joakim.Tjernlund@transmode.se>


Hi Joakim.

On May 30, 2012, at 12:43 AM, Joakim Tjernlund wrote:

> I have tested this briefly with BDI2000 on P2010(e500) and
> it works for me. I don't know if there are any bad side effects,  
> therfore
> this RFC.

We used to have MSR_DE surrounded by CONFIG_something
to ensure it wasn't set under normal operation.  IIRC, if MSR_DE
is set, you will have problems with software debuggers that
utilize the debugging registers in the chip itself.  You only want
to force this to be set when using the BDI, not at other times.

Thanks.

	-- Dan

^ permalink raw reply

* [RFC PATCH powerpc] make CONFIG_NUMA depends on CONFIG_SMP
From: Li Zhong @ 2012-05-30  9:31 UTC (permalink / raw)
  Cc: Paul Mackerras, PowerPC email list

I'm not sure whether it makes sense to add this dependency to avoid
CONFIG_NUMA && !CONFIG_SMP.

I want to do this because I saw some build errors on next-tree when
compiling with CONFIG_SMP disabled, and it seems they are caused by some
code under the CONFIG_NUMA #ifdefs.

Signed-off-by: Li Zhong <zhong@linux.vnet.ibm.com>
---
 arch/powerpc/Kconfig |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 050cb37..b2aa74b 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -394,7 +394,7 @@ config IRQ_ALL_CPUS
 
 config NUMA
 	bool "NUMA support"
-	depends on PPC64
+	depends on PPC64 && SMP
 	default y if SMP && PPC_PSERIES
 
 config NODES_SHIFT
-- 
1.7.1

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox