* [PATCH 2/2] powerpc: Use the new generic strncpy_from_user() and strnlen_user()
From: Paul Mackerras @ 2012-05-28 3:03 UTC (permalink / raw)
To: Linus Torvalds; +Cc: linuxppc-dev, David Miller
In-Reply-To: <20120528025956.GA6822@bloggs.ozlabs.ibm.com>
This is much the same as for SPARC except that we can do the find_zero()
function more efficiently using the count-leading-zeroes instructions.
Tested on 32-bit and 64-bit PowerPC.
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
I'm sending this in without Ben H's ack as he is away for a few weeks for
medical reasons.
arch/powerpc/Kconfig | 2 ++
arch/powerpc/include/asm/uaccess.h | 41 ++++----------------------
arch/powerpc/include/asm/word-at-a-time.h | 41 ++++++++++++++++++++++++++
arch/powerpc/kernel/ppc_ksyms.c | 2 --
arch/powerpc/lib/string.S | 45 -----------------------------
5 files changed, 48 insertions(+), 83 deletions(-)
create mode 100644 arch/powerpc/include/asm/word-at-a-time.h
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 00b9874..050cb37 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -135,6 +135,8 @@ config PPC
select GENERIC_CMOS_UPDATE
select GENERIC_TIME_VSYSCALL
select GENERIC_CLOCKEVENTS
+ select GENERIC_STRNCPY_FROM_USER
+ select GENERIC_STRNLEN_USER
config EARLY_PRINTK
bool
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index bd0fb84..17bb40c 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -40,6 +40,8 @@
#define segment_eq(a, b) ((a).seg == (b).seg)
+#define user_addr_max() (get_fs().seg)
+
#ifdef __powerpc64__
/*
* This check is sufficient because there is a large enough
@@ -453,42 +455,9 @@ static inline unsigned long clear_user(void __user *addr, unsigned long size)
return size;
}
-extern int __strncpy_from_user(char *dst, const char __user *src, long count);
-
-static inline long strncpy_from_user(char *dst, const char __user *src,
- long count)
-{
- might_sleep();
- if (likely(access_ok(VERIFY_READ, src, 1)))
- return __strncpy_from_user(dst, src, count);
- return -EFAULT;
-}
-
-/*
- * Return the size of a string (including the ending 0)
- *
- * Return 0 for error
- */
-extern int __strnlen_user(const char __user *str, long len, unsigned long top);
-
-/*
- * Returns the length of the string at str (including the null byte),
- * or 0 if we hit a page we can't access,
- * or something > len if we didn't find a null byte.
- *
- * The `top' parameter to __strnlen_user is to make sure that
- * we can never overflow from the user area into kernel space.
- */
-static inline int strnlen_user(const char __user *str, long len)
-{
- unsigned long top = current->thread.fs.seg;
-
- if ((unsigned long)str > top)
- return 0;
- return __strnlen_user(str, len, top);
-}
-
-#define strlen_user(str) strnlen_user((str), 0x7ffffffe)
+extern long strncpy_from_user(char *dst, const char __user *src, long count);
+extern __must_check long strlen_user(const char __user *str);
+extern __must_check long strnlen_user(const char __user *str, long n);
#endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/word-at-a-time.h b/arch/powerpc/include/asm/word-at-a-time.h
new file mode 100644
index 0000000..d0b6d4a
--- /dev/null
+++ b/arch/powerpc/include/asm/word-at-a-time.h
@@ -0,0 +1,41 @@
+#ifndef _ASM_WORD_AT_A_TIME_H
+#define _ASM_WORD_AT_A_TIME_H
+
+/*
+ * Word-at-a-time interfaces for PowerPC.
+ */
+
+#include <linux/kernel.h>
+#include <asm/asm-compat.h>
+
+struct word_at_a_time {
+ const unsigned long high_bits, low_bits;
+};
+
+#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0xfe) + 1, REPEAT_BYTE(0x7f) }
+
+/* Bit set in the bytes that have a zero */
+static inline long prep_zero_mask(unsigned long val, unsigned long rhs, const struct word_at_a_time *c)
+{
+ unsigned long mask = (val & c->low_bits) + c->low_bits;
+ return ~(mask | rhs);
+}
+
+#define create_zero_mask(mask) (mask)
+
+static inline long find_zero(unsigned long mask)
+{
+ long leading_zero_bits;
+
+ asm (PPC_CNTLZL "%0,%1" : "=r" (leading_zero_bits) : "r" (mask));
+ return leading_zero_bits >> 3;
+}
+
+static inline bool has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
+{
+ unsigned long rhs = val | c->low_bits;
+ *data = rhs;
+ return (val + c->high_bits) & ~rhs;
+}
+
+#endif /* _ASM_WORD_AT_A_TIME_H */
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index d1f2aaf..3e40315 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -85,8 +85,6 @@ EXPORT_SYMBOL(csum_tcpudp_magic);
EXPORT_SYMBOL(__copy_tofrom_user);
EXPORT_SYMBOL(__clear_user);
-EXPORT_SYMBOL(__strncpy_from_user);
-EXPORT_SYMBOL(__strnlen_user);
EXPORT_SYMBOL(copy_page);
#if defined(CONFIG_PCI) && defined(CONFIG_PPC32)
diff --git a/arch/powerpc/lib/string.S b/arch/powerpc/lib/string.S
index 455881a..093d631 100644
--- a/arch/powerpc/lib/string.S
+++ b/arch/powerpc/lib/string.S
@@ -160,48 +160,3 @@ _GLOBAL(__clear_user)
PPC_LONG 1b,91b
PPC_LONG 8b,92b
.text
-
-_GLOBAL(__strncpy_from_user)
- addi r6,r3,-1
- addi r4,r4,-1
- cmpwi 0,r5,0
- beq 2f
- mtctr r5
-1: lbzu r0,1(r4)
- cmpwi 0,r0,0
- stbu r0,1(r6)
- bdnzf 2,1b /* dec ctr, branch if ctr != 0 && !cr0.eq */
- beq 3f
-2: addi r6,r6,1
-3: subf r3,r3,r6
- blr
-99: li r3,-EFAULT
- blr
-
- .section __ex_table,"a"
- PPC_LONG 1b,99b
- .text
-
-/* r3 = str, r4 = len (> 0), r5 = top (highest addr) */
-_GLOBAL(__strnlen_user)
- addi r7,r3,-1
- subf r6,r7,r5 /* top+1 - str */
- cmplw 0,r4,r6
- bge 0f
- mr r6,r4
-0: mtctr r6 /* ctr = min(len, top - str) */
-1: lbzu r0,1(r7) /* get next byte */
- cmpwi 0,r0,0
- bdnzf 2,1b /* loop if --ctr != 0 && byte != 0 */
- addi r7,r7,1
- subf r3,r3,r7 /* number of bytes we have looked at */
- beqlr /* return if we found a 0 byte */
- cmpw 0,r3,r4 /* did we look at all len bytes? */
- blt 99f /* if not, must have hit top */
- addi r3,r4,1 /* return len + 1 to indicate no null found */
- blr
-99: li r3,0 /* bad address, return 0 */
- blr
-
- .section __ex_table,"a"
- PPC_LONG 1b,99b
--
1.7.10.rc3.219.g53414
^ permalink raw reply related
* Re: [PATCH 1/2] lib: Fix generic strnlen_user for 32-bit big-endian machines
From: David Miller @ 2012-05-28 3:55 UTC (permalink / raw)
To: paulus; +Cc: linuxppc-dev, torvalds
In-Reply-To: <20120528025956.GA6822@bloggs.ozlabs.ibm.com>
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 28 May 2012 12:59:56 +1000
> The aligned_byte_mask() definition is wrong for 32-bit big-endian
> machines: the "7-(n)" part of the definition assumes a long is 8
> bytes. This fixes it by using BITS_PER_LONG - 8 instead of 8*7.
> Tested on 32-bit and 64-bit PowerPC.
>
> Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: David S. Miller <davem@davemloft.net>
^ permalink raw reply
* Re: [PATCH 2/2] powerpc: Use the new generic strncpy_from_user() and strnlen_user()
From: David Miller @ 2012-05-28 3:56 UTC (permalink / raw)
To: paulus; +Cc: linuxppc-dev, torvalds
In-Reply-To: <20120528030347.GB6822@bloggs.ozlabs.ibm.com>
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 28 May 2012 13:03:47 +1000
> This is much the same as for SPARC except that we can do the find_zero()
> function more efficiently using the count-leading-zeroes instructions.
> Tested on 32-bit and 64-bit PowerPC.
>
> Signed-off-by: Paul Mackerras <paulus@samba.org>
Looks great.
Acked-by: David S. Miller <davem@davemloft.net>
I might hack sparc64 to use 'lzd' or 'popc' on newer chips via code
patching at some point.
^ permalink raw reply
* [PATCH] powerpc: 64bit optimised __clear_user
From: Anton Blanchard @ 2012-05-28 5:54 UTC (permalink / raw)
To: benh, paulus, michael, linuxppc-dev
I noticed __clear_user high up in a profile of one of my RAID stress
tests. The testcase was doing a dd from /dev/zero which ends up
calling __clear_user.
__clear_user is basically a loop with a single 4 byte store which
is horribly slow. We can do much better by aligning the destination
and doing 32 bytes of 8 byte stores in a loop.
The following testcase was used to verify the patch:
http://ozlabs.org/~anton/junkcode/stress_clear_user.c
To show the improvement in performance I ran a dd from /dev/zero
to /dev/null on a POWER7 box:
Before:
# dd if=/dev/zero of=/dev/null bs=1M count=10000
10485760000 bytes (10 GB) copied, 3.72379 s, 2.8 GB/s
After:
# time dd if=/dev/zero of=/dev/null bs=1M count=10000
10485760000 bytes (10 GB) copied, 0.728318 s, 14.4 GB/s
Over 5x faster.
Signed-off-by: Anton Blanchard <anton@samba.org>
---
Interestingly, it picked up an issue with the old clear_user which
fails when we are less than 4 bytes to the end of a page and the
next page is unmapped:
offset 4094 length 526 expected 2 got -1
expected 0x00 at offset 4094, got 0xff
expected 0x00 at offset 4095, got 0xff
We should fix that.
Index: linux-build/arch/powerpc/lib/Makefile
===================================================================
--- linux-build.orig/arch/powerpc/lib/Makefile 2012-05-28 10:59:09.281806751 +1000
+++ linux-build/arch/powerpc/lib/Makefile 2012-05-28 11:02:35.017452778 +1000
@@ -17,7 +17,7 @@ obj-$(CONFIG_HAS_IOMEM) += devres.o
obj-$(CONFIG_PPC64) += copypage_64.o copyuser_64.o \
memcpy_64.o usercopy_64.o mem_64.o string.o \
checksum_wrappers_64.o hweight_64.o \
- copyuser_power7.o
+ copyuser_power7.o string_64.o
obj-$(CONFIG_XMON) += sstep.o ldstfp.o
obj-$(CONFIG_KPROBES) += sstep.o ldstfp.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += sstep.o ldstfp.o
Index: linux-build/arch/powerpc/lib/string_64.S
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-build/arch/powerpc/lib/string_64.S 2012-05-28 14:56:03.937833406 +1000
@@ -0,0 +1,141 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+
+#include <asm/ppc_asm.h>
+
+/**
+ * __clear_user: - Zero a block of memory in user space, with less checking.
+ * @to: Destination address, in user space.
+ * @n: Number of bytes to zero.
+ *
+ * Zero a block of memory in user space. Caller must check
+ * the specified block with access_ok() before calling this function.
+ *
+ * Returns number of bytes that could not be cleared.
+ * On success, this will be zero.
+ */
+
+ .macro err1
+100:
+ .section __ex_table,"a"
+ .align 3
+ .llong 100b,.Ldo_err1
+ .previous
+ .endm
+
+ .macro err2
+200:
+ .section __ex_table,"a"
+ .align 3
+ .llong 200b,.Ldo_err2
+ .previous
+ .endm
+
+ .macro err3
+300:
+ .section __ex_table,"a"
+ .align 3
+ .llong 300b,.Ldo_err3
+ .previous
+ .endm
+
+.Ldo_err1:
+ mr r3,r8
+
+.Ldo_err2:
+ mtctr r4
+1:
+err3; stb r0,0(r3)
+ addi r3,r3,1
+ addi r4,r4,-1
+ bdnz 1b
+
+.Ldo_err3:
+ mr r3,r4
+ blr
+
+_GLOBAL(__clear_user)
+ cmpdi r4,32
+ neg r6,r3
+ li r0,0
+ blt .Lshort_clear
+ mr r8,r3
+ mtocrf 0x01,r6
+ clrldi r6,r6,(64-3)
+
+ /* Get the destination 8 byte aligned */
+ bf cr7*4+3,1f
+err1; stb r0,0(r3)
+ addi r3,r3,1
+
+1: bf cr7*4+2,2f
+err1; sth r0,0(r3)
+ addi r3,r3,2
+
+2: bf cr7*4+1,3f
+err1; stw r0,0(r3)
+ addi r3,r3,4
+
+3: sub r4,r4,r6
+ srdi r6,r4,5
+ cmpdi r4,32
+ blt .Lshort_clear
+ mtctr r6
+
+ /* Do 32 byte chunks */
+4:
+err2; std r0,0(r3)
+err2; std r0,8(r3)
+err2; std r0,16(r3)
+err2; std r0,24(r3)
+ addi r3,r3,32
+ addi r4,r4,-32
+ bdnz 4b
+
+.Lshort_clear:
+ /* up to 31 bytes to go */
+ cmpdi r4,16
+ blt 6f
+err2; std r0,0(r3)
+err2; std r0,8(r3)
+ addi r3,r3,16
+ addi r4,r4,-16
+
+ /* Up to 15 bytes to go */
+6: mr r8,r3
+ clrldi r4,r4,(64-4)
+ mtocrf 0x01,r4
+ bf cr7*4+0,7f
+err1; std r0,0(r3)
+ addi r3,r3,8
+
+7: bf cr7*4+1,8f
+err1; stw r0,0(r3)
+ addi r3,r3,4
+
+8: bf cr7*4+2,9f
+err1; sth r0,0(r3)
+ addi r3,r3,2
+
+9: bf cr7*4+3,10f
+err1; stb r0,0(r3)
+
+10: li r3,0
+ blr
Index: linux-build/arch/powerpc/lib/string.S
===================================================================
--- linux-build.orig/arch/powerpc/lib/string.S 2011-09-07 15:15:49.146459439 +1000
+++ linux-build/arch/powerpc/lib/string.S 2012-05-28 11:01:28.728249934 +1000
@@ -119,6 +119,7 @@ _GLOBAL(memchr)
2: li r3,0
blr
+#ifdef CONFIG_PPC32
_GLOBAL(__clear_user)
addi r6,r3,-4
li r3,0
@@ -160,6 +161,7 @@ _GLOBAL(__clear_user)
PPC_LONG 1b,91b
PPC_LONG 8b,92b
.text
+#endif
_GLOBAL(__strncpy_from_user)
addi r6,r3,-1
^ permalink raw reply
* [RFC PATCH 09/10] POWERPC: smp: remove call to ipi_call_lock()/ipi_call_unlock()
From: Yong Zhang @ 2012-05-29 7:16 UTC (permalink / raw)
To: linux-kernel
Cc: axboe, sshtylyov, nikunj, david.daney, peterz, akpm, ralf,
Paul Mackerras, srivatsa.bhat, tglx, paulmck, linuxppc-dev, mingo
In-Reply-To: <1338275765-3217-1-git-send-email-yong.zhang0@gmail.com>
From: Yong Zhang <yong.zhang@windriver.com>
1) call_function.lock used in smp_call_function_many() is just to protect
call_function.queue and &data->refs, cpu_online_mask is outside of the
lock. And it's not necessary to protect cpu_online_mask,
because data->cpumask is pre-calculated and even if a cpu is brought up
when calling arch_send_call_function_ipi_mask(), it's harmless because
validation test in generic_smp_call_function_interrupt() will take care
of it.
2) For cpu down issue, stop_machine() will guarantee that no concurrent
smp_call_function() is processing.
Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: linuxppc-dev@lists.ozlabs.org
---
arch/powerpc/kernel/smp.c | 2 --
1 files changed, 0 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index e4cb343..e1417c4 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -571,7 +571,6 @@ void __devinit start_secondary(void *unused)
if (system_state == SYSTEM_RUNNING)
vdso_data->processorCount++;
#endif
- ipi_call_lock();
notify_cpu_starting(cpu);
set_cpu_online(cpu, true);
/* Update sibling maps */
@@ -601,7 +600,6 @@ void __devinit start_secondary(void *unused)
of_node_put(np);
}
of_node_put(l2_cache);
- ipi_call_unlock();
local_irq_enable();
--
1.7.5.4
^ permalink raw reply related
* Re: [PATCH v5 1/5] powerpc/85xx: implement hardware timebase sync
From: Li Yang @ 2012-05-29 7:30 UTC (permalink / raw)
To: Zhao Chenhui; +Cc: scottwood, linuxppc-dev, linux-kernel
In-Reply-To: <1336737235-15370-1-git-send-email-chenhui.zhao@freescale.com>
Hi Scott,
Thanks for the valuable comment raised before and we have updated the
patches accordingly. Please review the updated patch set and ACK if
they are good to you. We hope it can be applied in this window.
Leo
On Fri, May 11, 2012 at 7:53 PM, Zhao Chenhui
<chenhui.zhao@freescale.com> wrote:
> Do hardware timebase sync. Firstly, stop all timebases, and transfer
> the timebase value of the boot core to the other core. Finally,
> start all timebases.
>
> Only apply to dual-core chips, such as MPC8572, P2020, etc.
>
> Signed-off-by: Zhao Chenhui <chenhui.zhao@freescale.com>
> Signed-off-by: Li Yang <leoli@freescale.com>
> ---
> =C2=A0arch/powerpc/include/asm/fsl_guts.h | =C2=A0 =C2=A02 +
> =C2=A0arch/powerpc/platforms/85xx/smp.c =C2=A0 | =C2=A0 93 ++++++++++++++=
+++++++++++++++++++--
> =C2=A02 files changed, 91 insertions(+), 4 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/fsl_guts.h b/arch/powerpc/include/a=
sm/fsl_guts.h
> index aa4c488..dd5ba2c 100644
> --- a/arch/powerpc/include/asm/fsl_guts.h
> +++ b/arch/powerpc/include/asm/fsl_guts.h
> @@ -48,6 +48,8 @@ struct ccsr_guts {
> =C2=A0 =C2=A0 =C2=A0 =C2=A0 __be32 =C2=A0dmuxcr; =C2=A0 =C2=A0 =C2=A0 =C2=
=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0/* 0x.0068 - DMA Mux Control Register */
> =C2=A0 =C2=A0 =C2=A0 =C2=A0 u8 =C2=A0 =C2=A0 res06c[0x70 - 0x6c];
> =C2=A0 =C2=A0 =C2=A0 =C2=A0__be32 =C2=A0devdisr; =C2=A0 =C2=A0 =C2=A0 =C2=
=A0/* 0x.0070 - Device Disable Control */
> +#define CCSR_GUTS_DEVDISR_TB1 =C2=A00x00001000
> +#define CCSR_GUTS_DEVDISR_TB0 =C2=A00x00004000
> =C2=A0 =C2=A0 =C2=A0 =C2=A0__be32 =C2=A0devdisr2; =C2=A0 =C2=A0 =C2=A0 /*=
0x.0074 - Device Disable Control 2 */
> =C2=A0 =C2=A0 =C2=A0 =C2=A0u8 =C2=A0 =C2=A0 =C2=A0res078[0x7c - 0x78];
> =C2=A0 =C2=A0 =C2=A0 =C2=A0__be32 =C2=A0pmjcr; =C2=A0 =C2=A0 =C2=A0 =C2=
=A0 =C2=A0/* 0x.007c - 4 Power Management Jog Control Register */
> diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/8=
5xx/smp.c
> index ff42490..6862dda 100644
> --- a/arch/powerpc/platforms/85xx/smp.c
> +++ b/arch/powerpc/platforms/85xx/smp.c
> @@ -24,6 +24,7 @@
> =C2=A0#include <asm/mpic.h>
> =C2=A0#include <asm/cacheflush.h>
> =C2=A0#include <asm/dbell.h>
> +#include <asm/fsl_guts.h>
>
> =C2=A0#include <sysdev/fsl_soc.h>
> =C2=A0#include <sysdev/mpic.h>
> @@ -115,13 +116,70 @@ smp_85xx_kick_cpu(int nr)
>
> =C2=A0struct smp_ops_t smp_85xx_ops =3D {
> =C2=A0 =C2=A0 =C2=A0 =C2=A0.kick_cpu =3D smp_85xx_kick_cpu,
> -#ifdef CONFIG_KEXEC
> - =C2=A0 =C2=A0 =C2=A0 .give_timebase =C2=A0=3D smp_generic_give_timebase=
,
> - =C2=A0 =C2=A0 =C2=A0 .take_timebase =C2=A0=3D smp_generic_take_timebase=
,
> -#endif
> =C2=A0};
>
> =C2=A0#ifdef CONFIG_KEXEC
> +static struct ccsr_guts __iomem *guts;
> +static u64 timebase;
> +static int tb_req;
> +static int tb_valid;
> +
> +static void mpc85xx_timebase_freeze(int freeze)
> +{
> + =C2=A0 =C2=A0 =C2=A0 unsigned int mask;
> +
> + =C2=A0 =C2=A0 =C2=A0 if (!guts)
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 return;
> +
> + =C2=A0 =C2=A0 =C2=A0 mask =3D CCSR_GUTS_DEVDISR_TB0 | CCSR_GUTS_DEVDISR=
_TB1;
> + =C2=A0 =C2=A0 =C2=A0 if (freeze)
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 setbits32(&guts->devdi=
sr, mask);
> + =C2=A0 =C2=A0 =C2=A0 else
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 clrbits32(&guts->devdi=
sr, mask);
> +
> + =C2=A0 =C2=A0 =C2=A0 in_be32(&guts->devdisr);
> +}
> +
> +static void mpc85xx_give_timebase(void)
> +{
> + =C2=A0 =C2=A0 =C2=A0 unsigned long flags;
> +
> + =C2=A0 =C2=A0 =C2=A0 local_irq_save(flags);
> +
> + =C2=A0 =C2=A0 =C2=A0 while (!tb_req)
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 barrier();
> + =C2=A0 =C2=A0 =C2=A0 tb_req =3D 0;
> +
> + =C2=A0 =C2=A0 =C2=A0 mpc85xx_timebase_freeze(1);
> + =C2=A0 =C2=A0 =C2=A0 timebase =3D get_tb();
> + =C2=A0 =C2=A0 =C2=A0 mb();
> + =C2=A0 =C2=A0 =C2=A0 tb_valid =3D 1;
> +
> + =C2=A0 =C2=A0 =C2=A0 while (tb_valid)
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 barrier();
> +
> + =C2=A0 =C2=A0 =C2=A0 mpc85xx_timebase_freeze(0);
> +
> + =C2=A0 =C2=A0 =C2=A0 local_irq_restore(flags);
> +}
> +
> +static void mpc85xx_take_timebase(void)
> +{
> + =C2=A0 =C2=A0 =C2=A0 unsigned long flags;
> +
> + =C2=A0 =C2=A0 =C2=A0 local_irq_save(flags);
> +
> + =C2=A0 =C2=A0 =C2=A0 tb_req =3D 1;
> + =C2=A0 =C2=A0 =C2=A0 while (!tb_valid)
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 barrier();
> +
> + =C2=A0 =C2=A0 =C2=A0 set_tb(timebase >> 32, timebase & 0xffffffff);
> + =C2=A0 =C2=A0 =C2=A0 mb();
> + =C2=A0 =C2=A0 =C2=A0 tb_valid =3D 0;
> +
> + =C2=A0 =C2=A0 =C2=A0 local_irq_restore(flags);
> +}
> +
> =C2=A0atomic_t kexec_down_cpus =3D ATOMIC_INIT(0);
>
> =C2=A0void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary)
> @@ -228,6 +286,20 @@ smp_85xx_setup_cpu(int cpu_nr)
> =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0doorbell_setup_thi=
s_cpu();
> =C2=A0}
>
> +#ifdef CONFIG_KEXEC
> +static const struct of_device_id guts_ids[] =3D {
> + =C2=A0 =C2=A0 =C2=A0 { .compatible =3D "fsl,mpc8572-guts", },
> + =C2=A0 =C2=A0 =C2=A0 { .compatible =3D "fsl,mpc8560-guts", },
> + =C2=A0 =C2=A0 =C2=A0 { .compatible =3D "fsl,mpc8536-guts", },
> + =C2=A0 =C2=A0 =C2=A0 { .compatible =3D "fsl,p1020-guts", },
> + =C2=A0 =C2=A0 =C2=A0 { .compatible =3D "fsl,p1021-guts", },
> + =C2=A0 =C2=A0 =C2=A0 { .compatible =3D "fsl,p1022-guts", },
> + =C2=A0 =C2=A0 =C2=A0 { .compatible =3D "fsl,p1023-guts", },
> + =C2=A0 =C2=A0 =C2=A0 { .compatible =3D "fsl,p2020-guts", },
> + =C2=A0 =C2=A0 =C2=A0 {},
> +};
> +#endif
> +
> =C2=A0void __init mpc85xx_smp_init(void)
> =C2=A0{
> =C2=A0 =C2=A0 =C2=A0 =C2=A0struct device_node *np;
> @@ -249,6 +321,19 @@ void __init mpc85xx_smp_init(void)
> =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0smp_85xx_ops.cause=
_ipi =3D doorbell_cause_ipi;
> =C2=A0 =C2=A0 =C2=A0 =C2=A0}
>
> +#ifdef CONFIG_KEXEC
> + =C2=A0 =C2=A0 =C2=A0 np =3D of_find_matching_node(NULL, guts_ids);
> + =C2=A0 =C2=A0 =C2=A0 if (np) {
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 guts =3D of_iomap(np, =
0);
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 smp_85xx_ops.give_time=
base =3D mpc85xx_give_timebase;
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 smp_85xx_ops.take_time=
base =3D mpc85xx_take_timebase;
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 of_node_put(np);
> + =C2=A0 =C2=A0 =C2=A0 } else {
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 smp_85xx_ops.give_time=
base =3D smp_generic_give_timebase;
> + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 smp_85xx_ops.take_time=
base =3D smp_generic_take_timebase;
> + =C2=A0 =C2=A0 =C2=A0 }
> +#endif
> +
> =C2=A0 =C2=A0 =C2=A0 =C2=A0smp_ops =3D &smp_85xx_ops;
>
> =C2=A0#ifdef CONFIG_KEXEC
> --
> 1.6.4.1
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
--=20
- Leo
^ permalink raw reply
* [PATCH] powerpc: Use enhanced touch instructions in POWER7 copy_to_user/copy_from_user
From: Anton Blanchard @ 2012-05-29 8:14 UTC (permalink / raw)
To: benh, paulus, michael, amodra; +Cc: linuxppc-dev
Version 2.06 of the POWER ISA introduced enhanced touch instructions,
allowing us to specify a number of attributes including the length of
a stream.
This patch adds a software stream for both loads and stores in the
POWER7 copy_tofrom_user loop. Since the setup is quite complicated
and we have to use an eieio to ensure correct ordering of the "GO"
command we only do this for copies above 4kB.
To quantify any performance improvements we need a working set
bigger than the caches so we operate on a 1GB file:
# dd if=/dev/zero of=/tmp/foo bs=1M count=1024
And we compare how fast we can read the file:
# dd if=/tmp/foo of=/dev/null bs=1M
before: 7.7 GB/s
after: 9.6 GB/s
A 25% improvement.
The worst case for this patch will be a completely L1 cache contained
copy of just over 4kB. We can test this with the copy_to_user
testcase we used to tune copy_tofrom_user originally:
http://ozlabs.org/~anton/junkcode/copy_to_user.c
# time ./copy_to_user2 -l 4224 -i 10000000
before: 6.807 s
after: 6.946 s
A 2% slowdown, which seems reasonable considering our data is unlikely
to be completely L1 contained.
Signed-off-by: Anton Blanchard <anton@samba.org>
---
Index: linux-build/arch/powerpc/lib/copyuser_power7.S
===================================================================
--- linux-build.orig/arch/powerpc/lib/copyuser_power7.S 2012-03-01 16:15:31.073813523 +1100
+++ linux-build/arch/powerpc/lib/copyuser_power7.S 2012-05-29 16:14:44.129704887 +1000
@@ -298,6 +298,37 @@ err1; stb r0,0(r3)
ld r5,STACKFRAMESIZE+64(r1)
mtlr r0
+ /*
+ * We prefetch both the source and destination using enhanced touch
+ * instructions. We use a stream ID of 0 for the load side and
+ * 1 for the store side.
+ */
+ clrrdi r6,r4,7
+ clrrdi r9,r3,7
+ ori r9,r9,1 /* stream=1 */
+
+ srdi r7,r5,7 /* length in cachelines, capped at 0x3FF */
+ cmpldi r7,0x3FF
+ ble 1f
+ li r7,0x3FF
+1: lis r0,0x0E00 /* depth=7 */
+ sldi r7,r7,7
+ or r7,r7,r0
+ ori r10,r7,1 /* stream=1 */
+
+ lis r8,0x8000 /* GO=1 */
+ clrldi r8,r8,32
+
+.machine push
+.machine "power4"
+ dcbt r0,r6,0b01000
+ dcbt r0,r7,0b01010
+ dcbtst r0,r9,0b01000
+ dcbtst r0,r10,0b01010
+ eieio
+ dcbt r0,r8,0b01010 /* GO */
+.machine pop
+
beq .Lunwind_stack_nonvmx_copy
/*
^ permalink raw reply
* RE: pread() and pwrite() system calls
From: David Laight @ 2012-05-29 8:28 UTC (permalink / raw)
To: Gabriel Paubert; +Cc: linuxppc-dev
In-Reply-To: <20120525164550.GA32406@visitor2.iram.es>
> > A special pread/pwrite asm stub that just copies
> > r7 to r0 could be used.
> >
> > Would it be enough to do:
> > syscall_pread_pwrite:
> > mov 0,7
> > sc
> > blr
> > and handle the -ve -> errno in C?
>
> Huh? Won't fly, r0 is used for the system call number!
I was copying that from r7!
Actually I have a much better stub by copying the one
used for mmap().
The system call itself is fine.
Using the system call almost halved the time taken
for a 4-byte read.
> On the other hand, I believed PPC had no problems passing
> up to 8 32 bit arguments in registers (r3 to r10), but
> I may be confusing with the standard ABI for function calls.
>
> Hmm, a quick look at kernel/entry_32.s shows that it should
> be able to use at least r3 to r8, which should be sufficient.
>
> I think that it is an uClibc problem.
True, in that it isn't a kernel bug.
OTOH the kernel is likely to get blamed for non-atomic pread.
I've found the same user-space code in newlib as well.
glibc may be ok, some code I've found implies it only
uses the 'emulation' when the system call returns ENOSYS.
David
^ permalink raw reply
* RE: pread() and pwrite() system calls
From: Michael Ellerman @ 2012-05-29 8:54 UTC (permalink / raw)
To: David Laight; +Cc: linuxppc-dev
In-Reply-To: <AE90C24D6B3A694183C094C60CF0A2F6026B6F29@saturn3.aculab.com>
On Tue, 2012-05-29 at 09:28 +0100, David Laight wrote:
> > > A special pread/pwrite asm stub that just copies
> > > r7 to r0 could be used.
> > >
> > > Would it be enough to do:
> > > syscall_pread_pwrite:
> > > mov 0,7
> > > sc
> > > blr
> > > and handle the -ve -> errno in C?
> >
> > Huh? Won't fly, r0 is used for the system call number!
>
> I was copying that from r7!
Using the non-existent 'mov' instruction!
or 0,7,7
is much clearer :)
cheers
^ permalink raw reply
* [PATCH] powerpc: Clear RI and EE at the same time in system call exit
From: Anton Blanchard @ 2012-05-29 11:20 UTC (permalink / raw)
To: benh, paulus, michael; +Cc: linuxppc-dev
In system call exit we currently clear RI and EE separately. An
mtmsrd is a slow operation and we can save cycles by doing it all
in one go.
This does complicate things a bit - we have to be careful to restore
RI if we branch out before returning to userspace.
On a POWER7 with virtual cputime disabled this patch improves the
null system call by 7%.
Signed-off-by: Anton Blanchard <anton@samba.org>
---
Index: linux-build/arch/powerpc/kernel/entry_64.S
===================================================================
--- linux-build.orig/arch/powerpc/kernel/entry_64.S 2012-05-28 18:23:33.374451416 +1000
+++ linux-build/arch/powerpc/kernel/entry_64.S 2012-05-29 21:18:22.280934940 +1000
@@ -197,7 +197,16 @@ syscall_exit:
wrteei 0
#else
ld r10,PACAKMSR(r13)
- mtmsrd r10,1
+ /*
+ * For performance reasons we clear RI the same time that we
+ * clear EE. We only need to clear RI just before we restore r13
+ * below, but batching it with EE saves us one expensive mtmsrd call.
+ * We have to be careful to restore RI if we branch anywhere from
+ * here (eg syscall_exit_work).
+ */
+ li r9,MSR_RI
+ andc r11,r10,r9
+ mtmsrd r11,1
#endif /* CONFIG_PPC_BOOK3E */
ld r9,TI_FLAGS(r12)
@@ -214,17 +223,6 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
andi. r6,r8,MSR_PR
ld r4,_LINK(r1)
- /*
- * Clear RI before restoring r13. If we are returning to
- * userspace and we take an exception after restoring r13,
- * we end up corrupting the userspace r13 value.
- */
-#ifdef CONFIG_PPC_BOOK3S
- /* No MSR:RI on BookE */
- li r12,MSR_RI
- andc r11,r10,r12
- mtmsrd r11,1 /* clear MSR.RI */
-#endif /* CONFIG_PPC_BOOK3S */
beq- 1f
ACCOUNT_CPU_USER_EXIT(r11, r12)
@@ -271,6 +269,7 @@ syscall_enosys:
b syscall_exit
syscall_exit_work:
+ mtmsrd r10,1 /* Restore RI */
/* If TIF_RESTOREALL is set, don't scribble on either r3 or ccr.
If TIF_NOERROR is set, just save r3 as it is. */
^ permalink raw reply
* MSR loses DE bit in 3.3, BDI2000 cant handle breakpoints?
From: Joakim Tjernlund @ 2012-05-29 12:00 UTC (permalink / raw)
To: linuxppc-dev
I cannot make simple breakpoints work using the BDI2000 in 3.3; Abatron suggests that this
is because MSR[DE] is cleared by the kernel. With the emulator I can see that
MSR[DE] is off quite often by just stopping at random times and looking at MSR so
it seems like the kernel is turning MSR[DE] off most of the time.
Anyone else having success debugging 3.3 with BDI2000?
This is on a P2010(E500/BOOKE) CPU.
^ permalink raw reply
* RE: [linuxppc-release] [PATCH v5 1/5] powerpc/85xx: implement hardware timebase sync
From: Zhao Chenhui-B35336 @ 2012-05-29 12:20 UTC (permalink / raw)
To: galak@kernel.crashing.org
Cc: Wood Scott-B07421, linuxppc-dev@lists.ozlabs.org, Li Yang-R58472
In-Reply-To: <1336737235-15370-1-git-send-email-chenhui.zhao@freescale.com>
Hi Kumar,
There is no comment for these patches so far. Do you think these patches can be merged?
We really want these patches to be merged in this merge window.
Thanks.
Best Regards,
Chenhui
> -----Original Message-----
> From: Zhao Chenhui-B35336
> Sent: Friday, May 25, 2012 3:09 PM
> To: Wood Scott-B07421; galak@kernel.crashing.org
> Cc: Li Yang-R58472
> Subject: RE: [linuxppc-release] [PATCH v5 1/5] powerpc/85xx: implement hardware timebase sync
>=20
> Hi Scott and Kumar,
>=20
> Do you have comments for these patches?
>=20
> http://patchwork.ozlabs.org/patch/158484/
> http://patchwork.ozlabs.org/patch/158485/
> http://patchwork.ozlabs.org/patch/158487/
> http://patchwork.ozlabs.org/patch/158486/
> http://patchwork.ozlabs.org/patch/158488/
>=20
> Thanks.
>=20
> Best Regards,
> Chenhui
>=20
> > -----Original Message-----
> > From: linuxppc-release-bounces@linux.freescale.net [mailto:linuxppc-release-
> > bounces@linux.freescale.net] On Behalf Of Zhao Chenhui-B35336
> > Sent: Friday, May 11, 2012 7:54 PM
> > To: linuxppc-dev@lists.ozlabs.org
> > Cc: Wood Scott-B07421; Li Yang-R58472; linux-kernel@vger.kernel.org; galak@kernel.crashing.org
> > Subject: [linuxppc-release] [PATCH v5 1/5] powerpc/85xx: implement hardware timebase sync
> >
> > Do hardware timebase sync. Firstly, stop all timebases, and transfer
> > the timebase value of the boot core to the other core. Finally,
> > start all timebases.
> >
> > Only apply to dual-core chips, such as MPC8572, P2020, etc.
> >
> > Signed-off-by: Zhao Chenhui <chenhui.zhao@freescale.com>
> > Signed-off-by: Li Yang <leoli@freescale.com>
> > ---
> > arch/powerpc/include/asm/fsl_guts.h | 2 +
> > arch/powerpc/platforms/85xx/smp.c | 93 +++++++++++++++++++++++++++=
++++++--
> > 2 files changed, 91 insertions(+), 4 deletions(-)
> >
> > diff --git a/arch/powerpc/include/asm/fsl_guts.h b/arch/powerpc/include=
/asm/fsl_guts.h
> > index aa4c488..dd5ba2c 100644
> > --- a/arch/powerpc/include/asm/fsl_guts.h
> > +++ b/arch/powerpc/include/asm/fsl_guts.h
> > @@ -48,6 +48,8 @@ struct ccsr_guts {
> > __be32 dmuxcr; /* 0x.0068 - DMA Mux Control Register */
> > u8 res06c[0x70 - 0x6c];
> > __be32 devdisr; /* 0x.0070 - Device Disable Control */
> > +#define CCSR_GUTS_DEVDISR_TB1 0x00001000
> > +#define CCSR_GUTS_DEVDISR_TB0 0x00004000
> > __be32 devdisr2; /* 0x.0074 - Device Disable Control 2 */
> > u8 res078[0x7c - 0x78];
> > __be32 pmjcr; /* 0x.007c - 4 Power Management Jog Control Register =
*/
> > diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms=
/85xx/smp.c
> > index ff42490..6862dda 100644
> > --- a/arch/powerpc/platforms/85xx/smp.c
> > +++ b/arch/powerpc/platforms/85xx/smp.c
> > @@ -24,6 +24,7 @@
> > #include <asm/mpic.h>
> > #include <asm/cacheflush.h>
> > #include <asm/dbell.h>
> > +#include <asm/fsl_guts.h>
> >
> > #include <sysdev/fsl_soc.h>
> > #include <sysdev/mpic.h>
> > @@ -115,13 +116,70 @@ smp_85xx_kick_cpu(int nr)
> >
> > struct smp_ops_t smp_85xx_ops =3D {
> > .kick_cpu =3D smp_85xx_kick_cpu,
> > -#ifdef CONFIG_KEXEC
> > - .give_timebase =3D smp_generic_give_timebase,
> > - .take_timebase =3D smp_generic_take_timebase,
> > -#endif
> > };
> >
> > #ifdef CONFIG_KEXEC
> > +static struct ccsr_guts __iomem *guts;
> > +static u64 timebase;
> > +static int tb_req;
> > +static int tb_valid;
> > +
> > +static void mpc85xx_timebase_freeze(int freeze)
> > +{
> > + unsigned int mask;
> > +
> > + if (!guts)
> > + return;
> > +
> > + mask =3D CCSR_GUTS_DEVDISR_TB0 | CCSR_GUTS_DEVDISR_TB1;
> > + if (freeze)
> > + setbits32(&guts->devdisr, mask);
> > + else
> > + clrbits32(&guts->devdisr, mask);
> > +
> > + in_be32(&guts->devdisr);
> > +}
> > +
> > +static void mpc85xx_give_timebase(void)
> > +{
> > + unsigned long flags;
> > +
> > + local_irq_save(flags);
> > +
> > + while (!tb_req)
> > + barrier();
> > + tb_req =3D 0;
> > +
> > + mpc85xx_timebase_freeze(1);
> > + timebase =3D get_tb();
> > + mb();
> > + tb_valid =3D 1;
> > +
> > + while (tb_valid)
> > + barrier();
> > +
> > + mpc85xx_timebase_freeze(0);
> > +
> > + local_irq_restore(flags);
> > +}
> > +
> > +static void mpc85xx_take_timebase(void)
> > +{
> > + unsigned long flags;
> > +
> > + local_irq_save(flags);
> > +
> > + tb_req =3D 1;
> > + while (!tb_valid)
> > + barrier();
> > +
> > + set_tb(timebase >> 32, timebase & 0xffffffff);
> > + mb();
> > + tb_valid =3D 0;
> > +
> > + local_irq_restore(flags);
> > +}
> > +
> > atomic_t kexec_down_cpus =3D ATOMIC_INIT(0);
> >
> > void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary)
> > @@ -228,6 +286,20 @@ smp_85xx_setup_cpu(int cpu_nr)
> > doorbell_setup_this_cpu();
> > }
> >
> > +#ifdef CONFIG_KEXEC
> > +static const struct of_device_id guts_ids[] =3D {
> > + { .compatible =3D "fsl,mpc8572-guts", },
> > + { .compatible =3D "fsl,mpc8560-guts", },
> > + { .compatible =3D "fsl,mpc8536-guts", },
> > + { .compatible =3D "fsl,p1020-guts", },
> > + { .compatible =3D "fsl,p1021-guts", },
> > + { .compatible =3D "fsl,p1022-guts", },
> > + { .compatible =3D "fsl,p1023-guts", },
> > + { .compatible =3D "fsl,p2020-guts", },
> > + {},
> > +};
> > +#endif
> > +
> > void __init mpc85xx_smp_init(void)
> > {
> > struct device_node *np;
> > @@ -249,6 +321,19 @@ void __init mpc85xx_smp_init(void)
> > smp_85xx_ops.cause_ipi =3D doorbell_cause_ipi;
> > }
> >
> > +#ifdef CONFIG_KEXEC
> > + np =3D of_find_matching_node(NULL, guts_ids);
> > + if (np) {
> > + guts =3D of_iomap(np, 0);
> > + smp_85xx_ops.give_timebase =3D mpc85xx_give_timebase;
> > + smp_85xx_ops.take_timebase =3D mpc85xx_take_timebase;
> > + of_node_put(np);
> > + } else {
> > + smp_85xx_ops.give_timebase =3D smp_generic_give_timebase;
> > + smp_85xx_ops.take_timebase =3D smp_generic_take_timebase;
> > + }
> > +#endif
> > +
> > smp_ops =3D &smp_85xx_ops;
> >
> > #ifdef CONFIG_KEXEC
> > --
> > 1.6.4.1
> >
> > _______________________________________________
> > linuxppc-release mailing list
> > linuxppc-release@linux.freescale.net
> > http://linux.freescale.net/mailman/listinfo/linuxppc-release
^ permalink raw reply
* Re: ppc/sata-fsl: orphan config value: CONFIG_MPC8315_DS
From: Scott Wood @ 2012-05-29 18:02 UTC (permalink / raw)
To: Anthony Foiani
Cc: Robert P.J.Day, linuxppc-dev@lists.ozlabs.org, Li Yang-R58472,
Jeff Garzik, Adrian Bunk
In-Reply-To: <g7gvzbfhb.fsf@dworkin.scrye.com>
On 05/26/2012 01:53 AM, Anthony Foiani wrote:
> Li Yang-R58472 <r58472@freescale.com> writes:
>
>> Thanks for bringing [CONFIG_MPC8315_DS] up again. Looks like we do
>> have a problem here.
>
> My impression is that the simplest fix is Adrian's patch, which simply
> keys off CONFIG_MPC831x_RDB. It's not very satisfying, but I'll take
> "working" vs. "rare lockups at boot".
CONFIG_MPC831x_RDB doesn't mean that you're running on such a board,
only that the kernel supports those boards. It should be a runtime test.
-Scott
^ permalink raw reply
* Re: [PATCH] powerpc: Clear RI and EE at the same time in system call exit
From: Scott Wood @ 2012-05-29 18:07 UTC (permalink / raw)
To: Anton Blanchard; +Cc: michael, paulus, linuxppc-dev
In-Reply-To: <20120529212010.0152a083@kryten>
On 05/29/2012 06:20 AM, Anton Blanchard wrote:
>
> In system call exit we currently clear RI and EE separately. An
> mtmsrd is a slow operation and we can save cycles by doing it all
> in one go.
>
> This does complicate things a bit - we have to be careful to restore
> RI if we branch out before returning to userspace.
>
> On a POWER7 with virtual cputime disabled this patch improves the
> null system call by 7%.
>
> Signed-off-by: Anton Blanchard <anton@samba.org>
> ---
>
> Index: linux-build/arch/powerpc/kernel/entry_64.S
> ===================================================================
> --- linux-build.orig/arch/powerpc/kernel/entry_64.S 2012-05-28 18:23:33.374451416 +1000
> +++ linux-build/arch/powerpc/kernel/entry_64.S 2012-05-29 21:18:22.280934940 +1000
> @@ -197,7 +197,16 @@ syscall_exit:
> wrteei 0
> #else
> ld r10,PACAKMSR(r13)
> - mtmsrd r10,1
> + /*
> + * For performance reasons we clear RI the same time that we
> + * clear EE. We only need to clear RI just before we restore r13
> + * below, but batching it with EE saves us one expensive mtmsrd call.
> + * We have to be careful to restore RI if we branch anywhere from
> + * here (eg syscall_exit_work).
> + */
> + li r9,MSR_RI
> + andc r11,r10,r9
> + mtmsrd r11,1
> #endif /* CONFIG_PPC_BOOK3E */
>
> ld r9,TI_FLAGS(r12)
> @@ -214,17 +223,6 @@ BEGIN_FTR_SECTION
> END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
> andi. r6,r8,MSR_PR
> ld r4,_LINK(r1)
> - /*
> - * Clear RI before restoring r13. If we are returning to
> - * userspace and we take an exception after restoring r13,
> - * we end up corrupting the userspace r13 value.
> - */
> -#ifdef CONFIG_PPC_BOOK3S
> - /* No MSR:RI on BookE */
> - li r12,MSR_RI
> - andc r11,r10,r12
> - mtmsrd r11,1 /* clear MSR.RI */
> -#endif /* CONFIG_PPC_BOOK3S */
>
> beq- 1f
> ACCOUNT_CPU_USER_EXIT(r11, r12)
> @@ -271,6 +269,7 @@ syscall_enosys:
> b syscall_exit
>
> syscall_exit_work:
> + mtmsrd r10,1 /* Restore RI */
That mtmsrd needs an #ifdef CONFIG_PPC_BOOK3S
-Scott
^ permalink raw reply
* Re: MSR loses DE bit in 3.3, BDI2000 cant handle breakpoints?
From: Joakim Tjernlund @ 2012-05-29 18:29 UTC (permalink / raw)
To: Bob Cochran; +Cc: linuxppc-dev, support
In-Reply-To: <4FC511C1.4050007@mindchasers.com>
Bob Cochran <ppc@mindchasers.com> wrote on 2012/05/29 20:13:21:
>
> On 05/29/2012 08:00 AM, Joakim Tjernlund wrote:
> >
> > I cannot make simple break points using BDI2000 work in 3.3, abatro suggests that it
> > depends on MSR[DE] is cleared by the kernel. With the emulator I can see that
> > MSR[DE] is off quite often by just stopping at random times and looking at MSR so
> > it seems like the kernel is turning MSR[DE] off most of the time.
> > Anyone else having success debugging 3.3 with BDI2000?
> >
> > This is on a P2010(E500/BOOKE) CPU.
> >
> > _______________________________________________
> > Linuxppc-dev mailing list
> > Linuxppc-dev@lists.ozlabs.org
> > https://lists.ozlabs.org/listinfo/linuxppc-dev
> >
>
> I debug using Freescale CodeWarrior and a USB tap, which also rely on
> MSR[DE] being set. I develop from the mainline & have a patch set that
> I just recently re-tweaked to support kernel debugging.
>
> If you want, I'll send you my set of patches for the kernel. They might
> be useful (not sure since I don't use BDI).
Thanks, that could be useful, however I just figured something out.
Changing
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index 500fe1d..0cb259b 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -37,7 +37,7 @@
#define MSR_KERNEL (MSR_ME|MSR_RI|MSR_IR|MSR_DR|MSR_CE)
#define MSR_USER (MSR_KERNEL|MSR_PR|MSR_EE)
#else
-#define MSR_KERNEL (MSR_ME|MSR_RI|MSR_CE)
+#define MSR_KERNEL (MSR_ME|MSR_RI|MSR_CE|MSR_DE)
#define MSR_USER (MSR_KERNEL|MSR_PR|MSR_EE)
#endif
Made it work (possibly one should change MSR_USER too?).
The question now is why MSR_DE is not on by default, especially since the
BDI2000 is supported by the kernel (CONFIG_BDI_SWITCH=y is set in my kernel)?
Jocke
^ permalink raw reply related
* Re: MSR loses DE bit in 3.3, BDI2000 cant handle breakpoints?
From: Bob Cochran @ 2012-05-29 18:13 UTC (permalink / raw)
To: Joakim Tjernlund; +Cc: linuxppc-dev
In-Reply-To: <OF00FC5182.FED0E75C-ONC1257A0D.003AA532-C1257A0D.0041F687@transmode.se>
On 05/29/2012 08:00 AM, Joakim Tjernlund wrote:
>
> I cannot make simple break points using BDI2000 work in 3.3, abatro suggests that it
> depends on MSR[DE] is cleared by the kernel. With the emulator I can see that
> MSR[DE] is off quite often by just stopping at random times and looking at MSR so
> it seems like the kernel is turning MSR[DE] off most of the time.
> Anyone else having success debugging 3.3 with BDI2000?
>
> This is on a P2010(E500/BOOKE) CPU.
>
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev
>
I debug using Freescale CodeWarrior and a USB tap, which also rely on
MSR[DE] being set. I develop from the mainline & have a patch set that
I just recently re-tweaked to support kernel debugging.
If you want, I'll send you my set of patches for the kernel. They might
be useful (not sure since I don't use BDI).
^ permalink raw reply
* Re: MSR loses DE bit in 3.3, BDI2000 cant handle breakpoints?
From: Wolfgang Denk @ 2012-05-29 20:13 UTC (permalink / raw)
To: Bob Cochran; +Cc: linuxppc-dev
In-Reply-To: <4FC511C1.4050007@mindchasers.com>
Dear Bob,
In message <4FC511C1.4050007@mindchasers.com> you wrote:
>
> I debug using Freescale CodeWarrior and a USB tap, which also rely on
> MSR[DE] being set. I develop from the mainline & have a patch set that
> I just recently re-tweaked to support kernel debugging.
>
> If you want, I'll send you my set of patches for the kernel. They might
> be useful (not sure since I don't use BDI).
Please rather submit as a patch on the mailing list.
Thanks.
Best regards,
Wolfgang Denk
--
DENX Software Engineering GmbH, MD: Wolfgang Denk & Detlev Zundel
HRB 165235 Munich, Office: Kirchenstr.5, D-82194 Groebenzell, Germany
Phone: (+49)-8142-66989-10 Fax: (+49)-8142-66989-80 Email: wd@denx.de
egrep patterns are full regular expressions; it uses a fast determi-
nistic algorithm that sometimes needs exponential space.
- unix manuals
^ permalink raw reply
* Re: ppc/sata-fsl: orphan config value: CONFIG_MPC8315_DS
From: Anthony Foiani @ 2012-05-29 22:07 UTC (permalink / raw)
To: Scott Wood
Cc: Robert P.J.Day, linuxppc-dev@lists.ozlabs.org, Li Yang-R58472,
Jeff Garzik, Adrian Bunk
In-Reply-To: <4FC50F22.2040404@freescale.com>
Scott Wood <scottwood@freescale.com> writes:
> CONFIG_MPC831x_RDB doesn't mean that you're running on such a board,
> only that the kernel supports those boards. It should be a runtime
> test.
Point taken.
If that SATA check is CPU/SOC-based, then it should be easy enough to
test. The cpuinfo for my board is:
# cat /proc/cpuinfo
processor : 0
cpu : e300c3
clock : 266.666664MHz
revision : 2.0 (pvr 8085 0020)
bogomips : 66.66
timebase : 33333333
On the other hand, if the problem is actually caused by board trace
routing (or other hardware that's outside the control of the CPU/SOC),
then I don't know how possible a runtime check will be.
Do you know if there is a specific errata that the MPC8315_DS ran
across that required this fix, or was it a band-aid in the first
place?
Either way, thanks for looking into this.
Thanks,
Tony
^ permalink raw reply
* [PATCH] powerpc: Clear RI and EE at the same time in system call exit
From: Anton Blanchard @ 2012-05-29 22:22 UTC (permalink / raw)
To: benh, paulus, michael, scottwood; +Cc: linuxppc-dev
In-Reply-To: <4FC5104E.1000504@freescale.com>
Hi Scott,
> > syscall_exit_work:
> > + mtmsrd r10,1 /* Restore RI */
>
> That mtmsrd needs an #ifdef CONFIG_PPC_BOOK3S
Thanks!
Anton
--
In system call exit we currently clear RI and EE separately. An
mtmsrd is a slow operation and we can save cycles by doing it all
in one go.
This does complicate things a bit - we have to be careful to restore
RI if we branch out before returning to userspace.
On a POWER7 with virtual cputime disabled this patch improves the
null system call by 7%.
Signed-off-by: Anton Blanchard <anton@samba.org>
---
Index: linux-build/arch/powerpc/kernel/entry_64.S
===================================================================
--- linux-build.orig/arch/powerpc/kernel/entry_64.S 2012-05-29 21:22:40.293549055 +1000
+++ linux-build/arch/powerpc/kernel/entry_64.S 2012-05-30 07:47:31.380737406 +1000
@@ -197,7 +197,16 @@ syscall_exit:
wrteei 0
#else
ld r10,PACAKMSR(r13)
- mtmsrd r10,1
+ /*
+ * For performance reasons we clear RI the same time that we
+ * clear EE. We only need to clear RI just before we restore r13
+ * below, but batching it with EE saves us one expensive mtmsrd call.
+ * We have to be careful to restore RI if we branch anywhere from
+ * here (eg syscall_exit_work).
+ */
+ li r9,MSR_RI
+ andc r11,r10,r9
+ mtmsrd r11,1
#endif /* CONFIG_PPC_BOOK3E */
ld r9,TI_FLAGS(r12)
@@ -214,17 +223,6 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
andi. r6,r8,MSR_PR
ld r4,_LINK(r1)
- /*
- * Clear RI before restoring r13. If we are returning to
- * userspace and we take an exception after restoring r13,
- * we end up corrupting the userspace r13 value.
- */
-#ifdef CONFIG_PPC_BOOK3S
- /* No MSR:RI on BookE */
- li r12,MSR_RI
- andc r11,r10,r12
- mtmsrd r11,1 /* clear MSR.RI */
-#endif /* CONFIG_PPC_BOOK3S */
beq- 1f
ACCOUNT_CPU_USER_EXIT(r11, r12)
@@ -271,6 +269,9 @@ syscall_enosys:
b syscall_exit
syscall_exit_work:
+#ifdef CONFIG_PPC_BOOK3S
+ mtmsrd r10,1 /* Restore RI */
+#endif
/* If TIF_RESTOREALL is set, don't scribble on either r3 or ccr.
If TIF_NOERROR is set, just save r3 as it is. */
^ permalink raw reply
* Re: ppc/sata-fsl: orphan config value: CONFIG_MPC8315_DS
From: Scott Wood @ 2012-05-29 22:57 UTC (permalink / raw)
To: Anthony Foiani
Cc: Robert P.J.Day, linuxppc-dev@lists.ozlabs.org, Li Yang-R58472,
Jeff Garzik, Adrian Bunk
In-Reply-To: <gvcjetze3.fsf@dworkin.scrye.com>
On 05/29/2012 05:07 PM, Anthony Foiani wrote:
> Scott Wood <scottwood@freescale.com> writes:
>
>> CONFIG_MPC831x_RDB doesn't mean that you're running on such a board,
>> only that the kernel supports those boards. It should be a runtime
>> test.
>
> Point taken.
>
> If that SATA check is CPU/SOC-based, then it should be easy enough to
> test. The cpuinfo for my board is:
>
> # cat /proc/cpuinfo
> processor : 0
> cpu : e300c3
> clock : 266.666664MHz
> revision : 2.0 (pvr 8085 0020)
> bogomips : 66.66
> timebase : 33333333
>
> On the other hand, if the problem is actually caused by board trace
> routing (or other hardware that's outside the control of the CPU/SOC),
> then I don't know how possible a runtime check will be.
Board information is available from the device tree, and from platform
code that was selected based on the device tree.
> Do you know if there is a specific errata that the MPC8315_DS ran
> across that required this fix, or was it a band-aid in the first
> place?
I don't know the history of this, sorry. It looks like Yang Li added
this code -- Yang, can you answer this?
-Scott
^ permalink raw reply
* [PATCH 1/2] powerpc: Rename copyuser_power7_vmx.c to vmx-helper.c
From: Anton Blanchard @ 2012-05-30 5:31 UTC (permalink / raw)
To: benh, paulus, michael, linuxppc-dev
Subsequent patches will add more VMX library functions and it makes
sense to keep all the c-code helper functions in the one file.
Signed-off-by: Anton Blanchard <anton@samba.org>
---
Index: linux-build/arch/powerpc/lib/Makefile
===================================================================
--- linux-build.orig/arch/powerpc/lib/Makefile 2012-05-30 09:39:59.084233436 +1000
+++ linux-build/arch/powerpc/lib/Makefile 2012-05-30 10:22:32.565764322 +1000
@@ -24,7 +24,7 @@ obj-$(CONFIG_HAVE_HW_BREAKPOINT) += sste
ifeq ($(CONFIG_PPC64),y)
obj-$(CONFIG_SMP) += locks.o
-obj-$(CONFIG_ALTIVEC) += copyuser_power7_vmx.o
+obj-$(CONFIG_ALTIVEC) += vmx-helper.o
endif
obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o
Index: linux-build/arch/powerpc/lib/vmx-helper.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-build/arch/powerpc/lib/vmx-helper.c 2012-05-30 10:22:32.577764541 +1000
@@ -0,0 +1,51 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2011
+ *
+ * Authors: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
+ * Anton Blanchard <anton@au.ibm.com>
+ */
+#include <linux/uaccess.h>
+#include <linux/hardirq.h>
+#include <asm/switch_to.h>
+
+int enter_vmx_usercopy(void)
+{
+ if (in_interrupt())
+ return 0;
+
+ /* This acts as preempt_disable() as well and will make
+ * enable_kernel_altivec(). We need to disable page faults
+ * as they can call schedule and thus make us lose the VMX
+ * context. So on page faults, we just fail which will cause
+ * a fallback to the normal non-vmx copy.
+ */
+ pagefault_disable();
+
+ enable_kernel_altivec();
+
+ return 1;
+}
+
+/*
+ * This function must return 0 because we tail call optimise when calling
+ * from __copy_tofrom_user_power7 which returns 0 on success.
+ */
+int exit_vmx_usercopy(void)
+{
+ pagefault_enable();
+ return 0;
+}
Index: linux-build/arch/powerpc/lib/copyuser_power7_vmx.c
===================================================================
--- linux-build.orig/arch/powerpc/lib/copyuser_power7_vmx.c 2012-05-28 17:18:38.213091662 +1000
+++ /dev/null 1970-01-01 00:00:00.000000000 +0000
@@ -1,51 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright (C) IBM Corporation, 2011
- *
- * Authors: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
- * Anton Blanchard <anton@au.ibm.com>
- */
-#include <linux/uaccess.h>
-#include <linux/hardirq.h>
-#include <asm/switch_to.h>
-
-int enter_vmx_copy(void)
-{
- if (in_interrupt())
- return 0;
-
- /* This acts as preempt_disable() as well and will make
- * enable_kernel_altivec(). We need to disable page faults
- * as they can call schedule and thus make us lose the VMX
- * context. So on page faults, we just fail which will cause
- * a fallback to the normal non-vmx copy.
- */
- pagefault_disable();
-
- enable_kernel_altivec();
-
- return 1;
-}
-
-/*
- * This function must return 0 because we tail call optimise when calling
- * from __copy_tofrom_user_power7 which returns 0 on success.
- */
-int exit_vmx_copy(void)
-{
- pagefault_enable();
- return 0;
-}
Index: linux-build/arch/powerpc/lib/copyuser_power7.S
===================================================================
--- linux-build.orig/arch/powerpc/lib/copyuser_power7.S 2012-05-29 21:22:43.725611809 +1000
+++ linux-build/arch/powerpc/lib/copyuser_power7.S 2012-05-30 10:23:29.198797007 +1000
@@ -61,7 +61,7 @@
ld r15,STK_REG(r15)(r1)
ld r14,STK_REG(r14)(r1)
.Ldo_err3:
- bl .exit_vmx_copy
+ bl .exit_vmx_usercopy
ld r0,STACKFRAMESIZE+16(r1)
mtlr r0
b .Lexit
@@ -290,7 +290,7 @@ err1; stb r0,0(r3)
mflr r0
std r0,16(r1)
stdu r1,-STACKFRAMESIZE(r1)
- bl .enter_vmx_copy
+ bl .enter_vmx_usercopy
cmpwi r3,0
ld r0,STACKFRAMESIZE+16(r1)
ld r3,STACKFRAMESIZE+48(r1)
@@ -507,7 +507,7 @@ err3; lbz r0,0(r4)
err3; stb r0,0(r3)
15: addi r1,r1,STACKFRAMESIZE
- b .exit_vmx_copy /* tail call optimise */
+ b .exit_vmx_usercopy /* tail call optimise */
.Lvmx_unaligned_copy:
/* Get the destination 16B aligned */
@@ -710,5 +710,5 @@ err3; lbz r0,0(r4)
err3; stb r0,0(r3)
15: addi r1,r1,STACKFRAMESIZE
- b .exit_vmx_copy /* tail call optimise */
+ b .exit_vmx_usercopy /* tail call optimise */
#endif /* CONFiG_ALTIVEC */
^ permalink raw reply
* [PATCH 2/2] powerpc: POWER7 optimised copy_page using VMX and enhanced prefetch
From: Anton Blanchard @ 2012-05-30 5:33 UTC (permalink / raw)
To: benh, paulus, michael, linuxppc-dev
In-Reply-To: <20120530153124.6a27d10d@kryten>
Implement a POWER7 optimised copy_page using VMX and enhanced
prefetch instructions. We use enhanced prefetch hints to prefetch
both the load and store side. We copy a cacheline at a time and
fall back to regular loads and stores if we are unable to use VMX
(eg we are in an interrupt).
The following microbenchmark was used to assess the impact of
the patch:
http://ozlabs.org/~anton/junkcode/page_fault_file.c
We test MAP_PRIVATE page faults across a 1GB file, 100 times:
# time ./page_fault_file -p -l 1G -i 100
Before: 22.25s
After: 18.89s
17% faster
Signed-off-by: Anton Blanchard <anton@samba.org>
---
Index: linux-build/arch/powerpc/lib/copypage_power7.S
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-build/arch/powerpc/lib/copypage_power7.S 2012-05-30 14:20:32.457035092 +1000
@@ -0,0 +1,168 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#include <asm/page.h>
+#include <asm/ppc_asm.h>
+
+#define STACKFRAMESIZE 256
+#define STK_REG(i) (112 + ((i)-14)*8)
+
+_GLOBAL(copypage_power7)
+ /*
+ * We prefetch both the source and destination using enhanced touch
+ * instructions. We use a stream ID of 0 for the load side and
+ * 1 for the store side. Since source and destination are page
+ * aligned we don't need to clear the bottom 7 bits of either
+ * address.
+ */
+ ori r9,r3,1 /* stream=1 */
+
+#ifdef CONFIG_PPC_64K_PAGES
+ lis r7,0x0E01 /* depth=7, units=512 */
+#else
+ lis r7,0x0E00 /* depth=7 */
+ ori r7,r7,0x1000 /* units=32 */
+#endif
+ ori r10,r7,1 /* stream=1 */
+
+ lis r8,0x8000 /* GO=1 */
+ clrldi r8,r8,32
+
+.machine push
+.machine "power4"
+ dcbt r0,r4,0b01000
+ dcbt r0,r7,0b01010
+ dcbtst r0,r9,0b01000
+ dcbtst r0,r10,0b01010
+ eieio
+ dcbt r0,r8,0b01010 /* GO */
+.machine pop
+
+#ifdef CONFIG_ALTIVEC
+ mflr r0
+ std r3,48(r1)
+ std r4,56(r1)
+ std r0,16(r1)
+ stdu r1,-STACKFRAMESIZE(r1)
+ bl .enter_vmx_copy
+ cmpwi r3,0
+ ld r0,STACKFRAMESIZE+16(r1)
+ ld r3,STACKFRAMESIZE+48(r1)
+ ld r4,STACKFRAMESIZE+56(r1)
+ mtlr r0
+
+ li r0,(PAGE_SIZE/128)
+ mtctr r0
+
+ beq .Lnonvmx_copy
+
+ addi r1,r1,STACKFRAMESIZE
+
+ li r6,16
+ li r7,32
+ li r8,48
+ li r9,64
+ li r10,80
+ li r11,96
+ li r12,112
+
+ .align 5
+1: lvx vr7,r0,r4
+ lvx vr6,r4,r6
+ lvx vr5,r4,r7
+ lvx vr4,r4,r8
+ lvx vr3,r4,r9
+ lvx vr2,r4,r10
+ lvx vr1,r4,r11
+ lvx vr0,r4,r12
+ addi r4,r4,128
+ stvx vr7,r0,r3
+ stvx vr6,r3,r6
+ stvx vr5,r3,r7
+ stvx vr4,r3,r8
+ stvx vr3,r3,r9
+ stvx vr2,r3,r10
+ stvx vr1,r3,r11
+ stvx vr0,r3,r12
+ addi r3,r3,128
+ bdnz 1b
+
+ b .exit_vmx_copy /* tail call optimise */
+
+#else
+ li r0,(PAGE_SIZE/128)
+ mtctr r0
+
+ stdu r1,-STACKFRAMESIZE(r1)
+#endif
+
+.Lnonvmx_copy:
+ std r14,STK_REG(r14)(r1)
+ std r15,STK_REG(r15)(r1)
+ std r16,STK_REG(r16)(r1)
+ std r17,STK_REG(r17)(r1)
+ std r18,STK_REG(r18)(r1)
+ std r19,STK_REG(r19)(r1)
+ std r20,STK_REG(r20)(r1)
+
+1: ld r0,0(r4)
+ ld r5,8(r4)
+ ld r6,16(r4)
+ ld r7,24(r4)
+ ld r8,32(r4)
+ ld r9,40(r4)
+ ld r10,48(r4)
+ ld r11,56(r4)
+ ld r12,64(r4)
+ ld r14,72(r4)
+ ld r15,80(r4)
+ ld r16,88(r4)
+ ld r17,96(r4)
+ ld r18,104(r4)
+ ld r19,112(r4)
+ ld r20,120(r4)
+ addi r4,r4,128
+ std r0,0(r3)
+ std r5,8(r3)
+ std r6,16(r3)
+ std r7,24(r3)
+ std r8,32(r3)
+ std r9,40(r3)
+ std r10,48(r3)
+ std r11,56(r3)
+ std r12,64(r3)
+ std r14,72(r3)
+ std r15,80(r3)
+ std r16,88(r3)
+ std r17,96(r3)
+ std r18,104(r3)
+ std r19,112(r3)
+ std r20,120(r3)
+ addi r3,r3,128
+ bdnz 1b
+
+ ld r14,STK_REG(r14)(r1)
+ ld r15,STK_REG(r15)(r1)
+ ld r16,STK_REG(r16)(r1)
+ ld r17,STK_REG(r17)(r1)
+ ld r18,STK_REG(r18)(r1)
+ ld r19,STK_REG(r19)(r1)
+ ld r20,STK_REG(r20)(r1)
+ addi r1,r1,STACKFRAMESIZE
+ blr
Index: linux-build/arch/powerpc/lib/Makefile
===================================================================
--- linux-build.orig/arch/powerpc/lib/Makefile 2012-05-30 10:53:03.442309322 +1000
+++ linux-build/arch/powerpc/lib/Makefile 2012-05-30 11:07:54.361827650 +1000
@@ -17,7 +17,7 @@ obj-$(CONFIG_HAS_IOMEM) += devres.o
obj-$(CONFIG_PPC64) += copypage_64.o copyuser_64.o \
memcpy_64.o usercopy_64.o mem_64.o string.o \
checksum_wrappers_64.o hweight_64.o \
- copyuser_power7.o string_64.o
+ copyuser_power7.o string_64.o copypage_power7.o
obj-$(CONFIG_XMON) += sstep.o ldstfp.o
obj-$(CONFIG_KPROBES) += sstep.o ldstfp.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += sstep.o ldstfp.o
Index: linux-build/arch/powerpc/lib/copypage_64.S
===================================================================
--- linux-build.orig/arch/powerpc/lib/copypage_64.S 2012-05-30 10:53:03.430309112 +1000
+++ linux-build/arch/powerpc/lib/copypage_64.S 2012-05-30 11:07:54.361827650 +1000
@@ -17,7 +17,11 @@ PPC64_CACHES:
.section ".text"
_GLOBAL(copy_page)
+BEGIN_FTR_SECTION
lis r5,PAGE_SIZE@h
+FTR_SECTION_ELSE
+ b .copypage_power7
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
ori r5,r5,PAGE_SIZE@l
BEGIN_FTR_SECTION
ld r10,PPC64_CACHES@toc(r2)
Index: linux-build/arch/powerpc/lib/vmx-helper.c
===================================================================
--- linux-build.orig/arch/powerpc/lib/vmx-helper.c 2012-05-30 10:53:03.454309531 +1000
+++ linux-build/arch/powerpc/lib/vmx-helper.c 2012-05-30 14:20:38.637144276 +1000
@@ -49,3 +49,26 @@ int exit_vmx_usercopy(void)
pagefault_enable();
return 0;
}
+
+int enter_vmx_copy(void)
+{
+ if (in_interrupt())
+ return 0;
+
+ preempt_disable();
+
+ enable_kernel_altivec();
+
+ return 1;
+}
+
+/*
+ * All calls to this function will be optimised into tail calls. We are
+ * passed a pointer to the destination which we return as required by a
+ * memcpy implementation.
+ */
+void *exit_vmx_copy(void *dest)
+{
+ preempt_enable();
+ return dest;
+}
^ permalink raw reply
* [RFC] [PATCH] powerpc: Add MSR_DE to MSR_KERNEL
From: Joakim Tjernlund @ 2012-05-30 7:43 UTC (permalink / raw)
To: linuxppc-dev, support, Bob Cochran
Emulators such as BDI2000 and CodeWarrior need to have MSR_DE set
in order to support breakpoints.
This adds MSR_DE for kernel space only.
---
I have tested this briefly with BDI2000 on P2010(e500) and
it works for me. I don't know if there are any bad side effects, therefore
this RFC.
arch/powerpc/include/asm/reg.h | 2 +-
arch/powerpc/include/asm/reg_booke.h | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 7fdc2c0..25c8554 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -108,7 +108,7 @@
#define MSR_USER64 MSR_USER32 | MSR_64BIT
#elif defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_8xx)
/* Default MSR for kernel mode. */
-#define MSR_KERNEL (MSR_ME|MSR_RI|MSR_IR|MSR_DR)
+#define MSR_KERNEL (MSR_ME|MSR_RI|MSR_IR|MSR_DR|MSR_DE)
#define MSR_USER (MSR_KERNEL|MSR_PR|MSR_EE)
#endif
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index 500fe1d..0cb259b 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -37,7 +37,7 @@
#define MSR_KERNEL (MSR_ME|MSR_RI|MSR_IR|MSR_DR|MSR_CE)
#define MSR_USER (MSR_KERNEL|MSR_PR|MSR_EE)
#else
-#define MSR_KERNEL (MSR_ME|MSR_RI|MSR_CE)
+#define MSR_KERNEL (MSR_ME|MSR_RI|MSR_CE|MSR_DE)
#define MSR_USER (MSR_KERNEL|MSR_PR|MSR_EE)
#endif
--
1.7.3.4
^ permalink raw reply related
* Re: [RFC] [PATCH] powerpc: Add MSR_DE to MSR_KERNEL
From: Dan Malek @ 2012-05-30 7:59 UTC (permalink / raw)
To: Joakim Tjernlund; +Cc: linuxppc-dev, Bob Cochran, support
In-Reply-To: <1338363814-19565-1-git-send-email-Joakim.Tjernlund@transmode.se>
Hi Joakim.
On May 30, 2012, at 12:43 AM, Joakim Tjernlund wrote:
> I have tested this briefly with BDI2000 on P2010(e500) and
> it works for me. I don't know if there are any bad side effects,
> therfore
> this RFC.
We used to have MSR_DE surrounded by CONFIG_something
to ensure it wasn't set under normal operation. IIRC, if MSR_DE
is set, you will have problems with software debuggers that
utilize the debugging registers in the chip itself. You only want
to force this to be set when using the BDI, not at other times.
Thanks.
-- Dan
^ permalink raw reply
* [RFC PATCH powerpc] make CONFIG_NUMA depends on CONFIG_SMP
From: Li Zhong @ 2012-05-30 9:31 UTC (permalink / raw)
Cc: Paul Mackerras, PowerPC email list
I'm not sure whether it makes sense to add this dependency to avoid
CONFIG_NUMA && !CONFIG_SMP.
I want to do this because I saw some build errors on next-tree when
compiling with CONFIG_SMP disabled, and it seems they are caused by some
code under the CONFIG_NUMA #ifdefs.
Signed-off-by: Li Zhong <zhong@linux.vnet.ibm.com>
---
arch/powerpc/Kconfig | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 050cb37..b2aa74b 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -394,7 +394,7 @@ config IRQ_ALL_CPUS
config NUMA
bool "NUMA support"
- depends on PPC64
+ depends on PPC64 && SMP
default y if SMP && PPC_PSERIES
config NODES_SHIFT
--
1.7.1
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox