* [PATCH] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6
@ 2010-06-29 10:34 Tony Lindgren
0 siblings, 0 replies; 11+ messages in thread
From: Tony Lindgren @ 2010-06-29 10:34 UTC (permalink / raw)
To: linux-arm-kernel
The TLS register is only available on ARM1136 r1p0 and later.
Set HWCAP_TLS flags if hardware TLS is available and test for
it if CONFIG_CPU_32v6K is not set for V6.
Note that we set the TLS instruction in __kuser_get_tls
dynamically as suggested by Jamie Lokier <jamie@shareable.org>.
Also the __switch_to code is optimized out in most cases as
suggested by Nicolas Pitre <nico@fluxnic.net>.
Signed-off-by: Tony Lindgren <tony@atomide.com>
diff --git a/arch/arm/include/asm/hwcap.h b/arch/arm/include/asm/hwcap.h
index f7bd52b..c1062c3 100644
--- a/arch/arm/include/asm/hwcap.h
+++ b/arch/arm/include/asm/hwcap.h
@@ -19,6 +19,7 @@
#define HWCAP_NEON 4096
#define HWCAP_VFPv3 8192
#define HWCAP_VFPv3D16 16384
+#define HWCAP_TLS 32768
#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
/*
diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
new file mode 100644
index 0000000..e565f33
--- /dev/null
+++ b/arch/arm/include/asm/tls.h
@@ -0,0 +1,46 @@
+#ifndef __ASMARM_TLS_H
+#define __ASMARM_TLS_H
+
+#ifdef __ASSEMBLY__
+ .macro set_tls_none, tp, tmp1, tmp2
+ .endm
+
+ .macro set_tls_v6k, tp, tmp1, tmp2
+ mcr p15, 0, \tp, c13, c0, 3 @ set TLS register
+ .endm
+
+ .macro set_tls_v6, tp, tmp1, tmp2
+ ldr \tmp1, =elf_hwcap
+ ldr \tmp1, [\tmp1, #0]
+ mov \tmp2, #0xffff0fff
+ tst \tmp1, #HWCAP_TLS @ hardware TLS available?
+ mcrne p15, 0, \tp, c13, c0, 3 @ yes, set TLS register
+ streq \tp, [\tmp2, #-15] @ set TLS value at 0xffff0ff0
+ .endm
+
+ .macro set_tls_software, tp, tmp1, tmp2
+ mov \tmp1, #0xffff0fff
+ str \tp, [\tmp1, #-15] @ set TLS value at 0xffff0ff0
+ .endm
+#endif
+
+#ifdef CONFIG_TLS_REG_EMUL
+#define tls_emu 1
+#define has_tls 1
+#define set_tls set_tls_none
+#elif __LINUX_ARM_ARCH__ >= 7 || \
+ (__LINUX_ARM_ARCH__ == 6 && defined(CONFIG_CPU_32v6K))
+#define tls_emu 0
+#define has_tls 1
+#define set_tls set_tls_v6k
+#elif __LINUX_ARM_ARCH__ == 6
+#define tls_emu 0
+#define has_tls (elf_hwcap & HWCAP_TLS)
+#define set_tls set_tls_v6
+#else
+#define tls_emu 0
+#define has_tls 0
+#define set_tls set_tls_software
+#endif
+
+#endif /* __ASMARM_TLS_H */
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 7ee48e7..a6cfb17 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -22,6 +22,7 @@
#include <asm/thread_notify.h>
#include <asm/unwind.h>
#include <asm/unistd.h>
+#include <asm/tls.h>
#include "entry-header.S"
@@ -739,12 +740,7 @@ ENTRY(__switch_to)
#ifdef CONFIG_MMU
ldr r6, [r2, #TI_CPU_DOMAIN]
#endif
-#if defined(CONFIG_HAS_TLS_REG)
- mcr p15, 0, r3, c13, c0, 3 @ set TLS register
-#elif !defined(CONFIG_TLS_REG_EMUL)
- mov r4, #0xffff0fff
- str r3, [r4, #-15] @ TLS val at 0xffff0ff0
-#endif
+ set_tls r3, r4, r5
#ifdef CONFIG_MMU
mcr p15, 0, r6, c3, c0, 0 @ Set domain register
#endif
@@ -1009,17 +1005,12 @@ kuser_cmpxchg_fixup:
*/
__kuser_get_tls: @ 0xffff0fe0
-
-#if !defined(CONFIG_HAS_TLS_REG) && !defined(CONFIG_TLS_REG_EMUL)
- ldr r0, [pc, #(16 - 8)] @ TLS stored at 0xffff0ff0
-#else
- mrc p15, 0, r0, c13, c0, 3 @ read TLS register
-#endif
+ ldr r0, [pc, #(16 - 8)] @ read TLS, set in kuser_get_tls_init
usr_ret lr
-
- .rep 5
- .word 0 @ pad up to __kuser_helper_version
- .endr
+ mrc p15, 0, r0, c13, c0, 3 @ 0xffff0fe8 hardware TLS code
+ .rep 4
+ .word 0 @ 0xffff0ff0 software TLS value, then
+ .endr @ pad up to __kuser_helper_version
/*
* Reference declaration:
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 122d999..6e2c6f9 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -269,6 +269,21 @@ static void __init cacheid_init(void)
extern struct proc_info_list *lookup_processor_type(unsigned int);
extern struct machine_desc *lookup_machine_type(unsigned int);
+static void __init feat_v6_fixup(void)
+{
+ int id = read_cpuid_id();
+
+ if ((id & 0x410f0000) != 0x41070000)
+ return;
+
+ /*
+ * HWCAP_TLS is available only on 1136 r1p0 and later,
+ * see also kuser_get_tls_init.
+ */
+ if ((((id >> 4) & 0xfff) == 0xb36) && (((id >> 20) & 3) == 0))
+ elf_hwcap &= ~HWCAP_TLS;
+}
+
static void __init setup_processor(void)
{
struct proc_info_list *list;
@@ -311,6 +326,8 @@ static void __init setup_processor(void)
elf_hwcap &= ~HWCAP_THUMB;
#endif
+ feat_v6_fixup();
+
cacheid_init();
cpu_proc_init();
}
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 1621e53..e84d210 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -30,6 +30,7 @@
#include <asm/unistd.h>
#include <asm/traps.h>
#include <asm/unwind.h>
+#include <asm/tls.h>
#include "ptrace.h"
#include "signal.h"
@@ -518,17 +519,20 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
case NR(set_tls):
thread->tp_value = regs->ARM_r0;
-#if defined(CONFIG_HAS_TLS_REG)
- asm ("mcr p15, 0, %0, c13, c0, 3" : : "r" (regs->ARM_r0) );
-#elif !defined(CONFIG_TLS_REG_EMUL)
- /*
- * User space must never try to access this directly.
- * Expect your app to break eventually if you do so.
- * The user helper at 0xffff0fe0 must be used instead.
- * (see entry-armv.S for details)
- */
- *((unsigned int *)0xffff0ff0) = regs->ARM_r0;
-#endif
+ if (tls_emu)
+ return 0;
+ if (has_tls) {
+ asm ("mcr p15, 0, %0, c13, c0, 3"
+ : : "r" (regs->ARM_r0));
+ } else {
+ /*
+ * User space must never try to access this directly.
+ * Expect your app to break eventually if you do so.
+ * The user helper at 0xffff0fe0 must be used instead.
+ * (see entry-armv.S for details)
+ */
+ *((unsigned int *)0xffff0ff0) = regs->ARM_r0;
+ }
return 0;
#ifdef CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG
@@ -743,6 +747,16 @@ void __init trap_init(void)
return;
}
+static void __init kuser_get_tls_init(unsigned long vectors)
+{
+ /*
+ * vectors + 0xfe0 = __kuser_get_tls
+ * vectors + 0xfe8 = hardware TLS instruction at 0xffff0fe8
+ */
+ if (tls_emu || has_tls)
+ memcpy((void *)vectors + 0xfe0, (void *)vectors + 0xfe8, 4);
+}
+
void __init early_trap_init(void)
{
unsigned long vectors = CONFIG_VECTORS_BASE;
@@ -761,6 +775,11 @@ void __init early_trap_init(void)
memcpy((void *)vectors + 0x1000 - kuser_sz, __kuser_helper_start, kuser_sz);
/*
+ * Do processor specific fixups for the kuser helpers
+ */
+ kuser_get_tls_init(vectors);
+
+ /*
* Copy signal return handlers into the vector page, and
* set sigreturn to be a pointer to these.
*/
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 346ae14..71d5d5e 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -717,17 +717,6 @@ config TLS_REG_EMUL
a few prototypes like that in existence) and therefore access to
that required register must be emulated.
-config HAS_TLS_REG
- bool
- depends on !TLS_REG_EMUL
- default y if SMP || CPU_32v7
- help
- This selects support for the CP15 thread register.
- It is defined to be available on some ARMv6 processors (including
- all SMP capable ARMv6's) or later processors. User space may
- assume directly accessing that register and always obtain the
- expected value only on ARMv7 and above.
-
config NEEDS_SYSCALL_FOR_CMPXCHG
bool
help
diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S
index 7a5337e..e10626a 100644
--- a/arch/arm/mm/proc-v6.S
+++ b/arch/arm/mm/proc-v6.S
@@ -239,7 +239,8 @@ __v6_proc_info:
b __v6_setup
.long cpu_arch_name
.long cpu_elf_name
- .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA
+ /* See also feat_v6_fixup() for HWCAP_TLS */
+ .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA|HWCAP_TLS
.long cpu_v6_name
.long v6_processor_functions
.long v6wbi_tlb_fns
@@ -262,7 +263,8 @@ __pj4_v6_proc_info:
b __v6_setup
.long cpu_arch_name
.long cpu_elf_name
- .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
+ /* See also feat_v6_fixup() for HWCAP_TLS */
+ .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_TLS
.long cpu_pj4_name
.long v6_processor_functions
.long v6wbi_tlb_fns
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 7aaf88a..8071bcd 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -344,7 +344,7 @@ __v7_proc_info:
b __v7_setup
.long cpu_arch_name
.long cpu_elf_name
- .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
+ .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_TLS
.long cpu_v7_name
.long v7_processor_functions
.long v7wbi_tlb_fns
--kjpMrWxdCilgNbo1--
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6
@ 2010-03-18 19:02 Tony Lindgren
0 siblings, 0 replies; 11+ messages in thread
From: Tony Lindgren @ 2010-03-18 19:02 UTC (permalink / raw)
To: linux-arm-kernel
The TLS register is only available on V6 r1p0 and later.
Test for it and use it if available.
Signed-off-by: Tony Lindgren <tony@atomide.com>
diff --git a/arch/arm/include/asm/hwcap.h b/arch/arm/include/asm/hwcap.h
index f7bd52b..c1062c3 100644
--- a/arch/arm/include/asm/hwcap.h
+++ b/arch/arm/include/asm/hwcap.h
@@ -19,6 +19,7 @@
#define HWCAP_NEON 4096
#define HWCAP_VFPv3 8192
#define HWCAP_VFPv3D16 16384
+#define HWCAP_TLS 32768
#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
/*
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 6c5cf36..5a5aac8 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -739,11 +739,13 @@ ENTRY(__switch_to)
#ifdef CONFIG_MMU
ldr r6, [r2, #TI_CPU_DOMAIN]
#endif
-#if defined(CONFIG_HAS_TLS_REG)
- mcr p15, 0, r3, c13, c0, 3 @ set TLS register
-#elif !defined(CONFIG_TLS_REG_EMUL)
- mov r4, #0xffff0fff
- str r3, [r4, #-15] @ TLS val at 0xffff0ff0
+#if !defined(CONFIG_TLS_REG_EMUL)
+ ldr r4, =elf_hwcap
+ ldr r4, [r4, #0]
+ tst r4, #HWCAP_TLS @ hardware with TLS?
+ mcrne p15, 0, r3, c13, c0, 3 @ set TLS register
+ moveq r4, #0xffff0fff
+ streq r3, [r4, #-15] @ TLS val at 0xffff0ff0
#endif
#ifdef CONFIG_MMU
mcr p15, 0, r6, c3, c0, 0 @ Set domain register
@@ -1009,15 +1011,12 @@ kuser_cmpxchg_fixup:
*/
__kuser_get_tls: @ 0xffff0fe0
-
-#if !defined(CONFIG_HAS_TLS_REG) && !defined(CONFIG_TLS_REG_EMUL)
- ldr r0, [pc, #(16 - 8)] @ TLS stored at 0xffff0ff0
-#else
- mrc p15, 0, r0, c13, c0, 3 @ read TLS register
-#endif
+ ldr r0, [pc, #(16 - 8)] @ TLS set at 0xffff0ff0?
+ cmp r0, #0 @ assume hw TLS if not set
+ mrceq p15, 0, r0, c13, c0, 3 @ read TLS register
usr_ret lr
- .rep 5
+ .rep 3
.word 0 @ pad up to __kuser_helper_version
.endr
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index c91c77b..de9d2dc 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -269,6 +269,24 @@ static void __init cacheid_init(void)
extern struct proc_info_list *lookup_processor_type(unsigned int);
extern struct machine_desc *lookup_machine_type(unsigned int);
+#ifdef CONFIG_CPU_V6
+static void __init feat_v6_fixup(void)
+{
+ int id = read_cpuid_id();
+
+ if (id & 0x000f0000 != 0x00070000)
+ return;
+
+ /* HWCAP_TLS is available only on V6 r1p0 and later */
+ if (((id >> 20) & 3) == 0)
+ elf_hwcap &= ~HWCAP_TLS;
+}
+#else
+static inline void feat_v6_fixup(void)
+{
+}
+#endif
+
static void __init setup_processor(void)
{
struct proc_info_list *list;
@@ -311,6 +329,8 @@ static void __init setup_processor(void)
elf_hwcap &= ~HWCAP_THUMB;
#endif
+ feat_v6_fixup();
+
cacheid_init();
cpu_proc_init();
}
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 1621e53..bdffef5 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -518,16 +518,19 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
case NR(set_tls):
thread->tp_value = regs->ARM_r0;
-#if defined(CONFIG_HAS_TLS_REG)
- asm ("mcr p15, 0, %0, c13, c0, 3" : : "r" (regs->ARM_r0) );
-#elif !defined(CONFIG_TLS_REG_EMUL)
- /*
- * User space must never try to access this directly.
- * Expect your app to break eventually if you do so.
- * The user helper at 0xffff0fe0 must be used instead.
- * (see entry-armv.S for details)
- */
- *((unsigned int *)0xffff0ff0) = regs->ARM_r0;
+#if !defined(CONFIG_TLS_REG_EMUL)
+ if (elf_hwcap & HWCAP_TLS) {
+ asm ("mcr p15, 0, %0, c13, c0, 3"
+ : : "r" (regs->ARM_r0));
+ } else {
+ /*
+ * User space must never try to access this directly.
+ * Expect your app to break eventually if you do so.
+ * The user helper at 0xffff0fe0 must be used instead.
+ * (see entry-armv.S for details)
+ */
+ *((unsigned int *)0xffff0ff0) = regs->ARM_r0;
+ }
#endif
return 0;
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index c4ed9f9..45e99c1 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -715,17 +715,6 @@ config TLS_REG_EMUL
a few prototypes like that in existence) and therefore access to
that required register must be emulated.
-config HAS_TLS_REG
- bool
- depends on !TLS_REG_EMUL
- default y if SMP || CPU_32v7
- help
- This selects support for the CP15 thread register.
- It is defined to be available on some ARMv6 processors (including
- all SMP capable ARMv6's) or later processors. User space may
- assume directly accessing that register and always obtain the
- expected value only on ARMv7 and above.
-
config NEEDS_SYSCALL_FOR_CMPXCHG
bool
help
diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S
index 7a5337e..e10626a 100644
--- a/arch/arm/mm/proc-v6.S
+++ b/arch/arm/mm/proc-v6.S
@@ -239,7 +239,8 @@ __v6_proc_info:
b __v6_setup
.long cpu_arch_name
.long cpu_elf_name
- .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA
+ /* See also feat_v6_fixup() for HWCAP_TLS */
+ .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA|HWCAP_TLS
.long cpu_v6_name
.long v6_processor_functions
.long v6wbi_tlb_fns
@@ -262,7 +263,8 @@ __pj4_v6_proc_info:
b __v6_setup
.long cpu_arch_name
.long cpu_elf_name
- .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
+ /* See also feat_v6_fixup() for HWCAP_TLS */
+ .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_TLS
.long cpu_pj4_name
.long v6_processor_functions
.long v6wbi_tlb_fns
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 7aaf88a..8071bcd 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -344,7 +344,7 @@ __v7_proc_info:
b __v7_setup
.long cpu_arch_name
.long cpu_elf_name
- .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
+ .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_TLS
.long cpu_v7_name
.long v7_processor_functions
.long v7wbi_tlb_fns
--jB+02Y6wHc2pEa2x--
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH] arm: Fix mounting root on omaps with CPU_V6 and CPU_V7
@ 2010-03-17 17:57 Tony Lindgren
2010-03-17 18:07 ` Catalin Marinas
0 siblings, 1 reply; 11+ messages in thread
From: Tony Lindgren @ 2010-03-17 17:57 UTC (permalink / raw)
To: linux-arm-kernel
Hi all,
Here's an updated version of this patch with more details.
Looks like VFPv3 is only available on V7:
http://www.arm.com/products/processors/technologies/vector-floating-point.php
HAS_TLS reg is only on ARM11 starting with r1p0:
http://infocenter.arm.com/help/topic/com.arm.doc.ddi0211k/Babeihid.html
So that explains why it won't work on omap2420 as it's r0p2.
Regards,
Tony
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH] arm: Fix mounting root on omaps with CPU_V6 and CPU_V7
2010-03-17 17:57 [PATCH] arm: Fix mounting root on omaps with CPU_V6 and CPU_V7 Tony Lindgren
@ 2010-03-17 18:07 ` Catalin Marinas
2010-03-17 19:11 ` Tony Lindgren
0 siblings, 1 reply; 11+ messages in thread
From: Catalin Marinas @ 2010-03-17 18:07 UTC (permalink / raw)
To: linux-arm-kernel
On Wed, 2010-03-17 at 17:57 +0000, Tony Lindgren wrote:
> Here's an updated version of this patch with more details.
>
> Looks like VFPv3 is only available on V7:
>
> http://www.arm.com/products/processors/technologies/vector-floating-point.php
But does it cause any problem if the feature is enabled in the kernel?
The vfp_init() code should check for its presence and set the hwcap
accordingly.
Ideally, we should fix the VFP handling code to cope with dynamic
detection.
> HAS_TLS reg is only on ARM11 starting with r1p0:
>
> http://infocenter.arm.com/help/topic/com.arm.doc.ddi0211k/Babeihid.html
>
> So that explains why it won't work on omap2420 as it's r0p2.
Same here, would it work with dynamic detection?
I would like to get v6+v7 support working fine together on RealView
boards as well (though not much spare time) but without disabling the
features that are present on v7 if they can be detected at run-time.
--
Catalin
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH] arm: Fix mounting root on omaps with CPU_V6 and CPU_V7
2010-03-17 18:07 ` Catalin Marinas
@ 2010-03-17 19:11 ` Tony Lindgren
2010-03-18 11:13 ` Catalin Marinas
0 siblings, 1 reply; 11+ messages in thread
From: Tony Lindgren @ 2010-03-17 19:11 UTC (permalink / raw)
To: linux-arm-kernel
* Catalin Marinas <catalin.marinas@arm.com> [100317 11:04]:
> On Wed, 2010-03-17 at 17:57 +0000, Tony Lindgren wrote:
> > Here's an updated version of this patch with more details.
> >
> > Looks like VFPv3 is only available on V7:
> >
> > http://www.arm.com/products/processors/technologies/vector-floating-point.php
>
> But does it cause any problem if the feature is enabled in the kernel?
> The vfp_init() code should check for its presence and set the hwcap
> accordingly.
Yeah, it causes the problem posted in the patch description. I took a
quick look at it and at least the VFPFMRX in vfpmacros.h for CONFIG_VFPv3
is a problem.
Also I think we would need to have separate vfp_get_double functions
in vfphw.S for VFPv2 and 3 that get used based on the features.
> Ideally, we should fix the VFP handling code to cope with dynamic
> detection.
I agree, being able to boot the same kernel and avoiding tens of
recompiles to test something is a major time saver :)
> > HAS_TLS reg is only on ARM11 starting with r1p0:
> >
> > http://infocenter.arm.com/help/topic/com.arm.doc.ddi0211k/Babeihid.html
> >
> > So that explains why it won't work on omap2420 as it's r0p2.
>
> Same here, would it work with dynamic detection?
Hmm I believe here the problem is __switch_to in entry-armv.S.
I don't think we want to dynamically test it every time.. Or
at least it would have to be optimized out in most cases.
> I would like to get v6+v7 support working fine together on RealView
> boards as well (though not much spare time) but without disabling the
> features that are present on v7 if they can be detected at run-time.
I totally agree with you there.
Regards,
Tony
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH] arm: Fix mounting root on omaps with CPU_V6 and CPU_V7
2010-03-17 19:11 ` Tony Lindgren
@ 2010-03-18 11:13 ` Catalin Marinas
2010-03-18 17:00 ` Tony Lindgren
0 siblings, 1 reply; 11+ messages in thread
From: Catalin Marinas @ 2010-03-18 11:13 UTC (permalink / raw)
To: linux-arm-kernel
On Wed, 2010-03-17 at 19:11 +0000, Tony Lindgren wrote:
> * Catalin Marinas <catalin.marinas@arm.com> [100317 11:04]:
> > On Wed, 2010-03-17 at 17:57 +0000, Tony Lindgren wrote:
> > > Here's an updated version of this patch with more details.
> > >
> > > Looks like VFPv3 is only available on V7:
> > >
> > > http://www.arm.com/products/processors/technologies/vector-floating-point.php
> >
> > But does it cause any problem if the feature is enabled in the kernel?
> > The vfp_init() code should check for its presence and set the hwcap
> > accordingly.
>
> Yeah, it causes the problem posted in the patch description. I took a
> quick look at it and at least the VFPFMRX in vfpmacros.h for CONFIG_VFPv3
> is a problem.
This would indeed need more checking to avoid reading some registers
which aren't present on ARMv6.
I think the main problem with just falling back to VFPv2 is the lack of
NEON support even if the CPU supports it.
> Also I think we would need to have separate vfp_get_double functions
> in vfphw.S for VFPv2 and 3 that get used based on the features.
I don't think that's causing problems (or at least we can identify where
the function gets called for higher VFP registers). Even with VFPv3, you
may not have the D16-D31 registers.
> > > HAS_TLS reg is only on ARM11 starting with r1p0:
> > >
> > > http://infocenter.arm.com/help/topic/com.arm.doc.ddi0211k/Babeihid.html
> > >
> > > So that explains why it won't work on omap2420 as it's r0p2.
> >
> > Same here, would it work with dynamic detection?
>
> Hmm I believe here the problem is __switch_to in entry-armv.S.
> I don't think we want to dynamically test it every time.. Or
> at least it would have to be optimized out in most cases.
But if you disable this, you won't be able to use an SMP build on both
v6 and v7. Anyway, I don't think that dynamically checking this would
introduce performance penalties, the __switch_to code is pretty complex
already with all the notifier calls.
We may also have optimised user space that reads the TLS register
directly rather than going through the kuser helper, in which case we
would need a kernel built only for ARMv7 (maybe that's acceptable in
this situation).
--
Catalin
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH] arm: Fix mounting root on omaps with CPU_V6 and CPU_V7
2010-03-18 11:13 ` Catalin Marinas
@ 2010-03-18 17:00 ` Tony Lindgren
2010-03-19 1:35 ` [PATCH] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6 Tony Lindgren
0 siblings, 1 reply; 11+ messages in thread
From: Tony Lindgren @ 2010-03-18 17:00 UTC (permalink / raw)
To: linux-arm-kernel
* Catalin Marinas <catalin.marinas@arm.com> [100318 04:10]:
> On Wed, 2010-03-17 at 19:11 +0000, Tony Lindgren wrote:
> > * Catalin Marinas <catalin.marinas@arm.com> [100317 11:04]:
> > > On Wed, 2010-03-17 at 17:57 +0000, Tony Lindgren wrote:
> > > > Here's an updated version of this patch with more details.
> > > >
> > > > Looks like VFPv3 is only available on V7:
> > > >
> > > > http://www.arm.com/products/processors/technologies/vector-floating-point.php
> > >
> > > But does it cause any problem if the feature is enabled in the kernel?
> > > The vfp_init() code should check for its presence and set the hwcap
> > > accordingly.
> >
> > Yeah, it causes the problem posted in the patch description. I took a
> > quick look at it and at least the VFPFMRX in vfpmacros.h for CONFIG_VFPv3
> > is a problem.
>
> This would indeed need more checking to avoid reading some registers
> which aren't present on ARMv6.
>
> I think the main problem with just falling back to VFPv2 is the lack of
> NEON support even if the CPU supports it.
Yeah it would be nice to have things also working in a reasonably fast
and usable way for distros etc.
> > Also I think we would need to have separate vfp_get_double functions
> > in vfphw.S for VFPv2 and 3 that get used based on the features.
>
> I don't think that's causing problems (or at least we can identify where
> the function gets called for higher VFP registers). Even with VFPv3, you
> may not have the D16-D31 registers.
OK. There's also an ifdef else define for VFP_REG_ZERO in vfp.h. Sounds
like that test would also need to be done dynamically.
> > > > HAS_TLS reg is only on ARM11 starting with r1p0:
> > > >
> > > > http://infocenter.arm.com/help/topic/com.arm.doc.ddi0211k/Babeihid.html
> > > >
> > > > So that explains why it won't work on omap2420 as it's r0p2.
> > >
> > > Same here, would it work with dynamic detection?
> >
> > Hmm I believe here the problem is __switch_to in entry-armv.S.
> > I don't think we want to dynamically test it every time.. Or
> > at least it would have to be optimized out in most cases.
>
> But if you disable this, you won't be able to use an SMP build on both
> v6 and v7. Anyway, I don't think that dynamically checking this would
> introduce performance penalties, the __switch_to code is pretty complex
> already with all the notifier calls.
OK. I'll take a look at setting the TLS as a HWCAP flag.
> We may also have optimised user space that reads the TLS register
> directly rather than going through the kuser helper, in which case we
> would need a kernel built only for ARMv7 (maybe that's acceptable in
> this situation).
Sounds like more of a hassle to me :)
Tony
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6
2010-03-18 17:00 ` Tony Lindgren
@ 2010-03-19 1:35 ` Tony Lindgren
2010-03-19 3:24 ` Tony Lindgren
` (2 more replies)
0 siblings, 3 replies; 11+ messages in thread
From: Tony Lindgren @ 2010-03-19 1:35 UTC (permalink / raw)
To: linux-arm-kernel
* Tony Lindgren <tony@atomide.com> [100318 09:55]:
> * Catalin Marinas <catalin.marinas@arm.com> [100318 04:10]:
> > On Wed, 2010-03-17 at 19:11 +0000, Tony Lindgren wrote:
> > > * Catalin Marinas <catalin.marinas@arm.com> [100317 11:04]:
> > > > On Wed, 2010-03-17 at 17:57 +0000, Tony Lindgren wrote:
> > > > > HAS_TLS reg is only on ARM11 starting with r1p0:
> > > > >
> > > > > http://infocenter.arm.com/help/topic/com.arm.doc.ddi0211k/Babeihid.html
> > > > >
> > > > > So that explains why it won't work on omap2420 as it's r0p2.
> > > >
> > > > Same here, would it work with dynamic detection?
> > >
> > > Hmm I believe here the problem is __switch_to in entry-armv.S.
> > > I don't think we want to dynamically test it every time.. Or
> > > at least it would have to be optimized out in most cases.
> >
> > But if you disable this, you won't be able to use an SMP build on both
> > v6 and v7. Anyway, I don't think that dynamically checking this would
> > introduce performance penalties, the __switch_to code is pretty complex
> > already with all the notifier calls.
>
> OK. I'll take a look at setting the TLS a HWCAP flag.
Below is a patch to convert CONFIG_HAS_TLS_REG into HWCAP_TLS.
I've tested it with V6 r0p2 with no HWCAP_TLS, and V7 that has HWCAP_TLS.
I also forced CONFIG_TLS_REG_EMUL and booted on V6 r0p2, and it booted OK.
Could somebody please test this patch on a real CONFIG_TLS_REG_EMUL
system?
Also, I wonder if the change to __kuser_get_tls is safe?
I changed it to assume that if 0xffff0ff0 == 0, then we have HWCAP_TLS.
Regards,
Tony
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6
2010-03-19 1:35 ` [PATCH] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6 Tony Lindgren
@ 2010-03-19 3:24 ` Tony Lindgren
2010-03-19 3:46 ` Jamie Lokier
2010-03-19 8:53 ` Russell King - ARM Linux
2 siblings, 0 replies; 11+ messages in thread
From: Tony Lindgren @ 2010-03-19 3:24 UTC (permalink / raw)
To: linux-arm-kernel
* Tony Lindgren <tony@atomide.com> [100318 18:31]:
> --- a/arch/arm/kernel/setup.c
> +++ b/arch/arm/kernel/setup.c
> @@ -269,6 +269,24 @@ static void __init cacheid_init(void)
> extern struct proc_info_list *lookup_processor_type(unsigned int);
> extern struct machine_desc *lookup_machine_type(unsigned int);
>
> +#ifdef CONFIG_CPU_V6
> +static void __init feat_v6_fixup(void)
> +{
> + int id = read_cpuid_id();
> +
> + if (id & 0x000f0000 != 0x00070000)
> + return;
> +
> + /* HWCAP_TLS is available only on V6 r1p0 and later */
> + if (((id >> 20) & 3) == 0)
> + elf_hwcap &= ~HWCAP_TLS;
> +}
This test probably needs to only look at ARM1136, and ignore others
such as ARM1176. Will take a look tomorrow.
Tony
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6
2010-03-19 1:35 ` [PATCH] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6 Tony Lindgren
2010-03-19 3:24 ` Tony Lindgren
@ 2010-03-19 3:46 ` Jamie Lokier
2010-03-19 8:54 ` Russell King - ARM Linux
2010-03-19 8:53 ` Russell King - ARM Linux
2 siblings, 1 reply; 11+ messages in thread
From: Jamie Lokier @ 2010-03-19 3:46 UTC (permalink / raw)
To: linux-arm-kernel
Tony Lindgren wrote:
> Also, I wonder if the change __kuser_get_tls is safe?
>
> + ldr r0, [pc, #(16 - 8)] @ TLS set at 0xffff0ff0?
> + cmp r0, #0 @ assume hw TLS if not set
> + mrceq p15, 0, r0, c13, c0, 3 @ read TLS register
You cannot assume the TLS value is non-zero, because it's provided by
userspace to use however it wants. It doesn't even have to be an address.
I'm thinking, why not an alternative() macro like on x86, which is a
very nice way to describe run-time patches of one or a few instructions
which depend on arch feature bits.
Then all that switch_to() logic could be made the size it was before.
An alternative() macro could make a lot of other chip-dependent calls
smaller too, i.e. all those which dispatch through function pointers
at present for cache flushing etc - they could become direct calls, or
an inline instruction or two when possible.
-- Jamie
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6
2010-03-19 3:46 ` Jamie Lokier
@ 2010-03-19 8:54 ` Russell King - ARM Linux
2010-03-19 15:32 ` Tony Lindgren
0 siblings, 1 reply; 11+ messages in thread
From: Russell King - ARM Linux @ 2010-03-19 8:54 UTC (permalink / raw)
To: linux-arm-kernel
On Fri, Mar 19, 2010 at 03:46:45AM +0000, Jamie Lokier wrote:
> I'm thinking, why not an alternative() macro like on x86, which is a
> very nice way to describe run-time patches of one or a few instructions
> which depend on arch feature bits.
Having XIP support prevents that kind of thing.
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6
2010-03-19 8:54 ` Russell King - ARM Linux
@ 2010-03-19 15:32 ` Tony Lindgren
0 siblings, 0 replies; 11+ messages in thread
From: Tony Lindgren @ 2010-03-19 15:32 UTC (permalink / raw)
To: linux-arm-kernel
* Russell King - ARM Linux <linux@arm.linux.org.uk> [100319 01:50]:
> On Fri, Mar 19, 2010 at 03:46:45AM +0000, Jamie Lokier wrote:
> > I'm thinking, why not an alternative() macro like on x86, which is a
> > very nice way to describe run-time patches of one or a few instructions
> > which depend on arch feature bits.
>
> Having XIP support prevents that kind of thing.
How about we store the HWCAP_TLS flag into 0xffff0ff4 for
__kuser_get_tls? That way the userspace won't be able to set
it.
Tony
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6
2010-03-19 1:35 ` [PATCH] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6 Tony Lindgren
2010-03-19 3:24 ` Tony Lindgren
2010-03-19 3:46 ` Jamie Lokier
@ 2010-03-19 8:53 ` Russell King - ARM Linux
2010-03-19 15:58 ` Tony Lindgren
2 siblings, 1 reply; 11+ messages in thread
From: Russell King - ARM Linux @ 2010-03-19 8:53 UTC (permalink / raw)
To: linux-arm-kernel
On Thu, Mar 18, 2010 at 06:35:21PM -0700, Tony Lindgren wrote:
> -#if defined(CONFIG_HAS_TLS_REG)
> - mcr p15, 0, r3, c13, c0, 3 @ set TLS register
> -#elif !defined(CONFIG_TLS_REG_EMUL)
> - mov r4, #0xffff0fff
> - str r3, [r4, #-15] @ TLS val at 0xffff0ff0
> +#if !defined(CONFIG_TLS_REG_EMUL)
> + ldr r4, =elf_hwcap
> + ldr r4, [r4, #0]
> + tst r4, #HWCAP_TLS @ hardware with TLS?
This is really really inefficient. Both the second ldr and tst will stall
the pipeline because they need to wait for the result of the preceding
ldr. Can we do better by re-ordering some instructions?
Also, the ifndef seems incorrect - if we have TLS_REG_EMUL we seem to omit
all this code.
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6
2010-03-19 8:53 ` Russell King - ARM Linux
@ 2010-03-19 15:58 ` Tony Lindgren
2010-03-23 0:16 ` Russell King - ARM Linux
0 siblings, 1 reply; 11+ messages in thread
From: Tony Lindgren @ 2010-03-19 15:58 UTC (permalink / raw)
To: linux-arm-kernel
* Russell King - ARM Linux <linux@arm.linux.org.uk> [100319 01:49]:
> On Thu, Mar 18, 2010 at 06:35:21PM -0700, Tony Lindgren wrote:
> > -#if defined(CONFIG_HAS_TLS_REG)
> > - mcr p15, 0, r3, c13, c0, 3 @ set TLS register
> > -#elif !defined(CONFIG_TLS_REG_EMUL)
> > - mov r4, #0xffff0fff
> > - str r3, [r4, #-15] @ TLS val at 0xffff0ff0
> > +#if !defined(CONFIG_TLS_REG_EMUL)
> > + ldr r4, =elf_hwcap
> > + ldr r4, [r4, #0]
> > + tst r4, #HWCAP_TLS @ hardware with TLS?
>
> This is really really inefficient. Both the second ldr and tst will stall
> the pipeline because they need to wait for the result of the preceding
> ldr. Can we do better by re-ordering some instructions?
Or set ifdef CONFIG_CPU_V6 and test for the cp15 id register every time..
> Also, the ifndef seems incorrect - if we have TLS_REG_EMUL we seem to omit
> all this code.
Is the current ifdef elif wrong? The current code does not seem to
do anything if TLS_REG_EMUL is set and HAS_TLS_REG is not set.
HAS_TLS_REG depends on !TLS_REG_EMUL.
Regards,
Tony
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6
2010-03-19 15:58 ` Tony Lindgren
@ 2010-03-23 0:16 ` Russell King - ARM Linux
2010-03-23 0:54 ` Tony Lindgren
0 siblings, 1 reply; 11+ messages in thread
From: Russell King - ARM Linux @ 2010-03-23 0:16 UTC (permalink / raw)
To: linux-arm-kernel
On Fri, Mar 19, 2010 at 08:58:05AM -0700, Tony Lindgren wrote:
> * Russell King - ARM Linux <linux@arm.linux.org.uk> [100319 01:49]:
> > On Thu, Mar 18, 2010 at 06:35:21PM -0700, Tony Lindgren wrote:
> > > -#if defined(CONFIG_HAS_TLS_REG)
> > > - mcr p15, 0, r3, c13, c0, 3 @ set TLS register
> > > -#elif !defined(CONFIG_TLS_REG_EMUL)
> > > - mov r4, #0xffff0fff
> > > - str r3, [r4, #-15] @ TLS val at 0xffff0ff0
> > > +#if !defined(CONFIG_TLS_REG_EMUL)
> > > + ldr r4, =elf_hwcap
> > > + ldr r4, [r4, #0]
> > > + tst r4, #HWCAP_TLS @ hardware with TLS?
> >
> > This is really really inefficient. Both the second ldr and tst will stall
> > the pipeline because they need to wait for the result of the preceding
> > ldr. Can we do better by re-ordering some instructions?
>
> Or set ifdef CONFIG_CPU_V6 and test for the cp15 id register every time..
I was suggesting that it might be worth trying to reorder the instructions
here so that we're not immediately using the result of the ldr in the
next instruction. We have plenty of registers available here (everything
except r0-r2, r6, fp.)
> > Also, the ifndef seems incorrect - if we have TLS_REG_EMUL we seem to omit
> > all this code.
>
> Is the current ifdef elif wrong? The current code does not seem to
> do anything if TLS_REG_EMUL is set and HAS_TLS_REG is not set.
> HAS_TLS_REG depends !TLS_REG_EMUL.
Now I look back, I don't think so. Ignore that comment.
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6
2010-03-23 0:16 ` Russell King - ARM Linux
@ 2010-03-23 0:54 ` Tony Lindgren
0 siblings, 0 replies; 11+ messages in thread
From: Tony Lindgren @ 2010-03-23 0:54 UTC (permalink / raw)
To: linux-arm-kernel
* Russell King - ARM Linux <linux@arm.linux.org.uk> [100322 17:12]:
> On Fri, Mar 19, 2010 at 08:58:05AM -0700, Tony Lindgren wrote:
> > * Russell King - ARM Linux <linux@arm.linux.org.uk> [100319 01:49]:
> > > On Thu, Mar 18, 2010 at 06:35:21PM -0700, Tony Lindgren wrote:
> > > > -#if defined(CONFIG_HAS_TLS_REG)
> > > > - mcr p15, 0, r3, c13, c0, 3 @ set TLS register
> > > > -#elif !defined(CONFIG_TLS_REG_EMUL)
> > > > - mov r4, #0xffff0fff
> > > > - str r3, [r4, #-15] @ TLS val at 0xffff0ff0
> > > > +#if !defined(CONFIG_TLS_REG_EMUL)
> > > > + ldr r4, =elf_hwcap
> > > > + ldr r4, [r4, #0]
> > > > + tst r4, #HWCAP_TLS @ hardware with TLS?
> > >
> > > This is really really inefficient. Both the second ldr and tst will stall
> > > the pipeline because they need to wait for the result of the preceding
> > > ldr. Can we do better by re-ordering some instructions?
> >
> > Or set ifdef CONFIG_CPU_V6 and test for the cp15 id register every time..
>
> I was suggesting that it might be worth trying to reorder the instructions
> here so that we're not immediately using the result of the ldr in the
> next instruction. We have plenty of registers available here (everything
> except r0-r2, r6, fp.)
Yeah sure, I'll take a look. I'll repost an updated version after I get a
chance to play with this again. Might be a little while before I get back to
this, but this would be for the next merge window anyways.
> > > Also, the ifndef seems incorrect - if we have TLS_REG_EMUL we seem to omit
> > > all this code.
> >
> > Is the current ifdef elif wrong? The current code does not seem to
> > do anything if TLS_REG_EMUL is set and HAS_TLS_REG is not set.
> > HAS_TLS_REG depends !TLS_REG_EMUL.
>
> Now I look back, I don't think so. Ignore that comment.
OK
Regards,
Tony
^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2010-06-29 10:34 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-06-29 10:34 [PATCH] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6 Tony Lindgren
-- strict thread matches above, loose matches on Subject: below --
2010-03-18 19:02 Tony Lindgren
2010-03-17 17:57 [PATCH] arm: Fix mounting root on omaps with CPU_V6 and CPU_V7 Tony Lindgren
2010-03-17 18:07 ` Catalin Marinas
2010-03-17 19:11 ` Tony Lindgren
2010-03-18 11:13 ` Catalin Marinas
2010-03-18 17:00 ` Tony Lindgren
2010-03-19 1:35 ` [PATCH] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6 Tony Lindgren
2010-03-19 3:24 ` Tony Lindgren
2010-03-19 3:46 ` Jamie Lokier
2010-03-19 8:54 ` Russell King - ARM Linux
2010-03-19 15:32 ` Tony Lindgren
2010-03-19 8:53 ` Russell King - ARM Linux
2010-03-19 15:58 ` Tony Lindgren
2010-03-23 0:16 ` Russell King - ARM Linux
2010-03-23 0:54 ` Tony Lindgren
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).