linux-arm-kernel.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/2] Make ARMv6 behave with TLS, VFPv3, and NEON
@ 2010-06-21 13:51 Tony Lindgren
  2010-06-21 13:51 ` [PATCH 1/2] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6 Tony Lindgren
  2010-06-21 13:51 ` [PATCH 2/2] arm: Make VFPv3 usable on ARMv6 Tony Lindgren
  0 siblings, 2 replies; 33+ messages in thread
From: Tony Lindgren @ 2010-06-21 13:51 UTC (permalink / raw)
  To: linux-arm-kernel

Hi all,

Here is an updated version of the earlier patch for the TLS [1], and a related
patch for VFPv3 and NEON. Sorry it took a while before I got around updating
this patch.

This series allows booting ARMv6 and 7 with the same kernel binary, such as
omap24xx (ARMv6), omap34xx (ARMv7) and omap44xx (ARMv7 SMP).

To summarize the problem, ARM1136 has TLS and MVFR registers only starting with
r1 p0, and at least omap2420 is earlier at r0 p2 and does not have TLS or
MVFR registers.

Regards,

Tony


[1] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6
    http://lists.infradead.org/pipermail/linux-arm-kernel/2010-March/011667.html

---

Tony Lindgren (2):
      arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6
      arm: Make VFPv3 usable on ARMv6


 arch/arm/include/asm/hwcap.h     |    1 +
 arch/arm/include/asm/vfpmacros.h |   18 ++++++++++++++++++
 arch/arm/kernel/entry-armv.S     |   29 ++++++++++++++---------------
 arch/arm/kernel/setup.c          |   20 ++++++++++++++++++++
 arch/arm/kernel/traps.c          |   23 +++++++++++++----------
 arch/arm/mm/Kconfig              |   11 -----------
 arch/arm/mm/proc-v6.S            |    6 ++++--
 arch/arm/mm/proc-v7.S            |    2 +-
 arch/arm/vfp/vfpmodule.c         |   10 +++++++---
 9 files changed, 78 insertions(+), 42 deletions(-)

-- 
Signature

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 1/2] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6
  2010-06-21 13:51 [PATCH 0/2] Make ARMv6 behave with TLS, VFPv3, and NEON Tony Lindgren
@ 2010-06-21 13:51 ` Tony Lindgren
  2010-06-22  9:28   ` Tony Lindgren
  2010-06-22 17:00   ` Jamie Lokier
  2010-06-21 13:51 ` [PATCH 2/2] arm: Make VFPv3 usable on ARMv6 Tony Lindgren
  1 sibling, 2 replies; 33+ messages in thread
From: Tony Lindgren @ 2010-06-21 13:51 UTC (permalink / raw)
  To: linux-arm-kernel

The TLS register is only available on ARM1136 r1p0 and later.
Set HWCAP_TLS flags if hardware TLS is available.

Note that we now use 0xffff0ff4 for flagging software TLS to
__kuser_get_tls, and 0xffff0ff8 for storing the software TLS value.

Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/include/asm/hwcap.h |    1 +
 arch/arm/kernel/entry-armv.S |   29 ++++++++++++++---------------
 arch/arm/kernel/setup.c      |   20 ++++++++++++++++++++
 arch/arm/kernel/traps.c      |   23 +++++++++++++----------
 arch/arm/mm/Kconfig          |   11 -----------
 arch/arm/mm/proc-v6.S        |    6 ++++--
 arch/arm/mm/proc-v7.S        |    2 +-
 7 files changed, 53 insertions(+), 39 deletions(-)

diff --git a/arch/arm/include/asm/hwcap.h b/arch/arm/include/asm/hwcap.h
index f7bd52b..c1062c3 100644
--- a/arch/arm/include/asm/hwcap.h
+++ b/arch/arm/include/asm/hwcap.h
@@ -19,6 +19,7 @@
 #define HWCAP_NEON	4096
 #define HWCAP_VFPv3	8192
 #define HWCAP_VFPv3D16	16384
+#define HWCAP_TLS	32768
 
 #if defined(__KERNEL__) && !defined(__ASSEMBLY__)
 /*
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 7ee48e7..9de5357 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -739,11 +739,14 @@ ENTRY(__switch_to)
 #ifdef CONFIG_MMU
 	ldr	r6, [r2, #TI_CPU_DOMAIN]
 #endif
-#if defined(CONFIG_HAS_TLS_REG)
-	mcr	p15, 0, r3, c13, c0, 3		@ set TLS register
-#elif !defined(CONFIG_TLS_REG_EMUL)
-	mov	r4, #0xffff0fff
-	str	r3, [r4, #-15]			@ TLS val at 0xffff0ff0
+#if !defined(CONFIG_TLS_REG_EMUL)
+	ldr	r4, =elf_hwcap
+	ldr	r4, [r4, #0]
+	mov	r5, #0xffff0fff
+	tst	r4, #HWCAP_TLS			@ hardware TLS available?
+	mcrne	p15, 0, r3, c13, c0, 3		@ yes, set TLS register
+	streq	r5, [r5, #-11]			@ flag software TLS at 0xffff0ff4
+	streq	r3, [r5, #-7]			@ set TLS value at 0xffff0ff8
 #endif
 #ifdef CONFIG_MMU
 	mcr	p15, 0, r6, c3, c0, 0		@ Set domain register
@@ -1009,17 +1012,13 @@ kuser_cmpxchg_fixup:
  */
 
 __kuser_get_tls:				@ 0xffff0fe0
-
-#if !defined(CONFIG_HAS_TLS_REG) && !defined(CONFIG_TLS_REG_EMUL)
-	ldr	r0, [pc, #(16 - 8)]		@ TLS stored at 0xffff0ff0
-#else
-	mrc	p15, 0, r0, c13, c0, 3		@ read TLS register
-#endif
+	ldr     r0, [pc, #(20 - 8)]		@ software TLS set in 0xffff0ff4?
+	cmp	r0, #0				@ hardware TLS if flag not set
+	mrceq	p15, 0, r0, c13, c0, 3		@ read hardware TLS register
+	ldrne	r0, [pc, #(12 - 8)]		@ software TLS val at 0xffff0ff8
 	usr_ret	lr
-
-	.rep	5
-	.word	0			@ pad up to __kuser_helper_version
-	.endr
+	.word	0				@ non-zero for software TLS
+	.word	0				@ software TLS value
 
 /*
  * Reference declaration:
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 122d999..fcfa9c2 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -269,6 +269,24 @@ static void __init cacheid_init(void)
 extern struct proc_info_list *lookup_processor_type(unsigned int);
 extern struct machine_desc *lookup_machine_type(unsigned int);
 
+#ifdef CONFIG_CPU_V6
+static void __init feat_v6_fixup(void)
+{
+	int id = read_cpuid_id();
+
+	if (id & 0x000f0000 != 0x00070000)
+		return;
+
+	/* HWCAP_TLS is available only on V6 r1p0 and later */
+	if (((id >> 20) & 3) == 0)
+		elf_hwcap &= ~HWCAP_TLS;
+}
+#else
+static inline void feat_v6_fixup(void)
+{
+}
+#endif
+
 static void __init setup_processor(void)
 {
 	struct proc_info_list *list;
@@ -311,6 +329,8 @@ static void __init setup_processor(void)
 	elf_hwcap &= ~HWCAP_THUMB;
 #endif
 
+	feat_v6_fixup();
+
 	cacheid_init();
 	cpu_proc_init();
 }
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 1621e53..3dd72b7 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -518,16 +518,19 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
 
 	case NR(set_tls):
 		thread->tp_value = regs->ARM_r0;
-#if defined(CONFIG_HAS_TLS_REG)
-		asm ("mcr p15, 0, %0, c13, c0, 3" : : "r" (regs->ARM_r0) );
-#elif !defined(CONFIG_TLS_REG_EMUL)
-		/*
-		 * User space must never try to access this directly.
-		 * Expect your app to break eventually if you do so.
-		 * The user helper at 0xffff0fe0 must be used instead.
-		 * (see entry-armv.S for details)
-		 */
-		*((unsigned int *)0xffff0ff0) = regs->ARM_r0;
+#if !defined(CONFIG_TLS_REG_EMUL)
+		if (elf_hwcap & HWCAP_TLS) {
+			asm ("mcr p15, 0, %0, c13, c0, 3"
+				: : "r" (regs->ARM_r0));
+		} else {
+			/*
+			 * User space must never try to access this directly.
+			 * Expect your app to break eventually if you do so.
+			 * The user helper at 0xffff0fe0 must be used instead.
+			 * (see entry-armv.S for details)
+			 */
+			*((unsigned int *)0xffff0ff8) = regs->ARM_r0;
+		}
 #endif
 		return 0;
 
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 346ae14..71d5d5e 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -717,17 +717,6 @@ config TLS_REG_EMUL
 	  a few prototypes like that in existence) and therefore access to
 	  that required register must be emulated.
 
-config HAS_TLS_REG
-	bool
-	depends on !TLS_REG_EMUL
-	default y if SMP || CPU_32v7
-	help
-	  This selects support for the CP15 thread register.
-	  It is defined to be available on some ARMv6 processors (including
-	  all SMP capable ARMv6's) or later processors.  User space may
-	  assume directly accessing that register and always obtain the
-	  expected value only on ARMv7 and above.
-
 config NEEDS_SYSCALL_FOR_CMPXCHG
 	bool
 	help
diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S
index 7a5337e..e10626a 100644
--- a/arch/arm/mm/proc-v6.S
+++ b/arch/arm/mm/proc-v6.S
@@ -239,7 +239,8 @@ __v6_proc_info:
 	b	__v6_setup
 	.long	cpu_arch_name
 	.long	cpu_elf_name
-	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA
+	/* See also feat_v6_fixup() for HWCAP_TLS */
+	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA|HWCAP_TLS
 	.long	cpu_v6_name
 	.long	v6_processor_functions
 	.long	v6wbi_tlb_fns
@@ -262,7 +263,8 @@ __pj4_v6_proc_info:
 	b	__v6_setup
 	.long	cpu_arch_name
 	.long	cpu_elf_name
-	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
+	/* See also feat_v6_fixup() for HWCAP_TLS */
+	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_TLS
 	.long	cpu_pj4_name
 	.long	v6_processor_functions
 	.long	v6wbi_tlb_fns
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 7aaf88a..8071bcd 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -344,7 +344,7 @@ __v7_proc_info:
 	b	__v7_setup
 	.long	cpu_arch_name
 	.long	cpu_elf_name
-	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
+	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_TLS
 	.long	cpu_v7_name
 	.long	v7_processor_functions
 	.long	v7wbi_tlb_fns

^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH 2/2] arm: Make VFPv3 usable on ARMv6
  2010-06-21 13:51 [PATCH 0/2] Make ARMv6 behave with TLS, VFPv3, and NEON Tony Lindgren
  2010-06-21 13:51 ` [PATCH 1/2] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6 Tony Lindgren
@ 2010-06-21 13:51 ` Tony Lindgren
  2010-06-22 12:59   ` Catalin Marinas
  1 sibling, 1 reply; 33+ messages in thread
From: Tony Lindgren @ 2010-06-21 13:51 UTC (permalink / raw)
  To: linux-arm-kernel

MVFR0 and MVFR1 are only available starting with ARM1136 r1p0 release
according to "B.5 VFP changes" in DDI0211F_arm1136_r1p0_trm.pdf. This is
also when TLS register got added, so we can use HAS_TLS also to test for
MVFR0 and MVFR1.

Otherwise VFPFMRX and VFPFMXR access fails and we get:

Internal error: Oops - undefined instruction: 0 [#1]
PC is at no_old_VFP_process+0x8/0x3c
LR is at __und_svc+0x48/0x80
...

Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/include/asm/vfpmacros.h |   18 ++++++++++++++++++
 arch/arm/vfp/vfpmodule.c         |   10 +++++++---
 2 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/arch/arm/include/asm/vfpmacros.h b/arch/arm/include/asm/vfpmacros.h
index 422f3cc..3d5fc41 100644
--- a/arch/arm/include/asm/vfpmacros.h
+++ b/arch/arm/include/asm/vfpmacros.h
@@ -3,6 +3,8 @@
  *
  * Assembler-only file containing VFP macros and register definitions.
  */
+#include <asm/hwcap.h>
+
 #include "vfp.h"
 
 @ Macros to allow building with old toolkits (with no VFP support)
@@ -22,12 +24,20 @@
 	LDC	p11, cr0, [\base],#32*4		    @ FLDMIAD \base!, {d0-d15}
 #endif
 #ifdef CONFIG_VFPv3
+#if __LINUX_ARM_ARCH__ <= 6
+	ldr	\tmp, =elf_hwcap		    @ may not have MVFR regs
+	ldr	\tmp, [\tmp, #0]
+	tst	\tmp, #HWCAP_VFPv3D16
+	ldceq	p11, cr0, [\base],#32*4		    @ FLDMIAD \base!, {d16-d31}
+	addne	\base, \base, #32*4		    @ step over unused register space
+#else
 	VFPFMRX	\tmp, MVFR0			    @ Media and VFP Feature Register 0
 	and	\tmp, \tmp, #MVFR0_A_SIMD_MASK	    @ A_SIMD field
 	cmp	\tmp, #2			    @ 32 x 64bit registers?
 	ldceql	p11, cr0, [\base],#32*4		    @ FLDMIAD \base!, {d16-d31}
 	addne	\base, \base, #32*4		    @ step over unused register space
 #endif
+#endif
 	.endm
 
 	@ write all the working registers out of the VFP
@@ -38,10 +48,18 @@
 	STC	p11, cr0, [\base],#32*4		    @ FSTMIAD \base!, {d0-d15}
 #endif
 #ifdef CONFIG_VFPv3
+#if __LINUX_ARM_ARCH__ <= 6
+	ldr	\tmp, =elf_hwcap		    @ may not have MVFR regs
+	ldr	\tmp, [\tmp, #0]
+	tst	\tmp, #HWCAP_VFPv3D16
+	stceq	p11, cr0, [\base],#32*4		    @ FSTMIAD \base!, {d16-d31}
+	addne	\base, \base, #32*4		    @ step over unused register space
+#else
 	VFPFMRX	\tmp, MVFR0			    @ Media and VFP Feature Register 0
 	and	\tmp, \tmp, #MVFR0_A_SIMD_MASK	    @ A_SIMD field
 	cmp	\tmp, #2			    @ 32 x 64bit registers?
 	stceql	p11, cr0, [\base],#32*4		    @ FSTMIAD \base!, {d16-d31}
 	addne	\base, \base, #32*4		    @ step over unused register space
 #endif
+#endif
 	.endm
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index 315a540..19ba626 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -549,10 +549,14 @@ static int __init vfp_init(void)
 		/*
 		 * Check for the presence of the Advanced SIMD
 		 * load/store instructions, integer and single
-		 * precision floating point operations.
+		 * precision floating point operations. Only check
+		 * for NEON if the hardware has TLS register as the
+		 * MVFR registers got added in ARM1136 r1p0 with TLS.
 		 */
-		if ((fmrx(MVFR1) & 0x000fff00) == 0x00011100)
-			elf_hwcap |= HWCAP_NEON;
+		if (elf_hwcap & HWCAP_TLS) {
+			if ((fmrx(MVFR1) & 0x000fff00) == 0x00011100)
+				elf_hwcap |= HWCAP_NEON;
+		}
 #endif
 	}
 	return 0;

^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH 1/2] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6
  2010-06-21 13:51 ` [PATCH 1/2] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6 Tony Lindgren
@ 2010-06-22  9:28   ` Tony Lindgren
  2010-06-22 17:00   ` Jamie Lokier
  1 sibling, 0 replies; 33+ messages in thread
From: Tony Lindgren @ 2010-06-22  9:28 UTC (permalink / raw)
  To: linux-arm-kernel

* Tony Lindgren <tony@atomide.com> [100621 16:45]:
> The TLS register is only available on ARM1136 r1p0 and later.
> Set HWCAP_TLS flags if hardware TLS is available.
> 
> Note that we now use 0xffff0ff4 for flagging software TLS to
> __kuser_get_tls, and 0xffff0ff8 for storing the software TLS value.

Forgot my own earlier comment to check only for ARM1136, will update
with this patch.

Tony
-------------- next part --------------
A non-text attachment was scrubbed...
Name: tls-1136.patch
Type: text/x-diff
Size: 488 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-arm-kernel/attachments/20100622/94d5fc78/attachment.bin>

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 2/2] arm: Make VFPv3 usable on ARMv6
  2010-06-21 13:51 ` [PATCH 2/2] arm: Make VFPv3 usable on ARMv6 Tony Lindgren
@ 2010-06-22 12:59   ` Catalin Marinas
  2010-06-22 13:20     ` Tony Lindgren
  0 siblings, 1 reply; 33+ messages in thread
From: Catalin Marinas @ 2010-06-22 12:59 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, 2010-06-21 at 14:51 +0100, Tony Lindgren wrote:
> diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
> index 315a540..19ba626 100644
> --- a/arch/arm/vfp/vfpmodule.c
> +++ b/arch/arm/vfp/vfpmodule.c
> @@ -549,10 +549,14 @@ static int __init vfp_init(void)
>                 /*
>                  * Check for the presence of the Advanced SIMD
>                  * load/store instructions, integer and single
> -                * precision floating point operations.
> +                * precision floating point operations. Only check
> +                * for NEON if the hardware has TLS register as the
> +                * MVFR registers got added in ARM1136 r1p0 with TLS.
>                  */
> -               if ((fmrx(MVFR1) & 0x000fff00) == 0x00011100)
> -                       elf_hwcap |= HWCAP_NEON;
> +               if (elf_hwcap & HWCAP_TLS) {
> +                       if ((fmrx(MVFR1) & 0x000fff00) == 0x00011100)
> +                               elf_hwcap |= HWCAP_NEON;
> +               }

MVFR should be available with the new CPU id format:

	((read_cpuid_id() & 0x000f0000) == 0x000f0000)

I think that would be a cleaner test than relying on the TLS presence.

-- 
Catalin

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 2/2] arm: Make VFPv3 usable on ARMv6
  2010-06-22 12:59   ` Catalin Marinas
@ 2010-06-22 13:20     ` Tony Lindgren
  2010-06-23  7:57       ` Tony Lindgren
  0 siblings, 1 reply; 33+ messages in thread
From: Tony Lindgren @ 2010-06-22 13:20 UTC (permalink / raw)
  To: linux-arm-kernel

* Catalin Marinas <catalin.marinas@arm.com> [100622 15:53]:
> On Mon, 2010-06-21 at 14:51 +0100, Tony Lindgren wrote:
> > diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
> > index 315a540..19ba626 100644
> > --- a/arch/arm/vfp/vfpmodule.c
> > +++ b/arch/arm/vfp/vfpmodule.c
> > @@ -549,10 +549,14 @@ static int __init vfp_init(void)
> >                 /*
> >                  * Check for the presence of the Advanced SIMD
> >                  * load/store instructions, integer and single
> > -                * precision floating point operations.
> > +                * precision floating point operations. Only check
> > +                * for NEON if the hardware has TLS register as the
> > +                * MVFR registers got added in ARM1136 r1p0 with TLS.
> >                  */
> > -               if ((fmrx(MVFR1) & 0x000fff00) == 0x00011100)
> > -                       elf_hwcap |= HWCAP_NEON;
> > +               if (elf_hwcap & HWCAP_TLS) {
> > +                       if ((fmrx(MVFR1) & 0x000fff00) == 0x00011100)
> > +                               elf_hwcap |= HWCAP_NEON;
> > +               }
> 
> MVFR should be available with the new CPU id format:
> 
> 	((read_cpuid_id() & 0x000f0000) == 0x000f0000)
> 
> I think that would be a cleaner test than relying on the TLS presence.

OK, thanks that's better. Will update accordingly and repost.

Tony

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 1/2] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6
  2010-06-21 13:51 ` [PATCH 1/2] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6 Tony Lindgren
  2010-06-22  9:28   ` Tony Lindgren
@ 2010-06-22 17:00   ` Jamie Lokier
  2010-06-23  7:39     ` Tony Lindgren
  1 sibling, 1 reply; 33+ messages in thread
From: Jamie Lokier @ 2010-06-22 17:00 UTC (permalink / raw)
  To: linux-arm-kernel

Tony Lindgren wrote:
>  __kuser_get_tls:				@ 0xffff0fe0
> -
> -#if !defined(CONFIG_HAS_TLS_REG) && !defined(CONFIG_TLS_REG_EMUL)
> -	ldr	r0, [pc, #(16 - 8)]		@ TLS stored at 0xffff0ff0
> -#else
> -	mrc	p15, 0, r0, c13, c0, 3		@ read TLS register
> -#endif
> +	ldr     r0, [pc, #(20 - 8)]		@ software TLS set in 0xffff0ff4?
> +	cmp	r0, #0				@ hardware TLS if flag not set
> +	mrceq	p15, 0, r0, c13, c0, 3		@ read hardware TLS register
> +	ldrne	r0, [pc, #(12 - 8)]		@ software TLS val at 0xffff0ff8
>  	usr_ret	lr
> -
> -	.rep	5
> -	.word	0			@ pad up to __kuser_helper_version
> -	.endr
> +	.word	0				@ non-zero for software TLS
> +	.word	0				@ software TLS value

It'd be nice not to waste instructions checking for HWCAP_TLS on archs
which definitely don't have it.  I guess it doesn't matter elsewhere;
I'd expect this to be a warm path for some programs making extensive
use of TLS (I haven't measured though).

As it's only a single instruction, and the code is in a writable page
already (copied at init), how about just patching the instruction
when ELF_HWCAP is set?

-- Jamie

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 1/2] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6
  2010-06-22 17:00   ` Jamie Lokier
@ 2010-06-23  7:39     ` Tony Lindgren
  2010-06-23  8:12       ` Russell King - ARM Linux
  2010-06-23 13:36       ` Jamie Lokier
  0 siblings, 2 replies; 33+ messages in thread
From: Tony Lindgren @ 2010-06-23  7:39 UTC (permalink / raw)
  To: linux-arm-kernel

* Jamie Lokier <jamie@shareable.org> [100622 19:54]:
> Tony Lindgren wrote:
> >  __kuser_get_tls:				@ 0xffff0fe0
> > -
> > -#if !defined(CONFIG_HAS_TLS_REG) && !defined(CONFIG_TLS_REG_EMUL)
> > -	ldr	r0, [pc, #(16 - 8)]		@ TLS stored at 0xffff0ff0
> > -#else
> > -	mrc	p15, 0, r0, c13, c0, 3		@ read TLS register
> > -#endif
> > +	ldr     r0, [pc, #(20 - 8)]		@ software TLS set in 0xffff0ff4?
> > +	cmp	r0, #0				@ hardware TLS if flag not set
> > +	mrceq	p15, 0, r0, c13, c0, 3		@ read hardware TLS register
> > +	ldrne	r0, [pc, #(12 - 8)]		@ software TLS val at 0xffff0ff8
> >  	usr_ret	lr
> > -
> > -	.rep	5
> > -	.word	0			@ pad up to __kuser_helper_version
> > -	.endr
> > +	.word	0				@ non-zero for software TLS
> > +	.word	0				@ software TLS value
> 
> It'd be nice not to waste instructions checking for HWCAP_TLS on archs
> which definitely don't have it.  I guess it doesn't matter elsewhere;
> I'd expect this to be a warm path for some programs making extensive
> use of TLS (I haven't measured though).

OK, but let's try to figure out a way that does not add more ifdef else
code as that makes it harder to build support for multiple ARM cores.
 
> As it's only a single instruction, and the code is in a writable page
> already (copied at init), how about just patching the instruction
> when ELF_HWCAP is set?

Yeah that can be done for __kuser_get_tls if it's always writable.
But __switch_to is trickier because of the CONFIG_MMU ifdefs there.

What if we have optional __switch_to and __kuser_get_tls implementations
in the mm/proc-*.S files that get copied over the current locations
if implemented?

Regards,

Tony

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 2/2] arm: Make VFPv3 usable on ARMv6
  2010-06-22 13:20     ` Tony Lindgren
@ 2010-06-23  7:57       ` Tony Lindgren
  2010-06-25 13:50         ` Catalin Marinas
  0 siblings, 1 reply; 33+ messages in thread
From: Tony Lindgren @ 2010-06-23  7:57 UTC (permalink / raw)
  To: linux-arm-kernel

* Tony Lindgren <tony@atomide.com> [100622 16:15]:
> * Catalin Marinas <catalin.marinas@arm.com> [100622 15:53]:
> > On Mon, 2010-06-21 at 14:51 +0100, Tony Lindgren wrote:
> > > diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
> > > index 315a540..19ba626 100644
> > > --- a/arch/arm/vfp/vfpmodule.c
> > > +++ b/arch/arm/vfp/vfpmodule.c
> > > @@ -549,10 +549,14 @@ static int __init vfp_init(void)
> > >                 /*
> > >                  * Check for the presence of the Advanced SIMD
> > >                  * load/store instructions, integer and single
> > > -                * precision floating point operations.
> > > +                * precision floating point operations. Only check
> > > +                * for NEON if the hardware has TLS register as the
> > > +                * MVFR registers got added in ARM1136 r1p0 with TLS.
> > >                  */
> > > -               if ((fmrx(MVFR1) & 0x000fff00) == 0x00011100)
> > > -                       elf_hwcap |= HWCAP_NEON;
> > > +               if (elf_hwcap & HWCAP_TLS) {
> > > +                       if ((fmrx(MVFR1) & 0x000fff00) == 0x00011100)
> > > +                               elf_hwcap |= HWCAP_NEON;
> > > +               }
> > 
> > MVFR should be available with the new CPU id format:
> > 
> > 	((read_cpuid_id() & 0x000f0000) == 0x000f0000)
> > 
> > I think that would be a cleaner test than relying on the TLS presence.
> 
> OK, thanks that's better. Will update accordingly and repost.

Here's this one updated. Using your better MVFR check also removes the
dependency to the previous patch.

Regards,

Tony
-------------- next part --------------
A non-text attachment was scrubbed...
Name: vfpv3-armv6-fix.patch
Type: text/x-diff
Size: 3323 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-arm-kernel/attachments/20100623/75317dfe/attachment.bin>

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 1/2] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6
  2010-06-23  7:39     ` Tony Lindgren
@ 2010-06-23  8:12       ` Russell King - ARM Linux
  2010-06-23  9:28         ` Tony Lindgren
  2010-06-23 13:36       ` Jamie Lokier
  1 sibling, 1 reply; 33+ messages in thread
From: Russell King - ARM Linux @ 2010-06-23  8:12 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Jun 23, 2010 at 10:39:13AM +0300, Tony Lindgren wrote:
> Yeah that can be done for __kuser_get_tls if it's always writable.
> But __switch_to is trickier because of the CONFIG_MMU ifdefs there.

And impossible with XIP kernels.

> What if we have optional __switch_to and __kuser_get_tls implementations
> in the mm/proc-*.S files that get copied over the current locations
> if implemented?

Also problematical with XIP - if we go down the route of implementing
these by copying code fragments into the kernel, we need to strip out
XIP support or implement a second way.  Obviously having a second way
adds maintainence burden, and the second way will probably lose out
on updates.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 1/2] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6
  2010-06-23  8:12       ` Russell King - ARM Linux
@ 2010-06-23  9:28         ` Tony Lindgren
  2010-06-23  9:32           ` Russell King - ARM Linux
  2010-06-23 13:28           ` Jamie Lokier
  0 siblings, 2 replies; 33+ messages in thread
From: Tony Lindgren @ 2010-06-23  9:28 UTC (permalink / raw)
  To: linux-arm-kernel

* Russell King - ARM Linux <linux@arm.linux.org.uk> [100623 11:06]:
> On Wed, Jun 23, 2010 at 10:39:13AM +0300, Tony Lindgren wrote:
> > Yeah that can be done for __kuser_get_tls if it's always writable.
> > But __switch_to is trickier because of the CONFIG_MMU ifdefs there.
> 
> And impossible with XIP kernels.
> 
> > What if we have optional __switch_to and __kuser_get_tls implementations
> > in the mm/proc-*.S files that get copied over the current locations
> > if implemented?
> 
> Also problematical with XIP - if we go down the route of implementing
> these by copying code fragments into the kernel, we need to strip out
> XIP support or implement a second way.  Obviously having a second way
> adds maintainence burden, and the second way will probably lose out
> on updates.

How about if we implement the default XIP-safe unoptimized functions,
with minimal iffdeffery and then allow optional override for non-XIP
kernels from mm/proc-*.S files?

Regards,

Tony

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 1/2] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6
  2010-06-23  9:28         ` Tony Lindgren
@ 2010-06-23  9:32           ` Russell King - ARM Linux
  2010-06-23 13:28           ` Jamie Lokier
  1 sibling, 0 replies; 33+ messages in thread
From: Russell King - ARM Linux @ 2010-06-23  9:32 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Jun 23, 2010 at 12:28:44PM +0300, Tony Lindgren wrote:
> * Russell King - ARM Linux <linux@arm.linux.org.uk> [100623 11:06]:
> > On Wed, Jun 23, 2010 at 10:39:13AM +0300, Tony Lindgren wrote:
> > > Yeah that can be done for __kuser_get_tls if it's always writable.
> > > But __switch_to is trickier because of the CONFIG_MMU ifdefs there.
> > 
> > And impossible with XIP kernels.
> > 
> > > What if we have optional __switch_to and __kuser_get_tls implementations
> > > in the mm/proc-*.S files that get copied over the current locations
> > > if implemented?
> > 
> > Also problematical with XIP - if we go down the route of implementing
> > these by copying code fragments into the kernel, we need to strip out
> > XIP support or implement a second way.  Obviously having a second way
> > adds maintainence burden, and the second way will probably lose out
> > on updates.
> 
> How about if we implement the default XIP-safe unoptimized functions,
> with minimal iffdeffery and then allow optional override for non-XIP
> kernels from mm/proc-*.S files?

That's what I was referring to by "second way".

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 1/2] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6
  2010-06-23  9:28         ` Tony Lindgren
  2010-06-23  9:32           ` Russell King - ARM Linux
@ 2010-06-23 13:28           ` Jamie Lokier
  1 sibling, 0 replies; 33+ messages in thread
From: Jamie Lokier @ 2010-06-23 13:28 UTC (permalink / raw)
  To: linux-arm-kernel

Tony Lindgren wrote:
> > Also problematical with XIP - if we go down the route of implementing
> > these by copying code fragments into the kernel, we need to strip out
> > XIP support or implement a second way.  Obviously having a second way
> > adds maintainence burden, and the second way will probably lose out
> > on updates.
> 
> How about if we implement the default XIP-safe unoptimized functions,
> with minimal iffdeffery and then allow optional override for non-XIP
> kernels from mm/proc-*.S files?

That might be a plan for some things like cache maintenance - skip a
function call when not needed, call direct instead of indirect, but
for XIP that status quo must remain.

-- Jamie

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 1/2] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6
  2010-06-23  7:39     ` Tony Lindgren
  2010-06-23  8:12       ` Russell King - ARM Linux
@ 2010-06-23 13:36       ` Jamie Lokier
  2010-06-23 14:19         ` Nicolas Pitre
  2010-06-29 14:18         ` Tony Lindgren
  1 sibling, 2 replies; 33+ messages in thread
From: Jamie Lokier @ 2010-06-23 13:36 UTC (permalink / raw)
  To: linux-arm-kernel

Tony Lindgren wrote:
> * Jamie Lokier <jamie@shareable.org> [100622 19:54]:
> > Tony Lindgren wrote:
> > >  __kuser_get_tls:				@ 0xffff0fe0
> > > -
> > > -#if !defined(CONFIG_HAS_TLS_REG) && !defined(CONFIG_TLS_REG_EMUL)
> > > -	ldr	r0, [pc, #(16 - 8)]		@ TLS stored at 0xffff0ff0
> > > -#else
> > > -	mrc	p15, 0, r0, c13, c0, 3		@ read TLS register
> > > -#endif
> > > +	ldr     r0, [pc, #(20 - 8)]		@ software TLS set in 0xffff0ff4?
> > > +	cmp	r0, #0				@ hardware TLS if flag not set
> > > +	mrceq	p15, 0, r0, c13, c0, 3		@ read hardware TLS register
> > > +	ldrne	r0, [pc, #(12 - 8)]		@ software TLS val at 0xffff0ff8
> > >  	usr_ret	lr
> > > -
> > > -	.rep	5
> > > -	.word	0			@ pad up to __kuser_helper_version
> > > -	.endr
> > > +	.word	0				@ non-zero for software TLS
> > > +	.word	0				@ software TLS value
> > 
> > It'd be nice not to waste instructions checking for HWCAP_TLS on archs
> > which definitely don't have it.  I guess it doesn't matter elsewhere;
> > I'd expect this to be a warm path for some programs making extensive
> > use of TLS (I haven't measured though).
> 
> OK, but let's try to figure out a way that does not add more ifdef else
> code as that makes it harder to build support for multiple ARM cores.
>  
> > As it's only a single instruction, and the code is in a writable page
> > already (copied at init), how about just patching the instruction
> > when ELF_HWCAP is set?
> 
> Yeah that can be done for __kuser_get_tls if it's always writable.
> But __switch_to is trickier because of the CONFIG_MMU ifdefs there.

__kuser_get_tls must be writable in kernels where !HAS_TLS_REG is
supported, because the TLS value is written to the same page.

I was thinking of changing *only* __kuser_get_tls, by the way.  Out of
all the different places, that's the only one I'd expect to be a hot
path in some TLS-using programs.

> What if we have optional __switch_to and __kuser_get_tls implementations
> in the mm/proc-*.S files that get copied over the current locations
> if implemented?

As __kuser_get_tls varies by only ones instruction, I don't think
there's any point doing anything other than a single word write, at
the point where the HWCAP is set, with its initial value being the
!HAS_TLS_REG instruction.

For other things like __switch_to and maybe cache maintenance calls,
dmb() etc, I'd suggest first doing a generic asm mechanism like x86's
"alternatives", keeping in mind that one of the alternatives has to be
XIP friendly.

-- Jamie

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 1/2] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6
  2010-06-23 13:36       ` Jamie Lokier
@ 2010-06-23 14:19         ` Nicolas Pitre
  2010-06-24  0:28           ` Jamie Lokier
  2010-06-29 14:18         ` Tony Lindgren
  1 sibling, 1 reply; 33+ messages in thread
From: Nicolas Pitre @ 2010-06-23 14:19 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, 23 Jun 2010, Jamie Lokier wrote:

> For other things like __switch_to and maybe cache maintenance calls,
> dmb() etc, I'd suggest first doing a generic asm mechanism like x86's
> "alternatives", keeping in mind that one of the alternatives has to be
> XIP friendly.

You cannot be XIP friendly unless you rewrite the concerned function(s) 
into a RAM page in order to modify it.  And then of course you have to 
play tricks with the linker so that the rewritten functions are 
referenced with their final rewritten location.  And that means of 
course that the RAM location has to be a constant unless we introduce 
some indirect function calls.  And at that point we're not much better 
than the proposed runtime test.

Also, if you are interested by a XIP kernel, this usually means you have 
a fairly highly customized kernel config.  In this context it is 
senseless to have runtime patching of the kernel.  What you want in that 
case is as much stuff as possible selected and optimized at compile 
time, and anything you don't need configured out.

So it would be best if dynamic function patching could be statically 
generated from the same source when XIP is active.


Nicolas

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 1/2] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6
  2010-06-23 14:19         ` Nicolas Pitre
@ 2010-06-24  0:28           ` Jamie Lokier
  0 siblings, 0 replies; 33+ messages in thread
From: Jamie Lokier @ 2010-06-24  0:28 UTC (permalink / raw)
  To: linux-arm-kernel

Nicolas Pitre wrote:
> On Wed, 23 Jun 2010, Jamie Lokier wrote:
> 
> > For other things like __switch_to and maybe cache maintenance calls,
> > dmb() etc, I'd suggest first doing a generic asm mechanism like x86's
> > "alternatives", keeping in mind that one of the alternatives has to be
> > XIP friendly.
> 
> You cannot be XIP friendly unless you rewrite the concerned function(s) 
> into a RAM page in order to modify it.  And then of course you have to 
> play tricks with the linker so that the rewritten functions are 
> referenced with their final rewritten location.  And that means of 
> course that the RAM location has to be a constant unless we introduce 
> some indirect function calls.  And at that point we're not much better 
> than the proposed runtime test.
> 
> Also, if you are interested by a XIP kernel, this usually means you have 
> a fairly highly customized kernel config.  In this context it is 
> senseless to have runtime patching of the kernel.  What you want in that 
> case is as much stuff as possible selected and optimized at compile 
> time, and anything you don't need configured out.

I wasn't suggesting that XIP kernels do any run-time npatching.
Sorry, I thought that was obvious.

What I mean is to provide a set of macros, a bit like x86's
alternatives macros, which take various asm fragments and the
condition which would choose between them, and compiles to
unconditional code if the condition is known at compile time (XIP or
not), otherwise compiles to fixed, run-time conditional code on XIP
(it could be conditional code or an indirect functional call), and on
non-XIP compiles to patchable code with an alternate-table section.

-- Jamie

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 2/2] arm: Make VFPv3 usable on ARMv6
  2010-06-23  7:57       ` Tony Lindgren
@ 2010-06-25 13:50         ` Catalin Marinas
  2010-07-01 12:42           ` Tony Lindgren
  0 siblings, 1 reply; 33+ messages in thread
From: Catalin Marinas @ 2010-06-25 13:50 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, 2010-06-23 at 08:57 +0100, Tony Lindgren wrote:
> MVFR0 and MVFR1 are only available starting with ARM1136 r1p0 release
> according to "B.5 VFP changes" in DDI0211F_arm1136_r1p0_trm.pdf. This is
> also when TLS register got added, so we can use HAS_TLS also to test for
> MVFR0 and MVFR1.
> 
> Otherwise VFPFMRX and VFPFMXR access fails and we get:
> 
> Internal error: Oops - undefined instruction: 0 [#1]
> PC is at no_old_VFP_process+0x8/0x3c
> LR is at __und_svc+0x48/0x80
> ...
> 
> Signed-off-by: Tony Lindgren <tony@atomide.com>

The new version looks fine to me.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>

-- 
Catalin

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 1/2] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6
  2010-06-23 13:36       ` Jamie Lokier
  2010-06-23 14:19         ` Nicolas Pitre
@ 2010-06-29 14:18         ` Tony Lindgren
  2010-06-29 19:20           ` Nicolas Pitre
  1 sibling, 1 reply; 33+ messages in thread
From: Tony Lindgren @ 2010-06-29 14:18 UTC (permalink / raw)
  To: linux-arm-kernel

* Jamie Lokier <jamie@shareable.org> [100623 16:30]:
> Tony Lindgren wrote:
> > * Jamie Lokier <jamie@shareable.org> [100622 19:54]:
> > > Tony Lindgren wrote:
> > > >  __kuser_get_tls:				@ 0xffff0fe0
> > > > -
> > > > -#if !defined(CONFIG_HAS_TLS_REG) && !defined(CONFIG_TLS_REG_EMUL)
> > > > -	ldr	r0, [pc, #(16 - 8)]		@ TLS stored at 0xffff0ff0
> > > > -#else
> > > > -	mrc	p15, 0, r0, c13, c0, 3		@ read TLS register
> > > > -#endif
> > > > +	ldr     r0, [pc, #(20 - 8)]		@ software TLS set in 0xffff0ff4?
> > > > +	cmp	r0, #0				@ hardware TLS if flag not set
> > > > +	mrceq	p15, 0, r0, c13, c0, 3		@ read hardware TLS register
> > > > +	ldrne	r0, [pc, #(12 - 8)]		@ software TLS val at 0xffff0ff8
> > > >  	usr_ret	lr
> > > > -
> > > > -	.rep	5
> > > > -	.word	0			@ pad up to __kuser_helper_version
> > > > -	.endr
> > > > +	.word	0				@ non-zero for software TLS
> > > > +	.word	0				@ software TLS value
> > > 
> > > It'd be nice not to waste instructions checking for HWCAP_TLS on archs
> > > which definitely don't have it.  I guess it doesn't matter elsewhere;
> > > I'd expect this to be a warm path for some programs making extensive
> > > use of TLS (I haven't measured though).
> > 
> > OK, but let's try to figure out a way that does not add more ifdef else
> > code as that makes it harder to build support for multiple ARM cores.
> >  
> > > As it's only a single instruction, and the code is in a writable page
> > > already (copied at init), how about just patching the instruction
> > > when ELF_HWCAP is set?
> > 
> > Yeah that can be done for __kuser_get_tls if it's always writable.
> > But __switch_to is trickier because of the CONFIG_MMU ifdefs there.
> 
> __kuser_get_tls must be writable in kernels where !HAS_TLS_REG is
> supported, because the TLS value is written to the same page.
> 
> I was thinking of changing *only* __kuser_get_tls, by the way.  Out of
> all the different places, that's the only one I'd expect to be a hot
> path in some TLS-using programs.

OK. Sorry for the delay again. Here's an updated version that sets
__kuser_get_tls instruction dynamically. Does this do what you were
thinking, or did I miss something?

Also, can we detect somehow the hardware that uses CONFIG_TLS_REG_EMUL?
Might be possible to remove that Kconfig option too later on..

Regards,

Tony
-------------- next part --------------
A non-text attachment was scrubbed...
Name: arm-tls-v3.patch
Type: text/x-diff
Size: 7349 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-arm-kernel/attachments/20100629/99ddda56/attachment.bin>

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 1/2] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6
  2010-06-29 14:18         ` Tony Lindgren
@ 2010-06-29 19:20           ` Nicolas Pitre
  2010-06-30 11:08             ` Tony Lindgren
  0 siblings, 1 reply; 33+ messages in thread
From: Nicolas Pitre @ 2010-06-29 19:20 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, 29 Jun 2010, Tony Lindgren wrote:

> OK. Sorry for the delay again. Here's an updated version that sets
> __kuser_get_tls instruction dynamically. Does this do what you were
> thinking, or did I miss something?

See my comments below.

> Also, can we detect somehow the hardware that uses CONFIG_TLS_REG_EMUL?
> Might be possible to remove that Kconfig option too later on..

Well... I don't think we _should_ try to detect it as it is not widely 
available if at all.

And there is actually a "bug" with __kuser_get_tls because if ever 
CONFIG_TLS_REG_EMUL is selected then a call to __kuser_get_tls is 
currently doing nothing while it should actually use the mrc 
instruction.

> diff --git a/arch/arm/include/asm/hwcap.h b/arch/arm/include/asm/hwcap.h
> index f7bd52b..c1062c3 100644
> --- a/arch/arm/include/asm/hwcap.h
> +++ b/arch/arm/include/asm/hwcap.h
> @@ -19,6 +19,7 @@
>  #define HWCAP_NEON	4096
>  #define HWCAP_VFPv3	8192
>  #define HWCAP_VFPv3D16	16384
> +#define HWCAP_TLS	32768
>  
>  #if defined(__KERNEL__) && !defined(__ASSEMBLY__)
>  /*
> diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
> index 7ee48e7..949df9b 100644
> --- a/arch/arm/kernel/entry-armv.S
> +++ b/arch/arm/kernel/entry-armv.S
> @@ -739,11 +739,13 @@ ENTRY(__switch_to)
>  #ifdef CONFIG_MMU
>  	ldr	r6, [r2, #TI_CPU_DOMAIN]
>  #endif
> -#if defined(CONFIG_HAS_TLS_REG)
> -	mcr	p15, 0, r3, c13, c0, 3		@ set TLS register
> -#elif !defined(CONFIG_TLS_REG_EMUL)
> -	mov	r4, #0xffff0fff
> -	str	r3, [r4, #-15]			@ TLS val at 0xffff0ff0
> +#if !defined(CONFIG_TLS_REG_EMUL)
> +	ldr	r4, =elf_hwcap
> +	ldr	r4, [r4, #0]
> +	mov	r5, #0xffff0fff
> +	tst	r4, #HWCAP_TLS			@ hardware TLS available?
> +	mcrne	p15, 0, r3, c13, c0, 3		@ yes, set TLS register
> +	streq	r3, [r5, #-15]			@ set TLS value at 0xffff0ff0
>  #endif

Could this be a tristate instead?  There are actually 3 cases:

1) We know the TLS reg is _always_ available i.e. ARMv6K, ARMV7 and 
   above.  In that case the test on elf_hwcap is redundant and wasteful.

2) We know the TLS reg is _never_ available i.e. pre-ARMv6.  The test on 
   elf_hwcap is again redundant and wasteful.

3) We're unsure as the actual CPU is known only at run time.

Case #1 will become the common case in the future.  Case #2 is still 
widely relevant for deployed systems in the field, and some popular 
ARMv5TE based SOCs are still produced right now.  So instead of 
replacing #1 and #2 with #3, I think you should add #3 to the other 
cases instead.

>  #ifdef CONFIG_MMU
>  	mcr	p15, 0, r6, c3, c0, 0		@ Set domain register
> @@ -1009,16 +1011,13 @@ kuser_cmpxchg_fixup:
>   */
>  
>  __kuser_get_tls:				@ 0xffff0fe0
> -
> -#if !defined(CONFIG_HAS_TLS_REG) && !defined(CONFIG_TLS_REG_EMUL)
> -	ldr	r0, [pc, #(16 - 8)]		@ TLS stored at 0xffff0ff0
> -#else
> -	mrc	p15, 0, r0, c13, c0, 3		@ read TLS register
> -#endif
> +	nop				@ read TLS, set in kuser_get_tls_init
>  	usr_ret	lr
> -
> -	.rep	5
> -	.word	0			@ pad up to __kuser_helper_version
> +	mrc	p15, 0, r0, c13, c0, 3	@ 0xffff0fe8 hardware TLS code
> +	ldr	r0, [pc, #(16 - 8)]	@ 0xffff0fec software TLS code
> +	.word	0			@ 0xffff0ff0 software TLS value
> +	nop				@ pad up to __kuser_helper_version
> +	nop
>  	.endr

This looks obscur.  The idea of patching the appropriate instruction at 
runtime here is a good one.  However I'd simply keep the ldr version in 
place otherwise the pc displacement doesn't match anymore when 
disassembling.  And simply overwrite it with the mrc version when 
necessary.

BTW you left a stray .endr here.

>  /*
> diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
> index 122d999..a675260 100644
> --- a/arch/arm/kernel/setup.c
> +++ b/arch/arm/kernel/setup.c
> @@ -269,6 +269,27 @@ static void __init cacheid_init(void)
>  extern struct proc_info_list *lookup_processor_type(unsigned int);
>  extern struct machine_desc *lookup_machine_type(unsigned int);
>  
> +#ifdef CONFIG_CPU_V6
> +static void __init feat_v6_fixup(void)
> +{
> +	int id = read_cpuid_id();
> +
> +	if (id & 0x000f0000 != 0x00070000)
> +		return;
> +
> +	/*
> +	 * HWCAP_TLS is available only on 1136 r1p0 and later,
> +	 * see also kuser_get_tls_init.
> +	 */
> +	if ((((id >> 4) & 0xfff) == 0xb36) && (((id >> 20) & 3) == 0))
> +		elf_hwcap &= ~HWCAP_TLS;

You should probably test for the vendor ID as well (ARM in this case).

> +}
> +#else
> +static inline void feat_v6_fixup(void)
> +{
> +}
> +#endif

I think the #ifdef is unnecessary here.  This is __init code anyway, so 
this could as well be always compiled in.

> +
>  static void __init setup_processor(void)
>  {
>  	struct proc_info_list *list;
> @@ -311,6 +332,8 @@ static void __init setup_processor(void)
>  	elf_hwcap &= ~HWCAP_THUMB;
>  #endif
>  
> +	feat_v6_fixup();
> +
>  	cacheid_init();
>  	cpu_proc_init();
>  }
> diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
> index 1621e53..85dd001 100644
> --- a/arch/arm/kernel/traps.c
> +++ b/arch/arm/kernel/traps.c
> @@ -518,16 +518,19 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
>  
>  	case NR(set_tls):
>  		thread->tp_value = regs->ARM_r0;
> -#if defined(CONFIG_HAS_TLS_REG)
> -		asm ("mcr p15, 0, %0, c13, c0, 3" : : "r" (regs->ARM_r0) );
> -#elif !defined(CONFIG_TLS_REG_EMUL)
> -		/*
> -		 * User space must never try to access this directly.
> -		 * Expect your app to break eventually if you do so.
> -		 * The user helper at 0xffff0fe0 must be used instead.
> -		 * (see entry-armv.S for details)
> -		 */
> -		*((unsigned int *)0xffff0ff0) = regs->ARM_r0;
> +#if !defined(CONFIG_TLS_REG_EMUL)
> +		if (elf_hwcap & HWCAP_TLS) {
> +			asm ("mcr p15, 0, %0, c13, c0, 3"
> +				: : "r" (regs->ARM_r0));
> +		} else {
> +			/*
> +			 * User space must never try to access this directly.
> +			 * Expect your app to break eventually if you do so.
> +			 * The user helper at 0xffff0fe0 must be used instead.
> +			 * (see entry-armv.S for details)
> +			 */
> +			*((unsigned int *)0xffff0ff0) = regs->ARM_r0;
> +		}
>  #endif

The same comment as for __kuser_get_tls would apply here.  However, 
instead of duplicating the code block, you could define a macro, such as 
has_tls_reg(), that would be either 0, 1, or ((elf_hwcap & HWCAP_TLS).

>  		return 0;
>  
> @@ -743,6 +746,21 @@ void __init trap_init(void)
>  	return;
>  }
>  
> +#if defined(CONFIG_TLS_REG_EMUL)
> +static void __init kuser_get_tls_init(unsigned long vectors)
> +{
> +	memcpy((void *)vectors + 0xfe0, (void *)vectors + 0xfe8, 4);
> +}
> +#else
> +static void __init kuser_get_tls_init(unsigned long vectors)
> +{
> +	if (elf_hwcap & HWCAP_TLS)
> +		memcpy((void *)vectors + 0xfe0, (void *)vectors + 0xfe8, 4);
> +	else
> +		memcpy((void *)vectors + 0xfe0, (void *)vectors + 0xfec, 4);
> +}
> +#endif

Please move the #ifdef within the function body.  Also it would be nice 
to add comments about what those magic offsets are.


Nicolas

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 1/2] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6
  2010-06-29 19:20           ` Nicolas Pitre
@ 2010-06-30 11:08             ` Tony Lindgren
  2010-06-30 13:17               ` Tony Lindgren
  0 siblings, 1 reply; 33+ messages in thread
From: Tony Lindgren @ 2010-06-30 11:08 UTC (permalink / raw)
  To: linux-arm-kernel

* Nicolas Pitre <nico@fluxnic.net> [100629 22:14]:
> On Tue, 29 Jun 2010, Tony Lindgren wrote:
> 
> > Also, can we detect somehow the hardware that uses CONFIG_TLS_REG_EMUL?
> > Might be possible to remove that Kconfig option too later on..
> 
> Well... I don't think we _should_ try to detect it as it is not widely 
> available if at all.

OK
 
> And there is actually a "bug" with __kuser_get_tls because if ever 
> CONFIG_TLS_REG_EMUL is selected then a call to __kuser_get_tls is 
> currently doing nothing while it should actually use the mrc 
> instruction.

I've changed it to initialize to mrc if (tls_emu || has_tls).

> Could this be a tristate instead?  There are actually 3 cases:
> 
> 1) We know the TLS reg is _always_ available i.e. ARMv6K, ARMV7 and 
>    above.  In that case the test on elf_hwcap is redundant and wasteful.
> 
> 2) We know the TLS reg is _never_ available i.e. pre-ARMv6.  The test on 
>    elf_hwcap is again redundant and wasteful.
> 
> 3) We're unsure as the actual CPU is known only at run time.
> 
> Case #1 will become the common case in the future.  Case #2 is still 
> widely relevant for deployed systems in the field, and some popular 
> ARMv5TE based SOCs are still produced right now.  So instead of 
> replacing #1 and #2 with #3, I think you should add #3 to the other 
> cases instead.

OK. I've implemented this logic in new file tls.h.

> >  #ifdef CONFIG_MMU
> >  	mcr	p15, 0, r6, c3, c0, 0		@ Set domain register
> > @@ -1009,16 +1011,13 @@ kuser_cmpxchg_fixup:
> >   */
> >  
> >  __kuser_get_tls:				@ 0xffff0fe0
> > -
> > -#if !defined(CONFIG_HAS_TLS_REG) && !defined(CONFIG_TLS_REG_EMUL)
> > -	ldr	r0, [pc, #(16 - 8)]		@ TLS stored at 0xffff0ff0
> > -#else
> > -	mrc	p15, 0, r0, c13, c0, 3		@ read TLS register
> > -#endif
> > +	nop				@ read TLS, set in kuser_get_tls_init
> >  	usr_ret	lr
> > -
> > -	.rep	5
> > -	.word	0			@ pad up to __kuser_helper_version
> > +	mrc	p15, 0, r0, c13, c0, 3	@ 0xffff0fe8 hardware TLS code
> > +	ldr	r0, [pc, #(16 - 8)]	@ 0xffff0fec software TLS code
> > +	.word	0			@ 0xffff0ff0 software TLS value
> > +	nop				@ pad up to __kuser_helper_version
> > +	nop
> >  	.endr
> 
> This looks obscur.  The idea of patching the appropriate instruction at 
> runtime here is a good one.  However I'd simply keep the ldr version in 
> place otherwise the pc displacement doesn't match anymore when 
> disassembling.  And simply overwrite it with the mrc version when 
> necessary.

OK, changed to use the ldr by default.
 
> BTW you left a stray .endr here.

Oops. I've put back the .rep .endrep instead of the nops now.
 
> > +	if ((((id >> 4) & 0xfff) == 0xb36) && (((id >> 20) & 3) == 0))
> > +		elf_hwcap &= ~HWCAP_TLS;
> 
> You should probably test for the vendor ID as well (ARM in this case).

OK, added.
 
> > +}
> > +#else
> > +static inline void feat_v6_fixup(void)
> > +{
> > +}
> > +#endif
> 
> I think the #ifdef is unnecessary here.  This is __init code anyway, so 
> this could as well be always compiled in.

Removed.
 
> > diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
> > index 1621e53..85dd001 100644
> > --- a/arch/arm/kernel/traps.c
> > +++ b/arch/arm/kernel/traps.c
> > @@ -518,16 +518,19 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
> >  
> >  	case NR(set_tls):
> >  		thread->tp_value = regs->ARM_r0;
> > -#if defined(CONFIG_HAS_TLS_REG)
> > -		asm ("mcr p15, 0, %0, c13, c0, 3" : : "r" (regs->ARM_r0) );
> > -#elif !defined(CONFIG_TLS_REG_EMUL)
> > -		/*
> > -		 * User space must never try to access this directly.
> > -		 * Expect your app to break eventually if you do so.
> > -		 * The user helper at 0xffff0fe0 must be used instead.
> > -		 * (see entry-armv.S for details)
> > -		 */
> > -		*((unsigned int *)0xffff0ff0) = regs->ARM_r0;
> > +#if !defined(CONFIG_TLS_REG_EMUL)
> > +		if (elf_hwcap & HWCAP_TLS) {
> > +			asm ("mcr p15, 0, %0, c13, c0, 3"
> > +				: : "r" (regs->ARM_r0));
> > +		} else {
> > +			/*
> > +			 * User space must never try to access this directly.
> > +			 * Expect your app to break eventually if you do so.
> > +			 * The user helper at 0xffff0fe0 must be used instead.
> > +			 * (see entry-armv.S for details)
> > +			 */
> > +			*((unsigned int *)0xffff0ff0) = regs->ARM_r0;
> > +		}
> >  #endif
> 
> The same comment as for __kuser_get_tls would apply here.  However, 
> instead of duplicating the code block, you could define a macro, such as 
> has_tls_reg(), that would be either 0, 1, or ((elf_hwcap & HWCAP_TLS).

Now using tls_emu and has_tls defines.
 
> >  		return 0;
> >  
> > @@ -743,6 +746,21 @@ void __init trap_init(void)
> >  	return;
> >  }
> >  
> > +#if defined(CONFIG_TLS_REG_EMUL)
> > +static void __init kuser_get_tls_init(unsigned long vectors)
> > +{
> > +	memcpy((void *)vectors + 0xfe0, (void *)vectors + 0xfe8, 4);
> > +}
> > +#else
> > +static void __init kuser_get_tls_init(unsigned long vectors)
> > +{
> > +	if (elf_hwcap & HWCAP_TLS)
> > +		memcpy((void *)vectors + 0xfe0, (void *)vectors + 0xfe8, 4);
> > +	else
> > +		memcpy((void *)vectors + 0xfe0, (void *)vectors + 0xfec, 4);
> > +}
> > +#endif
> 
> Please move the #ifdef within the function body.  Also it would be nice 
> to add comments about what those magic offsets are.

This is now cleaner too with if (tls_emu || has_tls).

Updated patch below.

Regards,

Tony
-------------- next part --------------
A non-text attachment was scrubbed...
Name: arm-tls-v4.patch
Type: text/x-diff
Size: 8804 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-arm-kernel/attachments/20100630/5ef872de/attachment.bin>

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 1/2] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6
  2010-06-30 11:08             ` Tony Lindgren
@ 2010-06-30 13:17               ` Tony Lindgren
  2010-06-30 14:42                 ` Nicolas Pitre
  0 siblings, 1 reply; 33+ messages in thread
From: Tony Lindgren @ 2010-06-30 13:17 UTC (permalink / raw)
  To: linux-arm-kernel

* Tony Lindgren <tony@atomide.com> [100630 14:02]:
> * Nicolas Pitre <nico@fluxnic.net> [100629 22:14]:
> 
> Updated patch below.

And a bug crept in..

> +	.macro set_tls_v6, tp, tmp1, tmp2
> +	ldr	\tmp1, =elf_hwcap
> +	ldr	\tmp1, [\tmp1, #0]
> +	mov	\tmp2, #0xffff0fff
> +	tst	\tmp2, #HWCAP_TLS		@ hardware TLS available?
> +	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
> +	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
> +	.endm

The tst	\tmp2, #HWCAP_TLS should of course use \tmp1.

Also fixed a warning about adding parentheses around comparison
if (id & 0x410f0000) != 0x41070000).

Again, updated patch below.

Tony

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 1/2] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6
  2010-06-30 13:17               ` Tony Lindgren
@ 2010-06-30 14:42                 ` Nicolas Pitre
  2010-07-01  9:25                   ` Tony Lindgren
  0 siblings, 1 reply; 33+ messages in thread
From: Nicolas Pitre @ 2010-06-30 14:42 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, 30 Jun 2010, Tony Lindgren wrote:

> * Tony Lindgren <tony@atomide.com> [100630 14:02]:
> > * Nicolas Pitre <nico@fluxnic.net> [100629 22:14]:
> > 
> > Updated patch below.
> 
> And a bug crept in..
> 
> > +	.macro set_tls_v6, tp, tmp1, tmp2
> > +	ldr	\tmp1, =elf_hwcap
> > +	ldr	\tmp1, [\tmp1, #0]
> > +	mov	\tmp2, #0xffff0fff
> > +	tst	\tmp2, #HWCAP_TLS		@ hardware TLS available?
> > +	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
> > +	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
> > +	.endm
> 
> The tst	\tmp2, #HWCAP_TLS should of course use \tmp1.
> 
> Also fixed a warning about adding parentheses around comparison
> if (id & 0x410f0000) != 0x41070000).

Here you probably want (id & 0xff0f0000) and not (id & 0x410f0000).

> Again, updated patch below.

I like it.  However, in proc-v6.S, you don't need to add a reference to 
feat_v6_fixup() to the __pj4_v6_proc_info block.  Simply adding 
HWCAP_TLS in that case should be fine as PJ4 always has the TLS reg.

With the above fixes, you can add

Reviewed-by: Nicolas Pitre <nicolas.pitre@linaro.org>


Nicolas

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 1/2] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6
  2010-06-30 14:42                 ` Nicolas Pitre
@ 2010-07-01  9:25                   ` Tony Lindgren
  2010-07-01 17:40                     ` Jamie Lokier
  0 siblings, 1 reply; 33+ messages in thread
From: Tony Lindgren @ 2010-07-01  9:25 UTC (permalink / raw)
  To: linux-arm-kernel

* Nicolas Pitre <nico@fluxnic.net> [100630 17:36]:
> On Wed, 30 Jun 2010, Tony Lindgren wrote:
> 
> > * Tony Lindgren <tony@atomide.com> [100630 14:02]:
> > > * Nicolas Pitre <nico@fluxnic.net> [100629 22:14]:
> > > 
> > > Updated patch below.
> > 
> > And a bug crept in..
> > 
> > > +	.macro set_tls_v6, tp, tmp1, tmp2
> > > +	ldr	\tmp1, =elf_hwcap
> > > +	ldr	\tmp1, [\tmp1, #0]
> > > +	mov	\tmp2, #0xffff0fff
> > > +	tst	\tmp2, #HWCAP_TLS		@ hardware TLS available?
> > > +	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
> > > +	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
> > > +	.endm
> > 
> > The tst	\tmp2, #HWCAP_TLS should of course use \tmp1.
> > 
> > Also fixed a warning about adding parentheses around comparison
> > if (id & 0x410f0000) != 0x41070000).
> 
> Here you probably want (id & 0xff0f0000) and not (id & 0x410f0000).

Thanks, fixed.
 
> > Again, updated patch below.
> 
> I like it.  However, in proc-v6.S, you don't need to add a reference to 
> feat_v6_fixup() to the __pj4_v6_proc_info block.  Simply adding 
> HWCAP_TLS in that case should be fine as PJ4 always has the TLS reg.

Fixed that too.
 
> With the above fixes, you can add
> 
> Reviewed-by: Nicolas Pitre <nicolas.pitre@linaro.org>

Thanks, updated patch below.

Tony
-------------- next part --------------
A non-text attachment was scrubbed...
Name: arm-tls-v6.patch
Type: text/x-diff
Size: 8726 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-arm-kernel/attachments/20100701/fa791efd/attachment-0001.bin>

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 2/2] arm: Make VFPv3 usable on ARMv6
  2010-06-25 13:50         ` Catalin Marinas
@ 2010-07-01 12:42           ` Tony Lindgren
  0 siblings, 0 replies; 33+ messages in thread
From: Tony Lindgren @ 2010-07-01 12:42 UTC (permalink / raw)
  To: linux-arm-kernel

* Catalin Marinas <catalin.marinas@arm.com> [100625 16:44]:
> On Wed, 2010-06-23 at 08:57 +0100, Tony Lindgren wrote:
> > MVFR0 and MVFR1 are only available starting with ARM1136 r1p0 release
> > according to "B.5 VFP changes" in DDI0211F_arm1136_r1p0_trm.pdf. This is
> > also when TLS register got added, so we can use HAS_TLS also to test for
> > MVFR0 and MVFR1.
> > 
> > Otherwise VFPFMRX and VFPFMXR access fails and we get:
> > 
> > Internal error: Oops - undefined instruction: 0 [#1]
> > PC is at no_old_VFP_process+0x8/0x3c
> > LR is at __und_svc+0x48/0x80
> > ...
> > 
> > Signed-off-by: Tony Lindgren <tony@atomide.com>
> 
> The new version looks fine to me.
> 
> Acked-by: Catalin Marinas <catalin.marinas@arm.com>

Thanks. Added your ack and posted to Russell's patch system:

http://www.arm.linux.org.uk/developer/patches/viewpatch.php?id=6203/1

Regards,

Tony

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 1/2] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6
  2010-07-01  9:25                   ` Tony Lindgren
@ 2010-07-01 17:40                     ` Jamie Lokier
  2010-07-02  2:37                       ` Nicolas Pitre
  0 siblings, 1 reply; 33+ messages in thread
From: Jamie Lokier @ 2010-07-01 17:40 UTC (permalink / raw)
  To: linux-arm-kernel

Tony Lindgren wrote:
> +static void __init kuser_get_tls_init(unsigned long vectors)
> +{
> +	/*
> +	 * vectors + 0xfe0 = __kuser_get_tls
> +	 * vectors + 0xfe8 = hardware TLS instruction at 0xffff0fe8
> +	 */
> +	if (tls_emu || has_tls)
> +		memcpy((void *)vectors + 0xfe0, (void *)vectors + 0xfe8, 4);
> +}

Just a little opinion: Perhaps has_tls_reg would be a clearer name.
All variants "have TLS" after all.

Also - and this isn't directly related to your change so feel free to
ignore it - wouldn't it make more sense for the tls_emu case to use
the memory version (and update the memory location), so that even on
tls_emu systems, user programs which call the kuser code will run faster?
With that change, there would be no real penalty to enabling tls_emu
for any system that finds it useful.

-- Jamie

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 1/2] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6
  2010-07-01 17:40                     ` Jamie Lokier
@ 2010-07-02  2:37                       ` Nicolas Pitre
  2010-07-02 10:37                         ` Tony Lindgren
  0 siblings, 1 reply; 33+ messages in thread
From: Nicolas Pitre @ 2010-07-02  2:37 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, 1 Jul 2010, Jamie Lokier wrote:

> Tony Lindgren wrote:
> > +static void __init kuser_get_tls_init(unsigned long vectors)
> > +{
> > +	/*
> > +	 * vectors + 0xfe0 = __kuser_get_tls
> > +	 * vectors + 0xfe8 = hardware TLS instruction at 0xffff0fe8
> > +	 */
> > +	if (tls_emu || has_tls)
> > +		memcpy((void *)vectors + 0xfe0, (void *)vectors + 0xfe8, 4);
> > +}
> 
> Just a little opinion: Perhaps has_tls_reg would be a clearer name.
> All variants "have TLS" after all.

Good point.

> Also - and this isn't directly related to your change so feel free to
> ignore it - wouldn't it make more sense for the tls_emu case to use
> the memory version (and update the memory location), so that even on
> tls_emu systems, user programs which call the kuser code will run faster?
> With that change, there would be no real penalty to enabling tls_emu
> for any system that finds it useful.

No, that's not possible.

The tls_emu case was created to allow SMP system without the actual 
TLS register to still work.  With SMP you cannot rely on a global memory 
location to hold the TLS value.  And because such systems are so 
"unusual" it was simpler to just do as if the TLS register was there and 
then emulate it within the instruction exception handler.


Nicolas

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 1/2] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6
  2010-07-02  2:37                       ` Nicolas Pitre
@ 2010-07-02 10:37                         ` Tony Lindgren
  2010-07-05 13:55                           ` Tony Lindgren
  0 siblings, 1 reply; 33+ messages in thread
From: Tony Lindgren @ 2010-07-02 10:37 UTC (permalink / raw)
  To: linux-arm-kernel

* Nicolas Pitre <nico@fluxnic.net> [100702 05:31]:
> On Thu, 1 Jul 2010, Jamie Lokier wrote:
> 
> > Tony Lindgren wrote:
> > > +static void __init kuser_get_tls_init(unsigned long vectors)
> > > +{
> > > +	/*
> > > +	 * vectors + 0xfe0 = __kuser_get_tls
> > > +	 * vectors + 0xfe8 = hardware TLS instruction at 0xffff0fe8
> > > +	 */
> > > +	if (tls_emu || has_tls)
> > > +		memcpy((void *)vectors + 0xfe0, (void *)vectors + 0xfe8, 4);
> > > +}
> > 
> > Just a little opinion: Perhaps has_tls_reg would be a clearer name.
> > All variants "have TLS" after all.
> 
> Good point.

I like that too. Updated patch below.

Tony
-------------- next part --------------
A non-text attachment was scrubbed...
Name: arm-tls-v7.patch
Type: text/x-diff
Size: 8749 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-arm-kernel/attachments/20100702/39043301/attachment.bin>

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 1/2] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6
  2010-07-02 10:37                         ` Tony Lindgren
@ 2010-07-05 13:55                           ` Tony Lindgren
  2011-04-08  3:39                             ` Li Li
  0 siblings, 1 reply; 33+ messages in thread
From: Tony Lindgren @ 2010-07-05 13:55 UTC (permalink / raw)
  To: linux-arm-kernel

* Tony Lindgren <tony@atomide.com> [100702 13:32]:
> * Nicolas Pitre <nico@fluxnic.net> [100702 05:31]:
> > On Thu, 1 Jul 2010, Jamie Lokier wrote:
> > 
> > > Tony Lindgren wrote:
> > > > +static void __init kuser_get_tls_init(unsigned long vectors)
> > > > +{
> > > > +	/*
> > > > +	 * vectors + 0xfe0 = __kuser_get_tls
> > > > +	 * vectors + 0xfe8 = hardware TLS instruction at 0xffff0fe8
> > > > +	 */
> > > > +	if (tls_emu || has_tls)
> > > > +		memcpy((void *)vectors + 0xfe0, (void *)vectors + 0xfe8, 4);
> > > > +}
> > > 
> > > Just a little opinion: Perhaps has_tls_reg would be a clearer name.
> > > All variants "have TLS" after all.
> > 
> > Good point.
> 
> I like that too. Updated patch below.

I've uploaded this patch into Russell's patch system:

http://www.arm.linux.org.uk/developer/patches/viewpatch.php?id=6207/1

Jamie, assuming no more comments, do you care to Ack/Reviewed/Tested-by
the patch in the link above?

Regards,

Tony

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 1/2] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6
  2010-07-05 13:55                           ` Tony Lindgren
@ 2011-04-08  3:39                             ` Li Li
  2011-04-08 13:19                               ` Nicolas Pitre
  0 siblings, 1 reply; 33+ messages in thread
From: Li Li @ 2011-04-08  3:39 UTC (permalink / raw)
  To: linux-arm-kernel

Dears,

I cannot understand how TLS EMU ensure it's SMP safe, because get_tls
helper (at 0xffff0fe0) just read the value from 0xffff0ff0. But all
SMP cores should have the exact same mapping to the vectors page (at
0xffff0000). So various threads running on different SMP cores at the
same time would get the same emulated TLS value (instead of thread
specific value set via set_tls). Could anybody explain this a little
bit?

Thanks,
Lea

On Mon, Jul 5, 2010 at 9:55 PM, Tony Lindgren <tony@atomide.com> wrote:
> * Tony Lindgren <tony@atomide.com> [100702 13:32]:
>> * Nicolas Pitre <nico@fluxnic.net> [100702 05:31]:
>> > On Thu, 1 Jul 2010, Jamie Lokier wrote:
>> >
>> > > Tony Lindgren wrote:
>> > > > +static void __init kuser_get_tls_init(unsigned long vectors)
>> > > > +{
>> > > > + ? ? ? /*
>> > > > + ? ? ? ?* vectors + 0xfe0 = __kuser_get_tls
>> > > > + ? ? ? ?* vectors + 0xfe8 = hardware TLS instruction at 0xffff0fe8
>> > > > + ? ? ? ?*/
>> > > > + ? ? ? if (tls_emu || has_tls)
>> > > > + ? ? ? ? ? ? ? memcpy((void *)vectors + 0xfe0, (void *)vectors + 0xfe8, 4);
>> > > > +}
>> > >
>> > > Just a little opinion: Perhaps has_tls_reg would be a clearer name.
>> > > All variants "have TLS" after all.
>> >
>> > Good point.
>>
>> I like that too. Updated patch below.
>
> I've uploaded this patch into Russell's patch system:
>
> http://www.arm.linux.org.uk/developer/patches/viewpatch.php?id=6207/1
>
> Jamie, assuming no more comments, do you care to Ack/Reviewed/Tested-by
> the patch in the link above?
>
> Regards,
>
> Tony
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
>

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 1/2] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6
  2011-04-08  3:39                             ` Li Li
@ 2011-04-08 13:19                               ` Nicolas Pitre
  2011-04-08 13:35                                 ` Li Li
  0 siblings, 1 reply; 33+ messages in thread
From: Nicolas Pitre @ 2011-04-08 13:19 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, 8 Apr 2011, Li Li wrote:

> Dears,
> 
> I cannot understand how TLS EMU ensure it's SMP safe, because get_tls
> helper (at 0xffff0fe0) just read the value from 0xffff0ff0. But all
> SMP cores should have the exact same mapping to the vectors page (at
> 0xffff0000). So various threads running on different SMP cores at the
> same time would get the same emulated TLS value (instead of thread
> specific value set via set_tls). Could anybody explain this a little
> bit?

On SMP the hardware TLS register is always available, therefore the TLS 
value is not stored at 0xffff0ff0.  The code actually installed at 
0xffff0fe0 is modified at boot time by kuser_get_tls_init to select 
either the ldr or the mrc instruction to retrieve the TLS value.


Nicolas

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 1/2] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6
  2011-04-08 13:19                               ` Nicolas Pitre
@ 2011-04-08 13:35                                 ` Li Li
  2011-04-08 14:35                                   ` Jamie Lokier
  0 siblings, 1 reply; 33+ messages in thread
From: Li Li @ 2011-04-08 13:35 UTC (permalink / raw)
  To: linux-arm-kernel

Nicolas,

Thanks for the explanation. :)

I noticed what you mentioned. Actually, Kconfig says TLS EMU might be
used in some "old" (e.g. pre-armv6) SMP platforms without TLS
register. In such a case, could we still ensure it's SMP safe by a
single ldr?

Thanks,
Lea

On Fri, Apr 8, 2011 at 9:19 PM, Nicolas Pitre <nico@fluxnic.net> wrote:
> On Fri, 8 Apr 2011, Li Li wrote:
>
>> Dears,
>>
>> I cannot understand how TLS EMU ensure it's SMP safe, because get_tls
>> helper (at 0xffff0fe0) just read the value from 0xffff0ff0. But all
>> SMP cores should have the exact same mapping to the vectors page (at
>> 0xffff0000). So various threads running on different SMP cores at the
>> same time would get the same emulated TLS value (instead of thread
>> specific value set via set_tls). Could anybody explain this a little
>> bit?
>
> On SMP the hardware TLS register is always available, therefore the TLS
> value is not stored at 0xffff0ff0. ?The code actually installed at
> 0xffff0fe0 is modified at boot time by kuser_get_tls_init to select
> either the ldr or the mrc instruction to retrieve the TLS value.
>
>
> Nicolas
>

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 1/2] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6
  2011-04-08 13:35                                 ` Li Li
@ 2011-04-08 14:35                                   ` Jamie Lokier
  2011-04-08 14:40                                     ` Li Li
  0 siblings, 1 reply; 33+ messages in thread
From: Jamie Lokier @ 2011-04-08 14:35 UTC (permalink / raw)
  To: linux-arm-kernel

Li Li wrote:
> Nicolas,
> 
> Thanks for the explanation. :)
> 
> I noticed what you mentioned. Actually, Kconfig says TLS EMU might be
> used in some "old" (e.g. pre-armv6) SMP platforms without TLS
> register. In such a case, could we still ensure it's SMP safe by a
> single ldr?

In that case, the hardware TLS 'mcr' is used, which traps and is
emulated by the undefined instruction handler.  It's not fast but
presumably those platforms don't really matter.

See CONFIG_TLS_REG_EMUL in arch/arm/kernel/traps.c, and 'tls_emu'.

'tls_emu' is a constant, so if a kernel built for TLS emulation is run
on something which has the TLS register, it will not work properly.

-- JAmie

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 1/2] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6
  2011-04-08 14:35                                   ` Jamie Lokier
@ 2011-04-08 14:40                                     ` Li Li
  0 siblings, 0 replies; 33+ messages in thread
From: Li Li @ 2011-04-08 14:40 UTC (permalink / raw)
  To: linux-arm-kernel

Ah, I see. That's why I saw abnormal behavior when I enabled TLS EMU
on an ARMv7 SMP SOC. Many thanks! :)

On Fri, Apr 8, 2011 at 10:35 PM, Jamie Lokier <jamie@shareable.org> wrote:
> Li Li wrote:
>> Nicolas,
>>
>> Thanks for the explanation. :)
>>
>> I noticed what you mentioned. Actually, Kconfig says TLS EMU might be
>> used in some "old" (e.g. pre-armv6) SMP platforms without TLS
>> register. In such a case, could we still ensure it's SMP safe by a
>> single ldr?
>
> In that case, the hardware TLS 'mcr' is used, which traps and is
> emulated by the undefined instruction handler. ?It's not fast but
> presumably those platforms don't really matter.
>
> See CONFIG_TLS_REG_EMUL in arch/arm/kernel/traps.c, and 'tls_emu'.
>
> 'tls_emu' is a constant, so if a kernel built for TLS emulation is run
> on something which has the TLS register, it will not work properly.
>
> -- JAmie
>

^ permalink raw reply	[flat|nested] 33+ messages in thread

end of thread, other threads:[~2011-04-08 14:40 UTC | newest]

Thread overview: 33+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-06-21 13:51 [PATCH 0/2] Make ARMv6 behave with TLS, VFPv3, and NEON Tony Lindgren
2010-06-21 13:51 ` [PATCH 1/2] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6 Tony Lindgren
2010-06-22  9:28   ` Tony Lindgren
2010-06-22 17:00   ` Jamie Lokier
2010-06-23  7:39     ` Tony Lindgren
2010-06-23  8:12       ` Russell King - ARM Linux
2010-06-23  9:28         ` Tony Lindgren
2010-06-23  9:32           ` Russell King - ARM Linux
2010-06-23 13:28           ` Jamie Lokier
2010-06-23 13:36       ` Jamie Lokier
2010-06-23 14:19         ` Nicolas Pitre
2010-06-24  0:28           ` Jamie Lokier
2010-06-29 14:18         ` Tony Lindgren
2010-06-29 19:20           ` Nicolas Pitre
2010-06-30 11:08             ` Tony Lindgren
2010-06-30 13:17               ` Tony Lindgren
2010-06-30 14:42                 ` Nicolas Pitre
2010-07-01  9:25                   ` Tony Lindgren
2010-07-01 17:40                     ` Jamie Lokier
2010-07-02  2:37                       ` Nicolas Pitre
2010-07-02 10:37                         ` Tony Lindgren
2010-07-05 13:55                           ` Tony Lindgren
2011-04-08  3:39                             ` Li Li
2011-04-08 13:19                               ` Nicolas Pitre
2011-04-08 13:35                                 ` Li Li
2011-04-08 14:35                                   ` Jamie Lokier
2011-04-08 14:40                                     ` Li Li
2010-06-21 13:51 ` [PATCH 2/2] arm: Make VFPv3 usable on ARMv6 Tony Lindgren
2010-06-22 12:59   ` Catalin Marinas
2010-06-22 13:20     ` Tony Lindgren
2010-06-23  7:57       ` Tony Lindgren
2010-06-25 13:50         ` Catalin Marinas
2010-07-01 12:42           ` Tony Lindgren

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).