linux-omap.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Tony Lindgren <tony@atomide.com>
To: Jamie Lokier <jamie@shareable.org>
Cc: linux-arm-kernel@lists.infradead.org, linux-omap@vger.kernel.org
Subject: Re: [PATCH 1/2] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6
Date: Tue, 29 Jun 2010 17:18:36 +0300	[thread overview]
Message-ID: <20100629141836.GM2822@atomide.com> (raw)
In-Reply-To: <20100623133636.GC7058@shareable.org>

[-- Attachment #1: Type: text/plain, Size: 2322 bytes --]

* Jamie Lokier <jamie@shareable.org> [100623 16:30]:
> Tony Lindgren wrote:
> > * Jamie Lokier <jamie@shareable.org> [100622 19:54]:
> > > Tony Lindgren wrote:
> > > >  __kuser_get_tls:				@ 0xffff0fe0
> > > > -
> > > > -#if !defined(CONFIG_HAS_TLS_REG) && !defined(CONFIG_TLS_REG_EMUL)
> > > > -	ldr	r0, [pc, #(16 - 8)]		@ TLS stored at 0xffff0ff0
> > > > -#else
> > > > -	mrc	p15, 0, r0, c13, c0, 3		@ read TLS register
> > > > -#endif
> > > > +	ldr     r0, [pc, #(20 - 8)]		@ software TLS set in 0xffff0ff4?
> > > > +	cmp	r0, #0				@ hardware TLS if flag not set
> > > > +	mrceq	p15, 0, r0, c13, c0, 3		@ read hardware TLS register
> > > > +	ldrne	r0, [pc, #(12 - 8)]		@ software TLS val at 0xffff0ff8
> > > >  	usr_ret	lr
> > > > -
> > > > -	.rep	5
> > > > -	.word	0			@ pad up to __kuser_helper_version
> > > > -	.endr
> > > > +	.word	0				@ non-zero for software TLS
> > > > +	.word	0				@ software TLS value
> > > 
> > > It'd be nice not to waste instructions checking for HWCAP_TLS on archs
> > > which definitely don't have it.  I guess it doesn't matter elsewhere;
> > > I'd expect this to be a warm path for some programs making extensive
> > > use of TLS (I haven't measured though).
> > 
> > OK, but let's try to figure out a way that does not add more ifdef else
> > code as that makes it harder to build support for multiple ARM cores.
> >  
> > > As it's only a single instruction, and the code is in a writable page
> > > already (copied at init), how about just patching the instruction
> > > when ELF_HWCAP is set?
> > 
> > Yeah that can be done for __kuser_get_tls if it's always writable.
> > But __switch_to is trickier because of the CONFIG_MMU ifdefs there.
> 
> __kuser_get_tls must be writable in kernels where !HAS_TLS_REG is
> supported, because the TLS value is written to the same page.
> 
> I was thinking of changing *only* __kuser_get_tls, by the way.  Out of
> all the different places, that's the only one I'd expect to be a hot
> path in some TLS-using programs.

OK. Sorry for the delay again. Here's an updated version that sets
__kuser_get_tls instruction dynamically. Does this do what you were
thinking, or did I miss something?

Also, can we somehow detect the hardware that uses CONFIG_TLS_REG_EMUL?
It might be possible to remove that Kconfig option too later on.

Regards,

Tony

[-- Attachment #2: arm-tls-v3.patch --]
[-- Type: text/x-diff, Size: 7349 bytes --]

From: Tony Lindgren <tony@atomide.com>
Date: Tue, 29 Jun 2010 13:34:53 +0300
Subject: [PATCH] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6

The TLS register is only available on ARM1136 r1p0 and later.
Set HWCAP_TLS flags if hardware TLS is available.

Note that we set the TLS instruction in __kuser_get_tls
dynamically as suggested by Jamie Lokier <jamie@shareable.org>.

Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/include/asm/hwcap.h b/arch/arm/include/asm/hwcap.h
index f7bd52b..c1062c3 100644
--- a/arch/arm/include/asm/hwcap.h
+++ b/arch/arm/include/asm/hwcap.h
@@ -19,6 +19,7 @@
 #define HWCAP_NEON	4096
 #define HWCAP_VFPv3	8192
 #define HWCAP_VFPv3D16	16384
+#define HWCAP_TLS	32768
 
 #if defined(__KERNEL__) && !defined(__ASSEMBLY__)
 /*
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 7ee48e7..949df9b 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -739,11 +739,13 @@ ENTRY(__switch_to)
 #ifdef CONFIG_MMU
 	ldr	r6, [r2, #TI_CPU_DOMAIN]
 #endif
-#if defined(CONFIG_HAS_TLS_REG)
-	mcr	p15, 0, r3, c13, c0, 3		@ set TLS register
-#elif !defined(CONFIG_TLS_REG_EMUL)
-	mov	r4, #0xffff0fff
-	str	r3, [r4, #-15]			@ TLS val at 0xffff0ff0
+#if !defined(CONFIG_TLS_REG_EMUL)
+	ldr	r4, =elf_hwcap
+	ldr	r4, [r4, #0]
+	mov	r5, #0xffff0fff
+	tst	r4, #HWCAP_TLS			@ hardware TLS available?
+	mcrne	p15, 0, r3, c13, c0, 3		@ yes, set TLS register
+	streq	r3, [r5, #-15]			@ set TLS value at 0xffff0ff0
 #endif
 #ifdef CONFIG_MMU
 	mcr	p15, 0, r6, c3, c0, 0		@ Set domain register
@@ -1009,16 +1011,13 @@ kuser_cmpxchg_fixup:
  */
 
 __kuser_get_tls:				@ 0xffff0fe0
-
-#if !defined(CONFIG_HAS_TLS_REG) && !defined(CONFIG_TLS_REG_EMUL)
-	ldr	r0, [pc, #(16 - 8)]		@ TLS stored at 0xffff0ff0
-#else
-	mrc	p15, 0, r0, c13, c0, 3		@ read TLS register
-#endif
+	nop				@ read TLS, set in kuser_get_tls_init
 	usr_ret	lr
-
-	.rep	5
-	.word	0			@ pad up to __kuser_helper_version
+	mrc	p15, 0, r0, c13, c0, 3	@ 0xffff0fe8 hardware TLS code
+	ldr	r0, [pc, #(16 - 8)]	@ 0xffff0fec software TLS code
+	.word	0			@ 0xffff0ff0 software TLS value
+	.rep	2			@ pairs with the .endr kept below
+	nop				@ pad up to __kuser_helper_version
 	.endr
 
 /*
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 122d999..a675260 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -269,6 +269,27 @@ static void __init cacheid_init(void)
 extern struct proc_info_list *lookup_processor_type(unsigned int);
 extern struct machine_desc *lookup_machine_type(unsigned int);
 
+#ifdef CONFIG_CPU_V6
+static void __init feat_v6_fixup(void)
+{
+	int id = read_cpuid_id();
+
+	if ((id & 0x000f0000) != 0x00070000)	/* mask first: != binds tighter than & */
+		return;
+
+	/*
+	 * HWCAP_TLS is available only on 1136 r1p0 and later,
+	 * see also kuser_get_tls_init.
+	 */
+	if ((((id >> 4) & 0xfff) == 0xb36) && (((id >> 20) & 3) == 0))
+		elf_hwcap &= ~HWCAP_TLS;
+}
+#else
+static inline void feat_v6_fixup(void)
+{
+}
+#endif
+
 static void __init setup_processor(void)
 {
 	struct proc_info_list *list;
@@ -311,6 +332,8 @@ static void __init setup_processor(void)
 	elf_hwcap &= ~HWCAP_THUMB;
 #endif
 
+	feat_v6_fixup();
+
 	cacheid_init();
 	cpu_proc_init();
 }
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 1621e53..85dd001 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -518,16 +518,19 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
 
 	case NR(set_tls):
 		thread->tp_value = regs->ARM_r0;
-#if defined(CONFIG_HAS_TLS_REG)
-		asm ("mcr p15, 0, %0, c13, c0, 3" : : "r" (regs->ARM_r0) );
-#elif !defined(CONFIG_TLS_REG_EMUL)
-		/*
-		 * User space must never try to access this directly.
-		 * Expect your app to break eventually if you do so.
-		 * The user helper at 0xffff0fe0 must be used instead.
-		 * (see entry-armv.S for details)
-		 */
-		*((unsigned int *)0xffff0ff0) = regs->ARM_r0;
+#if !defined(CONFIG_TLS_REG_EMUL)
+		if (elf_hwcap & HWCAP_TLS) {
+			asm ("mcr p15, 0, %0, c13, c0, 3"
+				: : "r" (regs->ARM_r0));
+		} else {
+			/*
+			 * User space must never try to access this directly.
+			 * Expect your app to break eventually if you do so.
+			 * The user helper at 0xffff0fe0 must be used instead.
+			 * (see entry-armv.S for details)
+			 */
+			*((unsigned int *)0xffff0ff0) = regs->ARM_r0;
+		}
 #endif
 		return 0;
 
@@ -743,6 +746,21 @@ void __init trap_init(void)
 	return;
 }
 
+/*
+ * Pick the first __kuser_get_tls instruction: template at 0xfe8 (hardware
+ * TLS code) or 0xfec (software TLS code), copied to offset 0xfe0.
+ */
+static void __init kuser_get_tls_init(unsigned long vectors)
+{
+	unsigned long tmpl = 0xfe8;
+
+#if !defined(CONFIG_TLS_REG_EMUL)
+	if (!(elf_hwcap & HWCAP_TLS))
+		tmpl = 0xfec;
+#endif
+	memcpy((void *)vectors + 0xfe0, (void *)vectors + tmpl, 4);
+}
+
 void __init early_trap_init(void)
 {
 	unsigned long vectors = CONFIG_VECTORS_BASE;
@@ -761,6 +779,11 @@ void __init early_trap_init(void)
 	memcpy((void *)vectors + 0x1000 - kuser_sz, __kuser_helper_start, kuser_sz);
 
 	/*
+	 * Do processor specific fixups for the kuser helpers
+	 */
+	kuser_get_tls_init(vectors);
+
+	/*
 	 * Copy signal return handlers into the vector page, and
 	 * set sigreturn to be a pointer to these.
 	 */
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 346ae14..71d5d5e 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -717,17 +717,6 @@ config TLS_REG_EMUL
 	  a few prototypes like that in existence) and therefore access to
 	  that required register must be emulated.
 
-config HAS_TLS_REG
-	bool
-	depends on !TLS_REG_EMUL
-	default y if SMP || CPU_32v7
-	help
-	  This selects support for the CP15 thread register.
-	  It is defined to be available on some ARMv6 processors (including
-	  all SMP capable ARMv6's) or later processors.  User space may
-	  assume directly accessing that register and always obtain the
-	  expected value only on ARMv7 and above.
-
 config NEEDS_SYSCALL_FOR_CMPXCHG
 	bool
 	help
diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S
index 7a5337e..e10626a 100644
--- a/arch/arm/mm/proc-v6.S
+++ b/arch/arm/mm/proc-v6.S
@@ -239,7 +239,8 @@ __v6_proc_info:
 	b	__v6_setup
 	.long	cpu_arch_name
 	.long	cpu_elf_name
-	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA
+	/* See also feat_v6_fixup() for HWCAP_TLS */
+	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA|HWCAP_TLS
 	.long	cpu_v6_name
 	.long	v6_processor_functions
 	.long	v6wbi_tlb_fns
@@ -262,7 +263,8 @@ __pj4_v6_proc_info:
 	b	__v6_setup
 	.long	cpu_arch_name
 	.long	cpu_elf_name
-	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
+	/* See also feat_v6_fixup() for HWCAP_TLS */
+	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_TLS
 	.long	cpu_pj4_name
 	.long	v6_processor_functions
 	.long	v6wbi_tlb_fns
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 7aaf88a..8071bcd 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -344,7 +344,7 @@ __v7_proc_info:
 	b	__v7_setup
 	.long	cpu_arch_name
 	.long	cpu_elf_name
-	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
+	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_TLS
 	.long	cpu_v7_name
 	.long	v7_processor_functions
 	.long	v7wbi_tlb_fns

  parent reply	other threads:[~2010-06-29 14:18 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-06-21 13:51 [PATCH 0/2] Make ARMv6 behave with TLS, VFPv3, and NEON Tony Lindgren
2010-06-21 13:51 ` [PATCH 1/2] arm: Replace CONFIG_HAS_TLS_REG with HWCAP_TLS and check for it on V6 Tony Lindgren
2010-06-22  9:28   ` Tony Lindgren
2010-06-22 17:00   ` Jamie Lokier
2010-06-23  7:39     ` Tony Lindgren
2010-06-23  8:12       ` Russell King - ARM Linux
2010-06-23  9:28         ` Tony Lindgren
2010-06-23  9:32           ` Russell King - ARM Linux
2010-06-23 13:28           ` Jamie Lokier
2010-06-23 13:36       ` Jamie Lokier
2010-06-23 14:19         ` Nicolas Pitre
2010-06-24  0:28           ` Jamie Lokier
2010-06-29 14:18         ` Tony Lindgren [this message]
2010-06-29 19:20           ` Nicolas Pitre
2010-06-30 11:08             ` Tony Lindgren
2010-06-30 13:17               ` Tony Lindgren
2010-06-30 14:42                 ` Nicolas Pitre
2010-07-01  9:25                   ` Tony Lindgren
2010-07-01 17:40                     ` Jamie Lokier
2010-07-02  2:37                       ` Nicolas Pitre
2010-07-02 10:37                         ` Tony Lindgren
2010-07-05 13:55                           ` Tony Lindgren
2011-04-08  3:39                             ` Li Li
2011-04-08 13:19                               ` Nicolas Pitre
2011-04-08 13:35                                 ` Li Li
2011-04-08 14:35                                   ` Jamie Lokier
2011-04-08 14:40                                     ` Li Li
2010-06-21 13:51 ` [PATCH 2/2] arm: Make VFPv3 usable on ARMv6 Tony Lindgren
2010-06-22 12:59   ` Catalin Marinas
2010-06-22 13:20     ` Tony Lindgren
2010-06-23  7:57       ` Tony Lindgren
2010-06-25 13:50         ` Catalin Marinas
2010-07-01 12:42           ` Tony Lindgren

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100629141836.GM2822@atomide.com \
    --to=tony@atomide.com \
    --cc=jamie@shareable.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-omap@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).