Linux MIPS Architecture development
 help / color / mirror / Atom feed
From: Sergei Shtylyov <sshtylyov@mvista.com>
To: Ralf Baechle <ralf@linux-mips.org>
Cc: linux-mips@linux-mips.org, Paul Gortmaker <paul.gortmaker@windriver.com>
Subject: Re: MIPS: Get rid of branches to .subsections.
Date: Wed, 18 Aug 2010 17:25:20 +0400	[thread overview]
Message-ID: <4C6BDF40.9050804@mvista.com> (raw)
In-Reply-To: <20100818124310.GA23744@linux-mips.org>

Hello.

Ralf Baechle wrote:

> It was a nice optimization - on paper at least.  In practice it results in
> branches that may exceed the maximum legal range for a branch.  We can
> fight that problem with -ffunction-sections but -ffunction-sections again
> is incompatible with -pg used by the function tracer.

> By rewriting the loop around all simple LL/SC blocks to C we reduce reduce
> the amount of inline assembler and at the same time allow GCC to often
> fill the branch delay slots with something sensible or whever else clever
                                                          ^^^^^^
    Whichever?

> optimization it may have up in its sleeve.

> With this optimization gone we also no longer need -ffunction-sections,
> so drop it.

> This optimization was originall introduced in 2.6.21, commit
                         ^^^^^^^^^
     Originally.

> 5999eca25c1fd4b9b9aca7833b04d10fe4bc877d (linux-mips.org) rsp.
> f65e4fa8e0c6022ad58dc88d1b11b12589ed7f9f (kernel.org).

> Original fix for the issues which caused me to pull this optimization by
> Paul Gortmaker <paul.gortmaker@windriver.com>.

> Signed-off-by: Ralf Baechle <ralf@linux-mips.org>

> Index: linux-queue/arch/mips/include/asm/atomic.h
> ===================================================================
> --- linux-queue.orig/arch/mips/include/asm/atomic.h
> +++ linux-queue/arch/mips/include/asm/atomic.h

[...]

> @@ -443,18 +436,16 @@ static __inline__ void atomic64_add(long
>  	} else if (kernel_uses_llsc) {
>  		long temp;
>  
> -		__asm__ __volatile__(
> -		"	.set	mips3					\n"
> -		"1:	lld	%0, %1		# atomic64_add		\n"
> -		"	daddu	%0, %2					\n"
> -		"	scd	%0, %1					\n"
> -		"	beqz	%0, 2f					\n"
> -		"	.subsection 2					\n"
> -		"2:	b	1b					\n"
> -		"	.previous					\n"
> -		"	.set	mips0					\n"
> -		: "=&r" (temp), "=m" (v->counter)
> -		: "Ir" (i), "m" (v->counter));
> +		do {
> +			__asm__ __volatile__(
> +			"	.set	mips3				\n"
> +			"1:	lld	%0, %1		# atomic64_add	\n"

    You've kept the label here but it seems unused...

> +			"	daddu	%0, %2				\n"
> +			"	scd	%0, %1				\n"
> +			"	.set	mips0				\n"
> +			: "=&r" (temp), "=m" (v->counter)
> +			: "Ir" (i), "m" (v->counter));
> +		} while (unlikely(!temp));
>  	} else {
>  		unsigned long flags;
>  
> @@ -488,18 +479,16 @@ static __inline__ void atomic64_sub(long
>  	} else if (kernel_uses_llsc) {
>  		long temp;
>  
> -		__asm__ __volatile__(
> -		"	.set	mips3					\n"
> -		"1:	lld	%0, %1		# atomic64_sub		\n"
> -		"	dsubu	%0, %2					\n"
> -		"	scd	%0, %1					\n"
> -		"	beqz	%0, 2f					\n"
> -		"	.subsection 2					\n"
> -		"2:	b	1b					\n"
> -		"	.previous					\n"
> -		"	.set	mips0					\n"
> -		: "=&r" (temp), "=m" (v->counter)
> -		: "Ir" (i), "m" (v->counter));
> +		do {
> +			__asm__ __volatile__(
> +			"	.set	mips3				\n"
> +			"1:	lld	%0, %1		# atomic64_sub	\n"

    Same here...

> +			"	dsubu	%0, %2				\n"
> +			"	scd	%0, %1				\n"
> +			"	.set	mips0				\n"
> +			: "=&r" (temp), "=m" (v->counter)
> +			: "Ir" (i), "m" (v->counter));
> +		} while (unlikely(!temp));
>  	} else {
>  		unsigned long flags;
>  
> @@ -535,20 +524,19 @@ static __inline__ long atomic64_add_retu
>  	} else if (kernel_uses_llsc) {
>  		long temp;
>  
> -		__asm__ __volatile__(
> -		"	.set	mips3					\n"
> -		"1:	lld	%1, %2		# atomic64_add_return	\n"
> -		"	daddu	%0, %1, %3				\n"
> -		"	scd	%0, %2					\n"
> -		"	beqz	%0, 2f					\n"
> -		"	daddu	%0, %1, %3				\n"
> -		"	.subsection 2					\n"
> -		"2:	b	1b					\n"
> -		"	.previous					\n"
> -		"	.set	mips0					\n"
> -		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
> -		: "Ir" (i), "m" (v->counter)
> -		: "memory");
> +		do {
> +			__asm__ __volatile__(
> +			"	.set	mips3				\n"
> +			"1:	lld	%1, %2	# atomic64_add_return	\n"

    ... and here.

> +			"	daddu	%0, %1, %3			\n"
> +			"	scd	%0, %2				\n"
> +			"	.set	mips0				\n"
> +			: "=&r" (result), "=&r" (temp), "=m" (v->counter)
> +			: "Ir" (i), "m" (v->counter)
> +			: "memory");
> +		} while (unlikely(!result));
> +
> +		result = temp + i;
>  	} else {
>  		unsigned long flags;
>  
> @@ -587,20 +575,19 @@ static __inline__ long atomic64_sub_retu
>  	} else if (kernel_uses_llsc) {
>  		long temp;
>  
> -		__asm__ __volatile__(
> -		"	.set	mips3					\n"
> -		"1:	lld	%1, %2		# atomic64_sub_return	\n"
> -		"	dsubu	%0, %1, %3				\n"
> -		"	scd	%0, %2					\n"
> -		"	beqz	%0, 2f					\n"
> -		"	dsubu	%0, %1, %3				\n"
> -		"	.subsection 2					\n"
> -		"2:	b	1b					\n"
> -		"	.previous					\n"
> -		"	.set	mips0					\n"
> -		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
> -		: "Ir" (i), "m" (v->counter)
> -		: "memory");
> +		do {
> +			__asm__ __volatile__(
> +			"	.set	mips3				\n"
> +			"1:	lld	%1, %2	# atomic64_sub_return	\n"

    ... and here.

> +			"	dsubu	%0, %1, %3			\n"
> +			"	scd	%0, %2				\n"
> +			"	.set	mips0				\n"
> +			: "=&r" (result), "=&r" (temp), "=m" (v->counter)
> +			: "Ir" (i), "m" (v->counter)
> +			: "memory");
> +		} while (unlikely(!result));
> +
> +		result = temp - i;
>  	} else {
>  		unsigned long flags;
>  

> Index: linux-queue/arch/mips/include/asm/bitops.h
> ===================================================================
> --- linux-queue.orig/arch/mips/include/asm/bitops.h
> +++ linux-queue/arch/mips/include/asm/bitops.h
> @@ -213,24 +205,22 @@ static inline void change_bit(unsigned l
>  		"	" __SC	"%0, %1				\n"
>  		"	beqzl	%0, 1b				\n"
>  		"	.set	mips0				\n"
> -		: "=&r" (temp), "=m" (*m)
> -		: "ir" (1UL << bit), "m" (*m));
> +		: "=&r" (temp), "+m" (*m)
> +		: "ir" (1UL << bit));
>  	} else if (kernel_uses_llsc) {
>  		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
>  		unsigned long temp;
>  
> -		__asm__ __volatile__(
> -		"	.set	mips3				\n"
> -		"1:	" __LL "%0, %1		# change_bit	\n"
> -		"	xor	%0, %2				\n"
> -		"	" __SC	"%0, %1				\n"
> -		"	beqz	%0, 2f				\n"
> -		"	.subsection 2				\n"
> -		"2:	b	1b				\n"
> -		"	.previous				\n"
> -		"	.set	mips0				\n"
> -		: "=&r" (temp), "=m" (*m)
> -		: "ir" (1UL << bit), "m" (*m));
> +		do {
> +			__asm__ __volatile__(
> +			"	.set	mips3				\n"
> +			"1:	" __LL "%0, %1		# change_bit	\n"

    ... and here too.

> +			"	xor	%0, %2				\n"
> +			"	" __SC	"%0, %1				\n"
> +			"	.set	mips0				\n"
> +			: "=&r" (temp), "+m" (*m)
> +			: "ir" (1UL << bit));
> +		} while (unlikely(!temp));
>  	} else {
>  		volatile unsigned long *a = addr;
>  		unsigned long mask;
> @@ -272,30 +262,26 @@ static inline int test_and_set_bit(unsig
>  		"	beqzl	%2, 1b					\n"
>  		"	and	%2, %0, %3				\n"
>  		"	.set	mips0					\n"
> -		: "=&r" (temp), "=m" (*m), "=&r" (res)
> -		: "r" (1UL << bit), "m" (*m)
> +		: "=&r" (temp), "+m" (*m), "=&r" (res)
> +		: "r" (1UL << bit)
>  		: "memory");
>  	} else if (kernel_uses_llsc) {
>  		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
>  		unsigned long temp;
>  
> -		__asm__ __volatile__(
> -		"	.set	push					\n"
> -		"	.set	noreorder				\n"
> -		"	.set	mips3					\n"
> -		"1:	" __LL "%0, %1		# test_and_set_bit	\n"
> -		"	or	%2, %0, %3				\n"
> -		"	" __SC	"%2, %1					\n"
> -		"	beqz	%2, 2f					\n"
> -		"	 and	%2, %0, %3				\n"
> -		"	.subsection 2					\n"
> -		"2:	b	1b					\n"
> -		"	 nop						\n"
> -		"	.previous					\n"
> -		"	.set	pop					\n"
> -		: "=&r" (temp), "=m" (*m), "=&r" (res)
> -		: "r" (1UL << bit), "m" (*m)
> -		: "memory");
> +		do {
> +			__asm__ __volatile__(
> +			"	.set	mips3				\n"
> +			"1:	" __LL "%0, %1	# test_and_set_bit	\n"

    ... and here as well.

> +			"	or	%2, %0, %3			\n"
> +			"	" __SC	"%2, %1				\n"
> +			"	.set	mips0				\n"
> +			: "=&r" (temp), "+m" (*m), "=&r" (res)
> +			: "r" (1UL << bit)
> +			: "memory");
> +		} while (unlikely(!res));
> +
> +		res = temp & (1UL << bit);
>  	} else {
>  		volatile unsigned long *a = addr;
>  		unsigned long mask;
> @@ -340,30 +326,26 @@ static inline int test_and_set_bit_lock(
>  		"	beqzl	%2, 1b					\n"
>  		"	and	%2, %0, %3				\n"
>  		"	.set	mips0					\n"
> -		: "=&r" (temp), "=m" (*m), "=&r" (res)
> -		: "r" (1UL << bit), "m" (*m)
> +		: "=&r" (temp), "+m" (*m), "=&r" (res)
> +		: "r" (1UL << bit)
>  		: "memory");
>  	} else if (kernel_uses_llsc) {
>  		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
>  		unsigned long temp;
>  
> -		__asm__ __volatile__(
> -		"	.set	push					\n"
> -		"	.set	noreorder				\n"
> -		"	.set	mips3					\n"
> -		"1:	" __LL "%0, %1		# test_and_set_bit	\n"
> -		"	or	%2, %0, %3				\n"
> -		"	" __SC	"%2, %1					\n"
> -		"	beqz	%2, 2f					\n"
> -		"	 and	%2, %0, %3				\n"
> -		"	.subsection 2					\n"
> -		"2:	b	1b					\n"
> -		"	 nop						\n"
> -		"	.previous					\n"
> -		"	.set	pop					\n"
> -		: "=&r" (temp), "=m" (*m), "=&r" (res)
> -		: "r" (1UL << bit), "m" (*m)
> -		: "memory");
> +		do {
> +			__asm__ __volatile__(
> +			"	.set	mips3				\n"
> +			"1:	" __LL "%0, %1	# test_and_set_bit	\n"

    ... and here.

> +			"	or	%2, %0, %3			\n"
> +			"	" __SC	"%2, %1				\n"
> +			"	.set	mips0				\n"
> +			: "=&r" (temp), "+m" (*m), "=&r" (res)
> +			: "r" (1UL << bit)
> +			: "memory");
> +		} while(unlikely(!res));
> +
> +		res = temp & (1UL << bit);
>  	} else {
>  		volatile unsigned long *a = addr;
>  		unsigned long mask;
> @@ -410,49 +392,43 @@ static inline int test_and_clear_bit(uns
[...]
>  	} else if (kernel_uses_llsc) {
>  		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
>  		unsigned long temp;
>  
> -		__asm__ __volatile__(
> -		"	.set	push					\n"
> -		"	.set	noreorder				\n"
> -		"	.set	mips3					\n"
> -		"1:	" __LL	"%0, %1		# test_and_clear_bit	\n"
> -		"	or	%2, %0, %3				\n"
> -		"	xor	%2, %3					\n"
> -		"	" __SC 	"%2, %1					\n"
> -		"	beqz	%2, 2f					\n"
> -		"	 and	%2, %0, %3				\n"
> -		"	.subsection 2					\n"
> -		"2:	b	1b					\n"
> -		"	 nop						\n"
> -		"	.previous					\n"
> -		"	.set	pop					\n"
> -		: "=&r" (temp), "=m" (*m), "=&r" (res)
> -		: "r" (1UL << bit), "m" (*m)
> -		: "memory");
> +		do {
> +			__asm__ __volatile__(
> +			"	.set	mips3				\n"
> +			"1:	" __LL	"%0, %1	# test_and_clear_bit	\n"

    ... and here.

> +			"	or	%2, %0, %3			\n"
> +			"	xor	%2, %3				\n"
> +			"	" __SC 	"%2, %1				\n"
> +			"	.set	mips0				\n"
> +			: "=&r" (temp), "+m" (*m), "=&r" (res)
> +			: "r" (1UL << bit)
> +			: "memory");
> +		} while (unlikely(!res));
> +
> +		res = temp & (1UL << bit);
>  	} else {
>  		volatile unsigned long *a = addr;
>  		unsigned long mask;

WBR, Sergei

  reply	other threads:[~2010-08-18 13:26 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-08-18 12:43 MIPS: Get rid of branches to .subsections Ralf Baechle
2010-08-18 13:25 ` Sergei Shtylyov [this message]
2010-08-18 13:42   ` Ralf Baechle
2010-08-18 13:33 ` Matt Fleming
2010-08-23  0:54 ` Maciej W. Rozycki
2010-08-23 10:12   ` Ralf Baechle
2010-08-23 11:25     ` Maciej W. Rozycki

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4C6BDF40.9050804@mvista.com \
    --to=sshtylyov@mvista.com \
    --cc=linux-mips@linux-mips.org \
    --cc=paul.gortmaker@windriver.com \
    --cc=ralf@linux-mips.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox