From: Sergei Shtylyov <sshtylyov@mvista.com>
To: Ralf Baechle <ralf@linux-mips.org>
Cc: linux-mips@linux-mips.org, Paul Gortmaker <paul.gortmaker@windriver.com>
Subject: Re: MIPS: Get rid of branches to .subsections.
Date: Wed, 18 Aug 2010 17:25:20 +0400 [thread overview]
Message-ID: <4C6BDF40.9050804@mvista.com> (raw)
In-Reply-To: <20100818124310.GA23744@linux-mips.org>
Hello.
Ralf Baechle wrote:
> It was a nice optimization - on paper at least. In practice it results in
> branches that may exceed the maximum legal range for a branch. We can
> fight that problem with -ffunction-sections but -ffunction-sections again
> is incompatible with -pg used by the function tracer.
> By rewriting the loop around all simple LL/SC blocks to C we reduce reduce
> the amount of inline assembler and at the same time allow GCC to often
> fill the branch delay slots with something sensible or whever else clever
^^^^^^
Whichever?
> optimization it may have up in its sleeve.
> With this optimization gone we also no longer need -ffunction-sections,
> so drop it.
> This optimization was originall introduced in 2.6.21, commit
^^^^^^^^^
Originally.
> 5999eca25c1fd4b9b9aca7833b04d10fe4bc877d (linux-mips.org) rsp.
> f65e4fa8e0c6022ad58dc88d1b11b12589ed7f9f (kernel.org).
> Original fix for the issues which caused me to pull this optimization by
> Paul Gortmaker <paul.gortmaker@windriver.com>.
> Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
> Index: linux-queue/arch/mips/include/asm/atomic.h
> ===================================================================
> --- linux-queue.orig/arch/mips/include/asm/atomic.h
> +++ linux-queue/arch/mips/include/asm/atomic.h
[...]
> @@ -443,18 +436,16 @@ static __inline__ void atomic64_add(long
> } else if (kernel_uses_llsc) {
> long temp;
>
> - __asm__ __volatile__(
> - " .set mips3 \n"
> - "1: lld %0, %1 # atomic64_add \n"
> - " daddu %0, %2 \n"
> - " scd %0, %1 \n"
> - " beqz %0, 2f \n"
> - " .subsection 2 \n"
> - "2: b 1b \n"
> - " .previous \n"
> - " .set mips0 \n"
> - : "=&r" (temp), "=m" (v->counter)
> - : "Ir" (i), "m" (v->counter));
> + do {
> + __asm__ __volatile__(
> + " .set mips3 \n"
> + "1: lld %0, %1 # atomic64_add \n"
You've kept the label here but it seems unused...
> + " daddu %0, %2 \n"
> + " scd %0, %1 \n"
> + " .set mips0 \n"
> + : "=&r" (temp), "=m" (v->counter)
> + : "Ir" (i), "m" (v->counter));
> + } while (unlikely(!temp));
> } else {
> unsigned long flags;
>
> @@ -488,18 +479,16 @@ static __inline__ void atomic64_sub(long
> } else if (kernel_uses_llsc) {
> long temp;
>
> - __asm__ __volatile__(
> - " .set mips3 \n"
> - "1: lld %0, %1 # atomic64_sub \n"
> - " dsubu %0, %2 \n"
> - " scd %0, %1 \n"
> - " beqz %0, 2f \n"
> - " .subsection 2 \n"
> - "2: b 1b \n"
> - " .previous \n"
> - " .set mips0 \n"
> - : "=&r" (temp), "=m" (v->counter)
> - : "Ir" (i), "m" (v->counter));
> + do {
> + __asm__ __volatile__(
> + " .set mips3 \n"
> + "1: lld %0, %1 # atomic64_sub \n"
Same here...
> + " dsubu %0, %2 \n"
> + " scd %0, %1 \n"
> + " .set mips0 \n"
> + : "=&r" (temp), "=m" (v->counter)
> + : "Ir" (i), "m" (v->counter));
> + } while (unlikely(!temp));
> } else {
> unsigned long flags;
>
> @@ -535,20 +524,19 @@ static __inline__ long atomic64_add_retu
> } else if (kernel_uses_llsc) {
> long temp;
>
> - __asm__ __volatile__(
> - " .set mips3 \n"
> - "1: lld %1, %2 # atomic64_add_return \n"
> - " daddu %0, %1, %3 \n"
> - " scd %0, %2 \n"
> - " beqz %0, 2f \n"
> - " daddu %0, %1, %3 \n"
> - " .subsection 2 \n"
> - "2: b 1b \n"
> - " .previous \n"
> - " .set mips0 \n"
> - : "=&r" (result), "=&r" (temp), "=m" (v->counter)
> - : "Ir" (i), "m" (v->counter)
> - : "memory");
> + do {
> + __asm__ __volatile__(
> + " .set mips3 \n"
> + "1: lld %1, %2 # atomic64_add_return \n"
... and here.
> + " daddu %0, %1, %3 \n"
> + " scd %0, %2 \n"
> + " .set mips0 \n"
> + : "=&r" (result), "=&r" (temp), "=m" (v->counter)
> + : "Ir" (i), "m" (v->counter)
> + : "memory");
> + } while (unlikely(!result));
> +
> + result = temp + i;
> } else {
> unsigned long flags;
>
> @@ -587,20 +575,19 @@ static __inline__ long atomic64_sub_retu
> } else if (kernel_uses_llsc) {
> long temp;
>
> - __asm__ __volatile__(
> - " .set mips3 \n"
> - "1: lld %1, %2 # atomic64_sub_return \n"
> - " dsubu %0, %1, %3 \n"
> - " scd %0, %2 \n"
> - " beqz %0, 2f \n"
> - " dsubu %0, %1, %3 \n"
> - " .subsection 2 \n"
> - "2: b 1b \n"
> - " .previous \n"
> - " .set mips0 \n"
> - : "=&r" (result), "=&r" (temp), "=m" (v->counter)
> - : "Ir" (i), "m" (v->counter)
> - : "memory");
> + do {
> + __asm__ __volatile__(
> + " .set mips3 \n"
> + "1: lld %1, %2 # atomic64_sub_return \n"
... and here.
> + " dsubu %0, %1, %3 \n"
> + " scd %0, %2 \n"
> + " .set mips0 \n"
> + : "=&r" (result), "=&r" (temp), "=m" (v->counter)
> + : "Ir" (i), "m" (v->counter)
> + : "memory");
> + } while (unlikely(!result));
> +
> + result = temp - i;
> } else {
> unsigned long flags;
>
> Index: linux-queue/arch/mips/include/asm/bitops.h
> ===================================================================
> --- linux-queue.orig/arch/mips/include/asm/bitops.h
> +++ linux-queue/arch/mips/include/asm/bitops.h
> @@ -213,24 +205,22 @@ static inline void change_bit(unsigned l
> " " __SC "%0, %1 \n"
> " beqzl %0, 1b \n"
> " .set mips0 \n"
> - : "=&r" (temp), "=m" (*m)
> - : "ir" (1UL << bit), "m" (*m));
> + : "=&r" (temp), "+m" (*m)
> + : "ir" (1UL << bit));
> } else if (kernel_uses_llsc) {
> unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
> unsigned long temp;
>
> - __asm__ __volatile__(
> - " .set mips3 \n"
> - "1: " __LL "%0, %1 # change_bit \n"
> - " xor %0, %2 \n"
> - " " __SC "%0, %1 \n"
> - " beqz %0, 2f \n"
> - " .subsection 2 \n"
> - "2: b 1b \n"
> - " .previous \n"
> - " .set mips0 \n"
> - : "=&r" (temp), "=m" (*m)
> - : "ir" (1UL << bit), "m" (*m));
> + do {
> + __asm__ __volatile__(
> + " .set mips3 \n"
> + "1: " __LL "%0, %1 # change_bit \n"
... and here too.
> + " xor %0, %2 \n"
> + " " __SC "%0, %1 \n"
> + " .set mips0 \n"
> + : "=&r" (temp), "+m" (*m)
> + : "ir" (1UL << bit));
> + } while (unlikely(!temp));
> } else {
> volatile unsigned long *a = addr;
> unsigned long mask;
> @@ -272,30 +262,26 @@ static inline int test_and_set_bit(unsig
> " beqzl %2, 1b \n"
> " and %2, %0, %3 \n"
> " .set mips0 \n"
> - : "=&r" (temp), "=m" (*m), "=&r" (res)
> - : "r" (1UL << bit), "m" (*m)
> + : "=&r" (temp), "+m" (*m), "=&r" (res)
> + : "r" (1UL << bit)
> : "memory");
> } else if (kernel_uses_llsc) {
> unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
> unsigned long temp;
>
> - __asm__ __volatile__(
> - " .set push \n"
> - " .set noreorder \n"
> - " .set mips3 \n"
> - "1: " __LL "%0, %1 # test_and_set_bit \n"
> - " or %2, %0, %3 \n"
> - " " __SC "%2, %1 \n"
> - " beqz %2, 2f \n"
> - " and %2, %0, %3 \n"
> - " .subsection 2 \n"
> - "2: b 1b \n"
> - " nop \n"
> - " .previous \n"
> - " .set pop \n"
> - : "=&r" (temp), "=m" (*m), "=&r" (res)
> - : "r" (1UL << bit), "m" (*m)
> - : "memory");
> + do {
> + __asm__ __volatile__(
> + " .set mips3 \n"
> + "1: " __LL "%0, %1 # test_and_set_bit \n"
... and here as well.
> + " or %2, %0, %3 \n"
> + " " __SC "%2, %1 \n"
> + " .set mips0 \n"
> + : "=&r" (temp), "+m" (*m), "=&r" (res)
> + : "r" (1UL << bit)
> + : "memory");
> + } while (unlikely(!res));
> +
> + res = temp & (1UL << bit);
> } else {
> volatile unsigned long *a = addr;
> unsigned long mask;
> @@ -340,30 +326,26 @@ static inline int test_and_set_bit_lock(
> " beqzl %2, 1b \n"
> " and %2, %0, %3 \n"
> " .set mips0 \n"
> - : "=&r" (temp), "=m" (*m), "=&r" (res)
> - : "r" (1UL << bit), "m" (*m)
> + : "=&r" (temp), "+m" (*m), "=&r" (res)
> + : "r" (1UL << bit)
> : "memory");
> } else if (kernel_uses_llsc) {
> unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
> unsigned long temp;
>
> - __asm__ __volatile__(
> - " .set push \n"
> - " .set noreorder \n"
> - " .set mips3 \n"
> - "1: " __LL "%0, %1 # test_and_set_bit \n"
> - " or %2, %0, %3 \n"
> - " " __SC "%2, %1 \n"
> - " beqz %2, 2f \n"
> - " and %2, %0, %3 \n"
> - " .subsection 2 \n"
> - "2: b 1b \n"
> - " nop \n"
> - " .previous \n"
> - " .set pop \n"
> - : "=&r" (temp), "=m" (*m), "=&r" (res)
> - : "r" (1UL << bit), "m" (*m)
> - : "memory");
> + do {
> + __asm__ __volatile__(
> + " .set mips3 \n"
> + "1: " __LL "%0, %1 # test_and_set_bit \n"
... and here.
> + " or %2, %0, %3 \n"
> + " " __SC "%2, %1 \n"
> + " .set mips0 \n"
> + : "=&r" (temp), "+m" (*m), "=&r" (res)
> + : "r" (1UL << bit)
> + : "memory");
> + } while(unlikely(!res));
> +
> + res = temp & (1UL << bit);
> } else {
> volatile unsigned long *a = addr;
> unsigned long mask;
> @@ -410,49 +392,43 @@ static inline int test_and_clear_bit(uns
[...]
> } else if (kernel_uses_llsc) {
> unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
> unsigned long temp;
>
> - __asm__ __volatile__(
> - " .set push \n"
> - " .set noreorder \n"
> - " .set mips3 \n"
> - "1: " __LL "%0, %1 # test_and_clear_bit \n"
> - " or %2, %0, %3 \n"
> - " xor %2, %3 \n"
> - " " __SC "%2, %1 \n"
> - " beqz %2, 2f \n"
> - " and %2, %0, %3 \n"
> - " .subsection 2 \n"
> - "2: b 1b \n"
> - " nop \n"
> - " .previous \n"
> - " .set pop \n"
> - : "=&r" (temp), "=m" (*m), "=&r" (res)
> - : "r" (1UL << bit), "m" (*m)
> - : "memory");
> + do {
> + __asm__ __volatile__(
> + " .set mips3 \n"
> + "1: " __LL "%0, %1 # test_and_clear_bit \n"
... and here.
> + " or %2, %0, %3 \n"
> + " xor %2, %3 \n"
> + " " __SC "%2, %1 \n"
> + " .set mips0 \n"
> + : "=&r" (temp), "+m" (*m), "=&r" (res)
> + : "r" (1UL << bit)
> + : "memory");
> + } while (unlikely(!res));
> +
> + res = temp & (1UL << bit);
> } else {
> volatile unsigned long *a = addr;
> unsigned long mask;
WBR, Sergei
next prev parent reply other threads:[~2010-08-18 13:26 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-08-18 12:43 MIPS: Get rid of branches to .subsections Ralf Baechle
2010-08-18 13:25 ` Sergei Shtylyov [this message]
2010-08-18 13:42 ` Ralf Baechle
2010-08-18 13:33 ` Matt Fleming
2010-08-23 0:54 ` Maciej W. Rozycki
2010-08-23 10:12 ` Ralf Baechle
2010-08-23 11:25 ` Maciej W. Rozycki
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4C6BDF40.9050804@mvista.com \
--to=sshtylyov@mvista.com \
--cc=linux-mips@linux-mips.org \
--cc=paul.gortmaker@windriver.com \
--cc=ralf@linux-mips.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox