From: Sergei Shtylyov <sshtylyov@mvista.com>
To: Ralf Baechle <ralf@linux-mips.org>
Cc: linux-mips@linux-mips.org, Paul Gortmaker <paul.gortmaker@windriver.com>
Subject: Re: MIPS: Get rid of branches to .subsections.
Date: Wed, 18 Aug 2010 17:25:20 +0400 [thread overview]
Message-ID: <4C6BDF40.9050804@mvista.com> (raw)
In-Reply-To: <20100818124310.GA23744@linux-mips.org>
Hello.
Ralf Baechle wrote:
> It was a nice optimization - on paper at least. In practice it results in
> branches that may exceed the maximum legal range for a branch. We can
> fight that problem with -ffunction-sections but -ffunction-sections again
> is incompatible with -pg used by the function tracer.
> By rewriting the loop around all simple LL/SC blocks to C we reduce reduce
> the amount of inline assembler and at the same time allow GCC to often
> fill the branch delay slots with something sensible or whever else clever
^^^^^^
Whichever?
> optimization it may have up in its sleeve.
> With this optimization gone we also no longer need -ffunction-sections,
> so drop it.
> This optimization was originall introduced in 2.6.21, commit
^^^^^^^^^
Originally.
> 5999eca25c1fd4b9b9aca7833b04d10fe4bc877d (linux-mips.org) rsp.
> f65e4fa8e0c6022ad58dc88d1b11b12589ed7f9f (kernel.org).
> Original fix for the issues which caused me to pull this optimization by
> Paul Gortmaker <paul.gortmaker@windriver.com>.
> Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
> Index: linux-queue/arch/mips/include/asm/atomic.h
> ===================================================================
> --- linux-queue.orig/arch/mips/include/asm/atomic.h
> +++ linux-queue/arch/mips/include/asm/atomic.h
[...]
> @@ -443,18 +436,16 @@ static __inline__ void atomic64_add(long
> } else if (kernel_uses_llsc) {
> long temp;
>
> - __asm__ __volatile__(
> - " .set mips3 \n"
> - "1: lld %0, %1 # atomic64_add \n"
> - " daddu %0, %2 \n"
> - " scd %0, %1 \n"
> - " beqz %0, 2f \n"
> - " .subsection 2 \n"
> - "2: b 1b \n"
> - " .previous \n"
> - " .set mips0 \n"
> - : "=&r" (temp), "=m" (v->counter)
> - : "Ir" (i), "m" (v->counter));
> + do {
> + __asm__ __volatile__(
> + " .set mips3 \n"
> + "1: lld %0, %1 # atomic64_add \n"
You've kept the label here but it seems unused...
> + " daddu %0, %2 \n"
> + " scd %0, %1 \n"
> + " .set mips0 \n"
> + : "=&r" (temp), "=m" (v->counter)
> + : "Ir" (i), "m" (v->counter));
> + } while (unlikely(!temp));
> } else {
> unsigned long flags;
>
> @@ -488,18 +479,16 @@ static __inline__ void atomic64_sub(long
> } else if (kernel_uses_llsc) {
> long temp;
>
> - __asm__ __volatile__(
> - " .set mips3 \n"
> - "1: lld %0, %1 # atomic64_sub \n"
> - " dsubu %0, %2 \n"
> - " scd %0, %1 \n"
> - " beqz %0, 2f \n"
> - " .subsection 2 \n"
> - "2: b 1b \n"
> - " .previous \n"
> - " .set mips0 \n"
> - : "=&r" (temp), "=m" (v->counter)
> - : "Ir" (i), "m" (v->counter));
> + do {
> + __asm__ __volatile__(
> + " .set mips3 \n"
> + "1: lld %0, %1 # atomic64_sub \n"
Same here...
> + " dsubu %0, %2 \n"
> + " scd %0, %1 \n"
> + " .set mips0 \n"
> + : "=&r" (temp), "=m" (v->counter)
> + : "Ir" (i), "m" (v->counter));
> + } while (unlikely(!temp));
> } else {
> unsigned long flags;
>
> @@ -535,20 +524,19 @@ static __inline__ long atomic64_add_retu
> } else if (kernel_uses_llsc) {
> long temp;
>
> - __asm__ __volatile__(
> - " .set mips3 \n"
> - "1: lld %1, %2 # atomic64_add_return \n"
> - " daddu %0, %1, %3 \n"
> - " scd %0, %2 \n"
> - " beqz %0, 2f \n"
> - " daddu %0, %1, %3 \n"
> - " .subsection 2 \n"
> - "2: b 1b \n"
> - " .previous \n"
> - " .set mips0 \n"
> - : "=&r" (result), "=&r" (temp), "=m" (v->counter)
> - : "Ir" (i), "m" (v->counter)
> - : "memory");
> + do {
> + __asm__ __volatile__(
> + " .set mips3 \n"
> + "1: lld %1, %2 # atomic64_add_return \n"
... and here.
> + " daddu %0, %1, %3 \n"
> + " scd %0, %2 \n"
> + " .set mips0 \n"
> + : "=&r" (result), "=&r" (temp), "=m" (v->counter)
> + : "Ir" (i), "m" (v->counter)
> + : "memory");
> + } while (unlikely(!result));
> +
> + result = temp + i;
> } else {
> unsigned long flags;
>
> @@ -587,20 +575,19 @@ static __inline__ long atomic64_sub_retu
> } else if (kernel_uses_llsc) {
> long temp;
>
> - __asm__ __volatile__(
> - " .set mips3 \n"
> - "1: lld %1, %2 # atomic64_sub_return \n"
> - " dsubu %0, %1, %3 \n"
> - " scd %0, %2 \n"
> - " beqz %0, 2f \n"
> - " dsubu %0, %1, %3 \n"
> - " .subsection 2 \n"
> - "2: b 1b \n"
> - " .previous \n"
> - " .set mips0 \n"
> - : "=&r" (result), "=&r" (temp), "=m" (v->counter)
> - : "Ir" (i), "m" (v->counter)
> - : "memory");
> + do {
> + __asm__ __volatile__(
> + " .set mips3 \n"
> + "1: lld %1, %2 # atomic64_sub_return \n"
... and here.
> + " dsubu %0, %1, %3 \n"
> + " scd %0, %2 \n"
> + " .set mips0 \n"
> + : "=&r" (result), "=&r" (temp), "=m" (v->counter)
> + : "Ir" (i), "m" (v->counter)
> + : "memory");
> + } while (unlikely(!result));
> +
> + result = temp - i;
> } else {
> unsigned long flags;
>
> Index: linux-queue/arch/mips/include/asm/bitops.h
> ===================================================================
> --- linux-queue.orig/arch/mips/include/asm/bitops.h
> +++ linux-queue/arch/mips/include/asm/bitops.h
> @@ -213,24 +205,22 @@ static inline void change_bit(unsigned l
> " " __SC "%0, %1 \n"
> " beqzl %0, 1b \n"
> " .set mips0 \n"
> - : "=&r" (temp), "=m" (*m)
> - : "ir" (1UL << bit), "m" (*m));
> + : "=&r" (temp), "+m" (*m)
> + : "ir" (1UL << bit));
> } else if (kernel_uses_llsc) {
> unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
> unsigned long temp;
>
> - __asm__ __volatile__(
> - " .set mips3 \n"
> - "1: " __LL "%0, %1 # change_bit \n"
> - " xor %0, %2 \n"
> - " " __SC "%0, %1 \n"
> - " beqz %0, 2f \n"
> - " .subsection 2 \n"
> - "2: b 1b \n"
> - " .previous \n"
> - " .set mips0 \n"
> - : "=&r" (temp), "=m" (*m)
> - : "ir" (1UL << bit), "m" (*m));
> + do {
> + __asm__ __volatile__(
> + " .set mips3 \n"
> + "1: " __LL "%0, %1 # change_bit \n"
... and here too.
> + " xor %0, %2 \n"
> + " " __SC "%0, %1 \n"
> + " .set mips0 \n"
> + : "=&r" (temp), "+m" (*m)
> + : "ir" (1UL << bit));
> + } while (unlikely(!temp));
> } else {
> volatile unsigned long *a = addr;
> unsigned long mask;
> @@ -272,30 +262,26 @@ static inline int test_and_set_bit(unsig
> " beqzl %2, 1b \n"
> " and %2, %0, %3 \n"
> " .set mips0 \n"
> - : "=&r" (temp), "=m" (*m), "=&r" (res)
> - : "r" (1UL << bit), "m" (*m)
> + : "=&r" (temp), "+m" (*m), "=&r" (res)
> + : "r" (1UL << bit)
> : "memory");
> } else if (kernel_uses_llsc) {
> unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
> unsigned long temp;
>
> - __asm__ __volatile__(
> - " .set push \n"
> - " .set noreorder \n"
> - " .set mips3 \n"
> - "1: " __LL "%0, %1 # test_and_set_bit \n"
> - " or %2, %0, %3 \n"
> - " " __SC "%2, %1 \n"
> - " beqz %2, 2f \n"
> - " and %2, %0, %3 \n"
> - " .subsection 2 \n"
> - "2: b 1b \n"
> - " nop \n"
> - " .previous \n"
> - " .set pop \n"
> - : "=&r" (temp), "=m" (*m), "=&r" (res)
> - : "r" (1UL << bit), "m" (*m)
> - : "memory");
> + do {
> + __asm__ __volatile__(
> + " .set mips3 \n"
> + "1: " __LL "%0, %1 # test_and_set_bit \n"
... and here as well.
> + " or %2, %0, %3 \n"
> + " " __SC "%2, %1 \n"
> + " .set mips0 \n"
> + : "=&r" (temp), "+m" (*m), "=&r" (res)
> + : "r" (1UL << bit)
> + : "memory");
> + } while (unlikely(!res));
> +
> + res = temp & (1UL << bit);
> } else {
> volatile unsigned long *a = addr;
> unsigned long mask;
> @@ -340,30 +326,26 @@ static inline int test_and_set_bit_lock(
> " beqzl %2, 1b \n"
> " and %2, %0, %3 \n"
> " .set mips0 \n"
> - : "=&r" (temp), "=m" (*m), "=&r" (res)
> - : "r" (1UL << bit), "m" (*m)
> + : "=&r" (temp), "+m" (*m), "=&r" (res)
> + : "r" (1UL << bit)
> : "memory");
> } else if (kernel_uses_llsc) {
> unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
> unsigned long temp;
>
> - __asm__ __volatile__(
> - " .set push \n"
> - " .set noreorder \n"
> - " .set mips3 \n"
> - "1: " __LL "%0, %1 # test_and_set_bit \n"
> - " or %2, %0, %3 \n"
> - " " __SC "%2, %1 \n"
> - " beqz %2, 2f \n"
> - " and %2, %0, %3 \n"
> - " .subsection 2 \n"
> - "2: b 1b \n"
> - " nop \n"
> - " .previous \n"
> - " .set pop \n"
> - : "=&r" (temp), "=m" (*m), "=&r" (res)
> - : "r" (1UL << bit), "m" (*m)
> - : "memory");
> + do {
> + __asm__ __volatile__(
> + " .set mips3 \n"
> + "1: " __LL "%0, %1 # test_and_set_bit \n"
... and here.
> + " or %2, %0, %3 \n"
> + " " __SC "%2, %1 \n"
> + " .set mips0 \n"
> + : "=&r" (temp), "+m" (*m), "=&r" (res)
> + : "r" (1UL << bit)
> + : "memory");
> + } while(unlikely(!res));
> +
> + res = temp & (1UL << bit);
> } else {
> volatile unsigned long *a = addr;
> unsigned long mask;
> @@ -410,49 +392,43 @@ static inline int test_and_clear_bit(uns
[...]
> } else if (kernel_uses_llsc) {
> unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
> unsigned long temp;
>
> - __asm__ __volatile__(
> - " .set push \n"
> - " .set noreorder \n"
> - " .set mips3 \n"
> - "1: " __LL "%0, %1 # test_and_clear_bit \n"
> - " or %2, %0, %3 \n"
> - " xor %2, %3 \n"
> - " " __SC "%2, %1 \n"
> - " beqz %2, 2f \n"
> - " and %2, %0, %3 \n"
> - " .subsection 2 \n"
> - "2: b 1b \n"
> - " nop \n"
> - " .previous \n"
> - " .set pop \n"
> - : "=&r" (temp), "=m" (*m), "=&r" (res)
> - : "r" (1UL << bit), "m" (*m)
> - : "memory");
> + do {
> + __asm__ __volatile__(
> + " .set mips3 \n"
> + "1: " __LL "%0, %1 # test_and_clear_bit \n"
... and here.
> + " or %2, %0, %3 \n"
> + " xor %2, %3 \n"
> + " " __SC "%2, %1 \n"
> + " .set mips0 \n"
> + : "=&r" (temp), "+m" (*m), "=&r" (res)
> + : "r" (1UL << bit)
> + : "memory");
> + } while (unlikely(!res));
> +
> + res = temp & (1UL << bit);
> } else {
> volatile unsigned long *a = addr;
> unsigned long mask;
WBR, Sergei
next prev parent reply other threads:[~2010-08-18 13:26 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-08-18 12:43 MIPS: Get rid of branches to .subsections Ralf Baechle
2010-08-18 13:25 ` Sergei Shtylyov [this message]
2010-08-18 13:42 ` Ralf Baechle
2010-08-18 13:33 ` Matt Fleming
2010-08-23 0:54 ` Maciej W. Rozycki
2010-08-23 10:12 ` Ralf Baechle
2010-08-23 11:25 ` Maciej W. Rozycki
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4C6BDF40.9050804@mvista.com \
--to=sshtylyov@mvista.com \
--cc=linux-mips@linux-mips.org \
--cc=paul.gortmaker@windriver.com \
--cc=ralf@linux-mips.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.