* PATCH: Fix ll/sc for mips
@ 2002-01-31 20:35 H . J . Lu
2002-01-31 22:17 ` Maciej W. Rozycki
0 siblings, 1 reply; 75+ messages in thread
From: H . J . Lu @ 2002-01-31 20:35 UTC (permalink / raw)
To: GNU C Library, linux-mips
I believe this patch fixes all the ll/sc bugs I found in glibc. Any
comments?
Thanks.
H.J.
----
2002-01-31 H.J. Lu <hjl@gnu.org>
* ysdeps/mips/pspinlock.c (__pthread_spin_lock): Use a
different register in the delayed slot.
* sysdeps/mips/pt-machine.h (testandset): Call _test_and_set.
(__compare_and_swap): Return 0 when failed to compare or swap.
2002-01-31 H.J. Lu <hjl@gnu.org>
* sysdeps/mips/atomicity.h (compare_and_swap): Return 0 when
failed to compare or swap.
* sysdeps/unix/sysv/linux/mips/sys/tas.h (_test_and_set): Fill
the delay slot.
--- glibc-2.2.4/linuxthreads/sysdeps/mips/pspinlock.c.llsc Wed Jan 30 16:06:28 2002
+++ glibc-2.2.4/linuxthreads/sysdeps/mips/pspinlock.c Thu Jan 31 10:27:10 2002
@@ -29,20 +29,21 @@
int
__pthread_spin_lock (pthread_spinlock_t *lock)
{
- unsigned int tmp;
+ unsigned int tmp1, tmp2;
asm volatile
("\t\t\t# spin_lock\n\t"
- "1:\n\t"
- "ll %1,%2\n\t"
".set push\n\t"
- ".set noreorder\n\t"
+ ".set noreorder\n"
+ "1:\n\t"
+ "ll %1,%3\n\t"
"bnez %1,1b\n\t"
- " li %1,1\n\t"
- ".set pop\n\t"
- "sc %1,%0\n\t"
- "beqz %1,1b"
- : "=m" (*lock), "=&r" (tmp)
+ "li %2,1\n\t"
+ "sc %2,%0\n\t"
+ "beqz %2,1b\n\t"
+ "nop\n\t"
+ ".set pop"
+ : "=m" (*lock), "=&r" (tmp1), "=&r" (tmp2)
: "m" (*lock)
: "memory");
--- glibc-2.2.4/linuxthreads/sysdeps/mips/pt-machine.h.llsc Sat Dec 9 08:55:34 2000
+++ glibc-2.2.4/linuxthreads/sysdeps/mips/pt-machine.h Thu Jan 31 10:02:53 2002
@@ -33,41 +33,11 @@
/* Spinlock implementation; required. */
-#if (_MIPS_ISA >= _MIPS_ISA_MIPS2)
-
-PT_EI long int
-testandset (int *spinlock)
-{
- long int ret, temp;
-
- __asm__ __volatile__
- ("/* Inline spinlock test & set */\n\t"
- "1:\n\t"
- "ll %0,%3\n\t"
- ".set push\n\t"
- ".set noreorder\n\t"
- "bnez %0,2f\n\t"
- " li %1,1\n\t"
- ".set pop\n\t"
- "sc %1,%2\n\t"
- "beqz %1,1b\n"
- "2:\n\t"
- "/* End spinlock test & set */"
- : "=&r" (ret), "=&r" (temp), "=m" (*spinlock)
- : "m" (*spinlock)
- : "memory");
-
- return ret;
-}
-
-#else /* !(_MIPS_ISA >= _MIPS_ISA_MIPS2) */
-
PT_EI long int
testandset (int *spinlock)
{
return _test_and_set (spinlock, 1);
}
-#endif /* !(_MIPS_ISA >= _MIPS_ISA_MIPS2) */
/* Get some notion of the current stack. Need not be exactly the top
@@ -84,22 +54,21 @@ register char * stack_pointer __asm__ ("
PT_EI int
__compare_and_swap (long int *p, long int oldval, long int newval)
{
- long int ret;
+ long int ret, temp;
__asm__ __volatile__
("/* Inline compare & swap */\n\t"
- "1:\n\t"
- "ll %0,%4\n\t"
- ".set push\n"
+ "ll %1,%5\n\t"
+ ".set push\n\t"
".set noreorder\n\t"
- "bne %0,%2,2f\n\t"
- " move %0,%3\n\t"
+ "bne %1,%3,1f\n\t"
+ "move %0,$0\n\t"
".set pop\n\t"
- "sc %0,%1\n\t"
- "beqz %0,1b\n"
- "2:\n\t"
+ "move %0,%4\n\t"
+ "sc %0,%2\n"
+ "1:\n\t"
"/* End compare & swap */"
- : "=&r" (ret), "=m" (*p)
+ : "=&r" (ret), "=&r" (temp), "=m" (*p)
: "r" (oldval), "r" (newval), "m" (*p)
: "memory");
--- glibc-2.2.4/sysdeps/mips/atomicity.h.llsc Mon Jul 9 11:58:19 2001
+++ glibc-2.2.4/sysdeps/mips/atomicity.h Thu Jan 31 10:36:47 2002
@@ -32,12 +32,16 @@ exchange_and_add (volatile uint32_t *mem
int result, tmp;
__asm__ __volatile__
- ("/* Inline exchange & add */\n\t"
+ ("/* Inline exchange & add */\n"
"1:\n\t"
"ll %0,%3\n\t"
"addu %1,%4,%0\n\t"
"sc %1,%2\n\t"
+ ".set push\n\t"
+ ".set noreorder\n\t"
"beqz %1,1b\n\t"
+ "nop\n\t"
+ ".set pop\n\t"
"/* End exchange & add */"
: "=&r"(result), "=&r"(tmp), "=m"(*mem)
: "m" (*mem), "r"(val)
@@ -53,12 +57,16 @@ atomic_add (volatile uint32_t *mem, int
int result;
__asm__ __volatile__
- ("/* Inline atomic add */\n\t"
+ ("/* Inline atomic add */\n"
"1:\n\t"
"ll %0,%2\n\t"
"addu %0,%3,%0\n\t"
"sc %0,%1\n\t"
+ ".set push\n\t"
+ ".set noreorder\n\t"
"beqz %0,1b\n\t"
+ "nop\n\t"
+ ".set pop\n\t"
"/* End atomic add */"
: "=&r"(result), "=m"(*mem)
: "m" (*mem), "r"(val)
@@ -69,22 +77,21 @@ static inline int
__attribute__ ((unused))
compare_and_swap (volatile long int *p, long int oldval, long int newval)
{
- long int ret;
+ long int ret, temp;
__asm__ __volatile__
("/* Inline compare & swap */\n\t"
- "1:\n\t"
- "ll %0,%4\n\t"
- ".set push\n"
+ "ll %1,%5\n\t"
+ ".set push\n\t"
".set noreorder\n\t"
- "bne %0,%2,2f\n\t"
- "move %0,%3\n\t"
+ "bne %1,%3,1f\n\t"
+ "move %0,$0\n\t"
".set pop\n\t"
- "sc %0,%1\n\t"
- "beqz %0,1b\n"
- "2:\n\t"
+ "move %0,%4\n\t"
+ "sc %0,%2\n"
+ "1:\n\t"
"/* End compare & swap */"
- : "=&r" (ret), "=m" (*p)
+ : "=&r" (ret), "=&r" (temp), "=m" (*p)
: "r" (oldval), "r" (newval), "m" (*p)
: "memory");
--- glibc-2.2.4/sysdeps/unix/sysv/linux/mips/sys/tas.h.llsc Mon Jul 9 11:58:45 2001
+++ glibc-2.2.4/sysdeps/unix/sysv/linux/mips/sys/tas.h Thu Jan 31 10:24:44 2002
@@ -42,16 +42,17 @@ _test_and_set (int *p, int v) __THROW
int r, t;
__asm__ __volatile__
- ("1:\n\t"
+ (".set push\n\t"
+ ".set noreorder\n"
+ "1:\n\t"
"ll %0,%3\n\t"
- ".set push\n\t"
- ".set noreorder\n\t"
"beq %0,%4,2f\n\t"
- " move %1,%4\n\t"
- ".set pop\n\t"
+ "move %1,%4\n\t"
"sc %1,%2\n\t"
- "beqz %1,1b\n"
- "2:\n"
+ "beqz %1,1b\n\t"
+ "nop\n"
+ "2:\n\t"
+ ".set pop"
: "=&r" (r), "=&r" (t), "=m" (*p)
: "m" (*p), "r" (v)
: "memory");
^ permalink raw reply [flat|nested] 75+ messages in thread* Re: PATCH: Fix ll/sc for mips 2002-01-31 20:35 PATCH: Fix ll/sc for mips H . J . Lu @ 2002-01-31 22:17 ` Maciej W. Rozycki 2002-01-31 22:41 ` H . J . Lu 2002-01-31 23:33 ` [libc-alpha] " Kaz Kylheku 0 siblings, 2 replies; 75+ messages in thread From: Maciej W. Rozycki @ 2002-01-31 22:17 UTC (permalink / raw) To: H . J . Lu; +Cc: GNU C Library, linux-mips On Thu, 31 Jan 2002, H . J . Lu wrote: > (__compare_and_swap): Return 0 when failed to compare or swap. [...] > * sysdeps/mips/atomicity.h (compare_and_swap): Return 0 when > failed to compare or swap. Looking at the i486 implementation these are not expected to fail. Unless I am missing something... > * sysdeps/unix/sysv/linux/mips/sys/tas.h (_test_and_set): Fill > the delay slot. What's the difference? The code looks the same after changes. Also you forgot to indent instructions in delay slots, which worsens readability. -- + Maciej W. Rozycki, Technical University of Gdansk, Poland + +--------------------------------------------------------------+ + e-mail: macro@ds2.pg.gda.pl, PGP key available + ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips 2002-01-31 22:17 ` Maciej W. Rozycki @ 2002-01-31 22:41 ` H . J . Lu 2002-02-01 3:35 ` Hiroyuki Machida 2002-01-31 23:33 ` [libc-alpha] " Kaz Kylheku 1 sibling, 1 reply; 75+ messages in thread From: H . J . Lu @ 2002-01-31 22:41 UTC (permalink / raw) To: Maciej W. Rozycki; +Cc: GNU C Library, linux-mips On Thu, Jan 31, 2002 at 11:17:21PM +0100, Maciej W. Rozycki wrote: > On Thu, 31 Jan 2002, H . J . Lu wrote: > > > (__compare_and_swap): Return 0 when failed to compare or swap. > [...] > > * sysdeps/mips/atomicity.h (compare_and_swap): Return 0 when > > failed to compare or swap. > > Looking at the i486 implementation these are not expected to fail. > Unless I am missing something... Why can't it fail? static inline char __attribute__ ((unused)) compare_and_swap (volatile long int *p, long int oldval, long int newval) { char ret; long int readval; __asm__ __volatile__ ("lock; cmpxchgl %3, %1; sete %0" : "=q" (ret), "=m" (*p), "=a" (readval) : "r" (newval), "1" (*p), "a" (oldval)); return ret; } It will fail if *p is not same as oldval. > > > * sysdeps/unix/sysv/linux/mips/sys/tas.h (_test_and_set): Fill > > the delay slot. > > What's the difference? The code looks the same after changes. Also you > forgot to indent instructions in delay slots, which worsens readability. Are we looking at the same file? Here is the patched version: _EXTERN_INLINE int _test_and_set (int *p, int v) __THROW { int r, t; __asm__ __volatile__ (".set push\n\t" ".set noreorder\n" "1:\n\t" "ll %0,%3\n\t" "beq %0,%4,2f\n\t" "move %1,%4\n\t" "sc %1,%2\n\t" "beqz %1,1b\n\t" "nop\n" "2:\n\t" ".set pop" : "=&r" (r), "=&r" (t), "=m" (*p) : "m" (*p), "r" (v) : "memory"); return r; } H.J. ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips 2002-01-31 22:41 ` H . J . Lu @ 2002-02-01 3:35 ` Hiroyuki Machida 2002-02-01 4:02 ` [libc-alpha] " Kaz Kylheku ` (2 more replies) 0 siblings, 3 replies; 75+ messages in thread From: Hiroyuki Machida @ 2002-02-01 3:35 UTC (permalink / raw) To: hjl; +Cc: macro, libc-alpha, linux-mips From: "H . J . Lu" <hjl@lucon.org> Subject: Re: PATCH: Fix ll/sc for mips Date: Thu, 31 Jan 2002 14:41:00 -0800 > On Thu, Jan 31, 2002 at 11:17:21PM +0100, Maciej W. Rozycki wrote: > > On Thu, 31 Jan 2002, H . J . Lu wrote: > > > > > (__compare_and_swap): Return 0 when failed to compare or swap. > > [...] > > > * sysdeps/mips/atomicity.h (compare_and_swap): Return 0 when > > > failed to compare or swap. > > > > Looking at the i486 implementation these are not expected to fail. > > Unless I am missing something... > > Why can't it fail? > > static inline char > __attribute__ ((unused)) > compare_and_swap (volatile long int *p, long int oldval, long int newval) > { > char ret; > long int readval; > > __asm__ __volatile__ ("lock; cmpxchgl %3, %1; sete %0" > : "=q" (ret), "=m" (*p), "=a" (readval) > : "r" (newval), "1" (*p), "a" (oldval)); > return ret; > } > > It will fail if *p is not same as oldval. Please note that "sc" may fail even if nobody write the variable. (See P.211 "8.4.2 Load-Linked/Sotre-Conditional" of "See MIPS RUN" for more detail.) So, after your patch applied, compare_and_swap() may fail, even if *p is equal to oldval. > > > > > * sysdeps/unix/sysv/linux/mips/sys/tas.h (_test_and_set): Fill > > > the delay slot. > > > > What's the difference? The code looks the same after changes. Also you > > forgot to indent instructions in delay slots, which worsens readability. > > Are we looking at the same file? Here is the patched version: > > _EXTERN_INLINE int > _test_and_set (int *p, int v) __THROW > { > int r, t; > > __asm__ __volatile__ > (".set push\n\t" > ".set noreorder\n" > "1:\n\t" > "ll %0,%3\n\t" > "beq %0,%4,2f\n\t" > "move %1,%4\n\t" > "sc %1,%2\n\t" > "beqz %1,1b\n\t" > "nop\n" > "2:\n\t" > ".set pop" > : "=&r" (r), "=&r" (t), "=m" (*p) > : "m" (*p), "r" (v) > : "memory"); > > return r; > } Gas will fill delay slots. Same object codes will be produced, so I think you don't have to do that by hand. --- Hiroyuki Machida Sony Corp. ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [libc-alpha] Re: PATCH: Fix ll/sc for mips 2002-02-01 3:35 ` Hiroyuki Machida @ 2002-02-01 4:02 ` Kaz Kylheku 2002-02-01 4:02 ` Kaz Kylheku ` (2 more replies) 2002-02-01 7:17 ` H . J . Lu 2002-02-01 11:50 ` Maciej W. Rozycki 2 siblings, 3 replies; 75+ messages in thread From: Kaz Kylheku @ 2002-02-01 4:02 UTC (permalink / raw) To: Hiroyuki Machida; +Cc: hjl, macro, libc-alpha, linux-mips On Fri, 1 Feb 2002, Hiroyuki Machida wrote: > Please note that "sc" may fail even if nobody write the > variable. (See P.211 "8.4.2 Load-Linked/Sotre-Conditional" of "See > MIPS RUN" for more detail.) > So, after your patch applied, compare_and_swap() may fail, even if > *p is equal to oldval. I can't think of anything that will break because of this, as long as the compare_and_swap eventually succeeds on some subsequent trial. If the atomic operation has to abort for some reason other than *p being unequal to oldval, that should be cool. ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [libc-alpha] Re: PATCH: Fix ll/sc for mips 2002-02-01 4:02 ` [libc-alpha] " Kaz Kylheku @ 2002-02-01 4:02 ` Kaz Kylheku 2002-02-01 4:59 ` Hiroyuki Machida 2002-02-01 7:10 ` H . J . Lu 2 siblings, 0 replies; 75+ messages in thread From: Kaz Kylheku @ 2002-02-01 4:02 UTC (permalink / raw) To: Hiroyuki Machida; +Cc: hjl, macro, libc-alpha, linux-mips On Fri, 1 Feb 2002, Hiroyuki Machida wrote: > Please note that "sc" may fail even if nobody write the > variable. (See P.211 "8.4.2 Load-Linked/Sotre-Conditional" of "See > MIPS RUN" for more detail.) > So, after your patch applied, compare_and_swap() may fail, even if > *p is equal to oldval. I can't think of anything that will break because of this, as long as the compare_and_swap eventually succeeds on some subsequent trial. If the atomic operation has to abort for some reason other than *p being unequal to oldval, that should be cool. ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [libc-alpha] Re: PATCH: Fix ll/sc for mips 2002-02-01 4:02 ` [libc-alpha] " Kaz Kylheku 2002-02-01 4:02 ` Kaz Kylheku @ 2002-02-01 4:59 ` Hiroyuki Machida 2002-02-01 7:00 ` H . J . Lu 2002-02-01 10:49 ` Andreas Schwab 2002-02-01 7:10 ` H . J . Lu 2 siblings, 2 replies; 75+ messages in thread From: Hiroyuki Machida @ 2002-02-01 4:59 UTC (permalink / raw) To: kaz; +Cc: hjl, macro, libc-alpha, linux-mips From: Kaz Kylheku <kaz@ashi.footprints.net> Subject: Re: [libc-alpha] Re: PATCH: Fix ll/sc for mips Date: Thu, 31 Jan 2002 20:02:25 -0800 (PST) > On Fri, 1 Feb 2002, Hiroyuki Machida wrote: > > Please note that "sc" may fail even if nobody write the > > variable. (See P.211 "8.4.2 Load-Linked/Sotre-Conditional" of "See > > MIPS RUN" for more detail.) > > So, after your patch applied, compare_and_swap() may fail, even if > > *p is equal to oldval. > > I can't think of anything that will break because of this, as long > as the compare_and_swap eventually succeeds on some subsequent trial. > If the atomic operation has to abort for some reason other than *p being > unequal to oldval, that should be cool. I mean that this patch breaks the spec of compare_and_swap(). In most case, this patch may works as Kaz said. If this patch have no side-effect to any application, it's ok to apply the patch. But we can't know how to use compare_and_swap() in all aplications in a whole world. So we have to follow the spec. --- Hiroyuki Machida Sony Corp. ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [libc-alpha] Re: PATCH: Fix ll/sc for mips 2002-02-01 4:59 ` Hiroyuki Machida @ 2002-02-01 7:00 ` H . J . Lu 2002-02-01 11:12 ` Hiroyuki Machida 2002-02-01 10:49 ` Andreas Schwab 1 sibling, 1 reply; 75+ messages in thread From: H . J . Lu @ 2002-02-01 7:00 UTC (permalink / raw) To: Hiroyuki Machida; +Cc: kaz, macro, libc-alpha, linux-mips On Fri, Feb 01, 2002 at 01:59:03PM +0900, Hiroyuki Machida wrote: > > From: Kaz Kylheku <kaz@ashi.footprints.net> > Subject: Re: [libc-alpha] Re: PATCH: Fix ll/sc for mips > Date: Thu, 31 Jan 2002 20:02:25 -0800 (PST) > > > On Fri, 1 Feb 2002, Hiroyuki Machida wrote: > > > Please note that "sc" may fail even if nobody write the > > > variable. (See P.211 "8.4.2 Load-Linked/Sotre-Conditional" of "See > > > MIPS RUN" for more detail.) > > > So, after your patch applied, compare_and_swap() may fail, even if > > > *p is equal to oldval. > > > > I can't think of anything that will break because of this, as long > > as the compare_and_swap eventually succeeds on some subsequent trial. > > If the atomic operation has to abort for some reason other than *p being > > unequal to oldval, that should be cool. > > I mean that this patch breaks the spec of compare_and_swap(). > In most case, this patch may works as Kaz said. If this patch have > no side-effect to any application, it's ok to apply the patch. But > we can't know how to use compare_and_swap() in all aplications in a > whole world. So we have to follow the spec. > Please note that the old compare_and_swap is broken. If you use compare_and_swap to check if *p == oldval, my patch doesn't help you. But if you use it to swap old/new, my patch works fine. But I don't think you can use it check if *p == oldval since *p can change at any time. It is the same as simply using "*p == oldval". I don't see my patch should break any sane applications. H.J. ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [libc-alpha] Re: PATCH: Fix ll/sc for mips 2002-02-01 7:00 ` H . J . Lu @ 2002-02-01 11:12 ` Hiroyuki Machida 0 siblings, 0 replies; 75+ messages in thread From: Hiroyuki Machida @ 2002-02-01 11:12 UTC (permalink / raw) To: hjl; +Cc: kaz, macro, libc-alpha, linux-mips From: "H . J . Lu" <hjl@lucon.org> Subject: Re: [libc-alpha] Re: PATCH: Fix ll/sc for mips Date: Thu, 31 Jan 2002 23:00:50 -0800 > On Fri, Feb 01, 2002 at 01:59:03PM +0900, Hiroyuki Machida wrote: > > > > From: Kaz Kylheku <kaz@ashi.footprints.net> > > Subject: Re: [libc-alpha] Re: PATCH: Fix ll/sc for mips > > Date: Thu, 31 Jan 2002 20:02:25 -0800 (PST) > > > > > On Fri, 1 Feb 2002, Hiroyuki Machida wrote: > > > > Please note that "sc" may fail even if nobody write the > > > > variable. (See P.211 "8.4.2 Load-Linked/Sotre-Conditional" of "See > > > > MIPS RUN" for more detail.) > > > > So, after your patch applied, compare_and_swap() may fail, even if > > > > *p is equal to oldval. > > > > > > I can't think of anything that will break because of this, as long > > > as the compare_and_swap eventually succeeds on some subsequent trial. > > > If the atomic operation has to abort for some reason other than *p being > > > unequal to oldval, that should be cool. > > > > I mean that this patch breaks the spec of compare_and_swap(). > > In most case, this patch may works as Kaz said. If this patch have > > no side-effect to any application, it's ok to apply the patch. But > > we can't know how to use compare_and_swap() in all aplications in a > > whole world. So we have to follow the spec. > > > > Please note that the old compare_and_swap is broken. If you use > compare_and_swap to check if *p == oldval, my patch doesn't help > you. But if you use it to swap old/new, my patch works fine. But I > don't think you can use it check if *p == oldval since *p can change > at any time. It is the same as simply using "*p == oldval". I don't > see my patch should break any sane applications. > > > H.J. > I know the orinal compare_and_swap() is bad, and I believe the spec of compare_and_swap() as below; compare_and_swap(p, oldval, newval) { retval = 0; begin_atomic if (*p==oldval) { *p = newval; retval = 1; } end_atomic return retval; } So, compare_and_swap() should be ... __compare_and_swap (a0 long int oldval, long int newval) a0: *p a1: oldval a2: newval v0: return value .set noreorder retry: ll v0, (a0) bne v0, a1 move v0, zero move v0, a2 sc v0, (a0) beqz v0, retry nop j ra But, with your patch ... .set noreorder ll t0, (a0) bne t0, a1 move v0, zero move v0, a sc v0, (a0) j ra In this way, compare_and_swap() was changed as compare_and_swap(p, oldval, newval) { retval = 0; begin_atomic if (*p==oldval) { if "sc" was failed goto out; *p = newval; retval = 1; } out: end_atomic return retval; } --- Hiroyuki Machida Sony Corp. ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [libc-alpha] Re: PATCH: Fix ll/sc for mips 2002-02-01 4:59 ` Hiroyuki Machida 2002-02-01 7:00 ` H . J . Lu @ 2002-02-01 10:49 ` Andreas Schwab 2002-02-01 11:23 ` Geoff Keating 1 sibling, 1 reply; 75+ messages in thread From: Andreas Schwab @ 2002-02-01 10:49 UTC (permalink / raw) To: Hiroyuki Machida; +Cc: kaz, hjl, macro, libc-alpha, linux-mips Hiroyuki Machida <machida@sm.sony.co.jp> writes: |> From: Kaz Kylheku <kaz@ashi.footprints.net> |> Subject: Re: [libc-alpha] Re: PATCH: Fix ll/sc for mips |> Date: Thu, 31 Jan 2002 20:02:25 -0800 (PST) |> |> > On Fri, 1 Feb 2002, Hiroyuki Machida wrote: |> > > Please note that "sc" may fail even if nobody write the |> > > variable. (See P.211 "8.4.2 Load-Linked/Sotre-Conditional" of "See |> > > MIPS RUN" for more detail.) |> > > So, after your patch applied, compare_and_swap() may fail, even if |> > > *p is equal to oldval. |> > |> > I can't think of anything that will break because of this, as long |> > as the compare_and_swap eventually succeeds on some subsequent trial. |> > If the atomic operation has to abort for some reason other than *p being |> > unequal to oldval, that should be cool. |> |> I mean that this patch breaks the spec of compare_and_swap(). |> |> In most case, this patch may works as Kaz said. If this patch have |> no side-effect to any application, it's ok to apply the patch. But |> we can't know how to use compare_and_swap() in all aplications in a |> whole world. So we have to follow the spec. There is no way to find out anything about intermediate values of *p when compare_and_swap returns zero. The value of *p can change anytime, even if it only was different from oldval just at the time compare_and_swap did the comparison. So there is zero chance that a spurious failure of compare_and_swap breaks anything. Andreas. -- Andreas Schwab, SuSE Labs, schwab@suse.de SuSE GmbH, Deutschherrnstr. 15-19, D-90429 Nürnberg Key fingerprint = 58CA 54C7 6D53 942B 1756 01D3 44D5 214B 8276 4ED5 "And now for something completely different." ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [libc-alpha] Re: PATCH: Fix ll/sc for mips 2002-02-01 10:49 ` Andreas Schwab @ 2002-02-01 11:23 ` Geoff Keating 0 siblings, 0 replies; 75+ messages in thread From: Geoff Keating @ 2002-02-01 11:23 UTC (permalink / raw) To: schwab; +Cc: machida, kaz, hjl, macro, libc-alpha, linux-mips > From: Andreas Schwab <schwab@suse.de> > Date: Fri, 01 Feb 2002 11:49:22 +0100 > There is no way to find out anything about intermediate values of *p when > compare_and_swap returns zero. The value of *p can change anytime, even > if it only was different from oldval just at the time compare_and_swap did > the comparison. So there is zero chance that a spurious failure of > compare_and_swap breaks anything. Something to watch out for, though, is livelock. Consider the situation in which two processors are competing for a cache line, and both only win at the 'wrong' time: when computing a new value to be passed to compare_and_swap rather than when actually trying to perform the compare_and_swap. This is why on powerpc the loop is coded in the asm statement. -- - Geoffrey Keating <geoffk@geoffk.org> <geoffk@redhat.com> ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [libc-alpha] Re: PATCH: Fix ll/sc for mips 2002-02-01 4:02 ` [libc-alpha] " Kaz Kylheku 2002-02-01 4:02 ` Kaz Kylheku 2002-02-01 4:59 ` Hiroyuki Machida @ 2002-02-01 7:10 ` H . J . Lu 2 siblings, 0 replies; 75+ messages in thread From: H . J . Lu @ 2002-02-01 7:10 UTC (permalink / raw) To: Kaz Kylheku; +Cc: Hiroyuki Machida, macro, libc-alpha, linux-mips On Thu, Jan 31, 2002 at 08:02:25PM -0800, Kaz Kylheku wrote: > On Fri, 1 Feb 2002, Hiroyuki Machida wrote: > > Please note that "sc" may fail even if nobody write the > > variable. (See P.211 "8.4.2 Load-Linked/Sotre-Conditional" of "See > > MIPS RUN" for more detail.) > > So, after your patch applied, compare_and_swap() may fail, even if > > *p is equal to oldval. > > I can't think of anything that will break because of this, as long > as the compare_and_swap eventually succeeds on some subsequent trial. > If the atomic operation has to abort for some reason other than *p being > unequal to oldval, that should be cool. Maybe we should document it in glibc, something like compare_and_swap compares the contents of a variable with an old value. If the values are equal and a new value is stored in the variable atomically, 1 is returned; otherwise, 1 is returned. H.J. ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips 2002-02-01 3:35 ` Hiroyuki Machida 2002-02-01 4:02 ` [libc-alpha] " Kaz Kylheku @ 2002-02-01 7:17 ` H . J . Lu 2002-02-01 11:45 ` Maciej W. Rozycki 2002-02-04 7:01 ` PATCH: Fix ll/sc for mips Ralf Baechle 2002-02-01 11:50 ` Maciej W. Rozycki 2 siblings, 2 replies; 75+ messages in thread From: H . J . Lu @ 2002-02-01 7:17 UTC (permalink / raw) To: Hiroyuki Machida; +Cc: macro, libc-alpha, linux-mips On Fri, Feb 01, 2002 at 12:35:23PM +0900, Hiroyuki Machida wrote: > > _test_and_set (int *p, int v) __THROW > > { > > int r, t; > > > > __asm__ __volatile__ > > (".set push\n\t" > > ".set noreorder\n" > > "1:\n\t" > > "ll %0,%3\n\t" > > "beq %0,%4,2f\n\t" > > "move %1,%4\n\t" > > "sc %1,%2\n\t" > > "beqz %1,1b\n\t" > > "nop\n" > > "2:\n\t" > > ".set pop" > > : "=&r" (r), "=&r" (t), "=m" (*p) > > : "m" (*p), "r" (v) > > : "memory"); > > > > return r; > > } > > Gas will fill delay slots. Same object codes will be produced, so I > think you don't have to do that by hand. It will make the code more readable. We don't have to guess what the assembler will do. H.J. ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips 2002-02-01 7:17 ` H . J . Lu @ 2002-02-01 11:45 ` Maciej W. Rozycki 2002-02-01 18:29 ` PATCH: Fix ll/sc for mips (take 3) H . J . Lu 2002-02-04 7:01 ` PATCH: Fix ll/sc for mips Ralf Baechle 1 sibling, 1 reply; 75+ messages in thread From: Maciej W. Rozycki @ 2002-02-01 11:45 UTC (permalink / raw) To: H . J . Lu; +Cc: Hiroyuki Machida, libc-alpha, linux-mips On Thu, 31 Jan 2002, H . J . Lu wrote: > > Gas will fill delay slots. Same object codes will be produced, so I > > think you don't have to do that by hand. > > It will make the code more readable. We don't have to guess what > the assembler will do. But you lose a chance for something useful being reordered to the slot. That might not necessarily be a "nop". Please don't forget of indents anyway. -- + Maciej W. Rozycki, Technical University of Gdansk, Poland + +--------------------------------------------------------------+ + e-mail: macro@ds2.pg.gda.pl, PGP key available + ^ permalink raw reply [flat|nested] 75+ messages in thread
* PATCH: Fix ll/sc for mips (take 3) 2002-02-01 11:45 ` Maciej W. Rozycki @ 2002-02-01 18:29 ` H . J . Lu 2002-02-01 23:01 ` Daniel Jacobowitz 0 siblings, 1 reply; 75+ messages in thread From: H . J . Lu @ 2002-02-01 18:29 UTC (permalink / raw) To: Maciej W. Rozycki; +Cc: Hiroyuki Machida, libc-alpha, linux-mips On Fri, Feb 01, 2002 at 12:45:02PM +0100, Maciej W. Rozycki wrote: > On Thu, 31 Jan 2002, H . J . Lu wrote: > > > > Gas will fill delay slots. Same object codes will be produced, so I > > > think you don't have to do that by hand. > > > > It will make the code more readable. We don't have to guess what > > the assembler will do. > > But you lose a chance for something useful being reordered to the slot. > That might not necessarily be a "nop". Please don't forget of indents > anyway. > Here is a new patch. I use branch likely to get rid of nops. Please tell me which indents I may have missed. Thanks. H.J. ---- 2002-02-01 H.J. Lu <hjl@gnu.org> * sysdeps/mips/pspinlock.c (__pthread_spin_lock): Use a different register in the delayed slot. Use branch likely. * sysdeps/mips/pt-machine.h (testandset): Call _test_and_set. (__compare_and_swap): Return 0 only when failed to compare. Use branch likely. 2002-02-01 H.J. Lu <hjl@gnu.org> * sysdeps/mips/atomicity.h (exchange_and_add): Use branch likely. (atomic_add): Likewise. (compare_and_swap): Return 0 only when failed to compare. Use branch likely. * sysdeps/unix/sysv/linux/mips/sys/tas.h (_test_and_set): Use branch likely. --- glibc-2.2.4/linuxthreads/sysdeps/mips/pspinlock.c.llsc Thu Jan 31 10:49:37 2002 +++ glibc-2.2.4/linuxthreads/sysdeps/mips/pspinlock.c Fri Feb 1 10:17:47 2002 @@ -29,20 +29,21 @@ int __pthread_spin_lock (pthread_spinlock_t *lock) { - unsigned int tmp; + unsigned int tmp1, tmp2; asm volatile ("\t\t\t# spin_lock\n\t" + "ll %1,%3\n" "1:\n\t" - "ll %1,%2\n\t" ".set push\n\t" ".set noreorder\n\t" "bnez %1,1b\n\t" - " li %1,1\n\t" - ".set pop\n\t" - "sc %1,%0\n\t" - "beqz %1,1b" - : "=m" (*lock), "=&r" (tmp) + "li %2,1\n\t" + "sc %2,%0\n\t" + "beqzl %2,1b\n\t" + "ll %1,%3\n\t" + ".set pop" + : "=m" (*lock), "=&r" (tmp1), "=&r" (tmp2) : "m" (*lock) : "memory"); --- glibc-2.2.4/linuxthreads/sysdeps/mips/pt-machine.h.llsc Sat Dec 9 08:55:34 2000 +++ glibc-2.2.4/linuxthreads/sysdeps/mips/pt-machine.h Fri Feb 1 10:14:18 2002 @@ -33,41 +33,11 @@ /* Spinlock implementation; required. */ -#if (_MIPS_ISA >= _MIPS_ISA_MIPS2) - -PT_EI long int -testandset (int *spinlock) -{ - long int ret, temp; - - __asm__ __volatile__ - ("/* Inline spinlock test & set */\n\t" - "1:\n\t" - "ll %0,%3\n\t" - ".set push\n\t" - ".set noreorder\n\t" - "bnez %0,2f\n\t" - " li %1,1\n\t" - ".set pop\n\t" - "sc %1,%2\n\t" - "beqz %1,1b\n" - "2:\n\t" - "/* End spinlock test & set */" - : "=&r" (ret), "=&r" (temp), "=m" (*spinlock) - : "m" (*spinlock) - : "memory"); - - return ret; -} - -#else /* !(_MIPS_ISA >= _MIPS_ISA_MIPS2) */ - PT_EI long int testandset (int *spinlock) { return _test_and_set (spinlock, 1); } -#endif /* !(_MIPS_ISA >= _MIPS_ISA_MIPS2) */ /* Get some notion of the current stack. Need not be exactly the top @@ -84,22 +54,24 @@ register char * stack_pointer __asm__ (" PT_EI int __compare_and_swap (long int *p, long int oldval, long int newval) { - long int ret; + long int ret, temp; __asm__ __volatile__ ("/* Inline compare & swap */\n\t" + "ll %1,%5\n" "1:\n\t" - "ll %0,%4\n\t" - ".set push\n" + ".set push\n\t" ".set noreorder\n\t" - "bne %0,%2,2f\n\t" - " move %0,%3\n\t" - ".set pop\n\t" - "sc %0,%1\n\t" - "beqz %0,1b\n" + "bne %1,%3,2f\n\t" + "move %0,$0\n\t" + "move %0,%4\n\t" + "sc %0,%2\n\t" + "beqzl %0,1b\n\t" + "ll %1,%5\n\t" + ".set pop\n" "2:\n\t" "/* End compare & swap */" - : "=&r" (ret), "=m" (*p) + : "=&r" (ret), "=&r" (temp), "=m" (*p) : "r" (oldval), "r" (newval), "m" (*p) : "memory"); --- glibc-2.2.4/sysdeps/mips/atomicity.h.llsc Mon Jul 9 11:58:19 2001 +++ glibc-2.2.4/sysdeps/mips/atomicity.h Fri Feb 1 09:55:05 2002 @@ -33,11 +33,15 @@ exchange_and_add (volatile uint32_t *mem __asm__ __volatile__ ("/* Inline exchange & add */\n\t" + "ll %0,%3\n" "1:\n\t" - "ll %0,%3\n\t" "addu %1,%4,%0\n\t" "sc %1,%2\n\t" - "beqz %1,1b\n\t" + ".set push\n\t" + ".set noreorder\n\t" + "beqzl %1,1b\n\t" + "ll %0,%3\n\t" + ".set pop\n\t" "/* End exchange & add */" : "=&r"(result), "=&r"(tmp), "=m"(*mem) : "m" (*mem), "r"(val) @@ -54,11 +58,15 @@ atomic_add (volatile uint32_t *mem, int __asm__ __volatile__ ("/* Inline atomic add */\n\t" + "ll %0,%2\n" "1:\n\t" - "ll %0,%2\n\t" "addu %0,%3,%0\n\t" "sc %0,%1\n\t" - "beqz %0,1b\n\t" + ".set push\n\t" + ".set noreorder\n\t" + "beqzl %0,1b\n\t" + "ll %0,%2\n\t" + ".set pop\n\t" "/* End atomic add */" : "=&r"(result), "=m"(*mem) : "m" (*mem), "r"(val) @@ -69,22 +77,24 @@ static inline int __attribute__ ((unused)) compare_and_swap (volatile long int *p, long int oldval, long int newval) { - long int ret; + long int ret, temp; __asm__ __volatile__ ("/* Inline compare & swap */\n\t" + "ll %1,%5\n" "1:\n\t" - "ll %0,%4\n\t" - ".set push\n" + ".set push\n\t" ".set noreorder\n\t" - "bne %0,%2,2f\n\t" - "move %0,%3\n\t" - ".set pop\n\t" - "sc %0,%1\n\t" - "beqz %0,1b\n" + "bne %1,%3,2f\n\t" + "move %0,$0\n\t" + "move %0,%4\n\t" + "sc %0,%2\n\t" + "beqzl %0,1b\n\t" + "ll %1,%5\n\t" + ".set pop\n" "2:\n\t" "/* End compare & swap */" - : "=&r" (ret), "=m" (*p) + : "=&r" (ret), "=&r" (temp), "=m" (*p) : "r" (oldval), "r" (newval), "m" (*p) : "memory"); --- glibc-2.2.4/sysdeps/unix/sysv/linux/mips/sys/tas.h.llsc Mon Jul 9 11:58:45 2001 +++ glibc-2.2.4/sysdeps/unix/sysv/linux/mips/sys/tas.h Fri Feb 1 09:56:47 2002 @@ -42,16 +42,19 @@ _test_and_set (int *p, int v) __THROW int r, t; __asm__ __volatile__ - ("1:\n\t" - "ll %0,%3\n\t" + ("/* Inline test and set */\n\t" + "ll %0,%3\n" + "1:\n\t" ".set push\n\t" ".set noreorder\n\t" "beq %0,%4,2f\n\t" - " move %1,%4\n\t" - ".set pop\n\t" + "move %1,%4\n\t" "sc %1,%2\n\t" - "beqz %1,1b\n" - "2:\n" + "beqzl %1,1b\n\t" + "ll %0,%3\n\t" + ".set pop\n" + "2:\n\t" + "/* End test and set */" : "=&r" (r), "=&r" (t), "=m" (*p) : "m" (*p), "r" (v) : "memory"); ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips (take 3) 2002-02-01 18:29 ` PATCH: Fix ll/sc for mips (take 3) H . J . Lu @ 2002-02-01 23:01 ` Daniel Jacobowitz 2002-02-01 23:15 ` H . J . Lu 0 siblings, 1 reply; 75+ messages in thread From: Daniel Jacobowitz @ 2002-02-01 23:01 UTC (permalink / raw) To: H . J . Lu; +Cc: Maciej W. Rozycki, Hiroyuki Machida, libc-alpha, linux-mips On Fri, Feb 01, 2002 at 10:29:43AM -0800, H . J . Lu wrote: > On Fri, Feb 01, 2002 at 12:45:02PM +0100, Maciej W. Rozycki wrote: > > On Thu, 31 Jan 2002, H . J . Lu wrote: > > > > > > Gas will fill delay slots. Same object codes will be produced, so I > > > > think you don't have to do that by hand. > > > > > > It will make the code more readable. We don't have to guess what > > > the assembler will do. > > > > But you lose a chance for something useful being reordered to the slot. > > That might not necessarily be a "nop". Please don't forget of indents > > anyway. > > > > Here is a new patch. I use branch likely to get rid of nops. Please > tell me which indents I may have missed. Can you really assume presence of the branch-likely instruction? I don't think so. -- Daniel Jacobowitz Carnegie Mellon University MontaVista Software Debian GNU/Linux Developer ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips (take 3) 2002-02-01 23:01 ` Daniel Jacobowitz @ 2002-02-01 23:15 ` H . J . Lu 2002-02-02 2:37 ` Hiroyuki Machida 2002-02-02 3:26 ` Daniel Jacobowitz 0 siblings, 2 replies; 75+ messages in thread From: H . J . Lu @ 2002-02-01 23:15 UTC (permalink / raw) To: Maciej W. Rozycki, Hiroyuki Machida, libc-alpha, linux-mips On Fri, Feb 01, 2002 at 06:01:26PM -0500, Daniel Jacobowitz wrote: > On Fri, Feb 01, 2002 at 10:29:43AM -0800, H . J . Lu wrote: > > On Fri, Feb 01, 2002 at 12:45:02PM +0100, Maciej W. Rozycki wrote: > > > On Thu, 31 Jan 2002, H . J . Lu wrote: > > > > > > > > Gas will fill delay slots. Same object codes will be produced, so I > > > > > think you don't have to do that by hand. > > > > > > > > It will make the code more readable. We don't have to guess what > > > > the assembler will do. > > > > > > But you lose a chance for something useful being reordered to the slot. > > > That might not necessarily be a "nop". Please don't forget of indents > > > anyway. > > > > > > > Here is a new patch. I use branch likely to get rid of nops. Please > > tell me which indents I may have missed. > > Can you really assume presence of the branch-likely instruction? I > don't think so. Why not? Can you show me a MIPS II or above CPU which doesn't have branch-likely instruction? From gcc, /* ISA has branch likely instructions (eg. mips2). */ /* Disable branchlikely for tx39 until compare rewrite. They haven't been generated up to this point. */ #define ISA_HAS_BRANCHLIKELY (mips_isa != 1 \ /* || TARGET_MIPS3900 */) Did I miss something? H.J. ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips (take 3) 2002-02-01 23:15 ` H . J . Lu @ 2002-02-02 2:37 ` Hiroyuki Machida 2002-02-04 9:32 ` Dominic Sweetman 2002-02-02 3:26 ` Daniel Jacobowitz 1 sibling, 1 reply; 75+ messages in thread From: Hiroyuki Machida @ 2002-02-02 2:37 UTC (permalink / raw) To: hjl; +Cc: macro, libc-alpha, linux-mips From: "H . J . Lu" <hjl@lucon.org> Subject: Re: PATCH: Fix ll/sc for mips (take 3) Date: Fri, 1 Feb 2002 15:15:13 -0800 > Why not? Can you show me a MIPS II or above CPU which doesn't have > branch-likely instruction? From gcc, > > /* ISA has branch likely instructions (eg. mips2). */ > /* Disable branchlikely for tx39 until compare rewrite. They haven't > been generated up to this point. */ > #define ISA_HAS_BRANCHLIKELY (mips_isa != 1 \ > /* || TARGET_MIPS3900 */) > > Did I miss something? I think we can assume CPU has branch-likely insns, if CPU has MIPS ISA 2 or greater ISA, as H.J said. I don't know any exception of this. If anyone know exceptions, please let us know. (FYI: we can't assume CPU has LL/SC even if CPU has branch-likely insns. ) HL's patch looks good for me. --- Hiroyuki Machida Sony Corp. ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips (take 3) 2002-02-02 2:37 ` Hiroyuki Machida @ 2002-02-04 9:32 ` Dominic Sweetman 2002-02-05 6:16 ` Jay Carlson 0 siblings, 1 reply; 75+ messages in thread From: Dominic Sweetman @ 2002-02-04 9:32 UTC (permalink / raw) To: Hiroyuki Machida; +Cc: hjl, linux-mips Hiroyuki Machida (machida@sm.sony.co.jp) writes: > I think we can assume CPU has branch-likely insns, if CPU has MIPS > ISA 2 or greater ISA.. "MIPS II" is officially the instruction set introduced for the long lost R6000 CPU. But "MIPS II" is now used to mean "the 32-bit subset of MIPS III" (which is extremely close to the same thing, but I'm never quite sure about the last details of the R6000 - Kevin would remember better, probably). OK: branch-likely is definitely part of MIPS II and MIPS32. There are still MIPS CPUs in regular use which are based on MIPS I and don't provide them. Generally the advantages of MIPS II are slight, so if you want to build a kernel which will not require instruction-set variants, it's no big deal to restrict it to MIPS I. > (FYI: we can't assume CPU has LL/SC even if CPU has branch-likely > insns. ) LL/SC is also part of MIPS III (and the 32-bit variants are thus taken to be in MIPS II). Unfortunately, the documentation of LL/SC gave the impression that they were useful only in multiprocessor systems, so they were omitted by NEC building the Vr41xx and Toshiba's R59xx. In both cases it's a bug - but since it isn't about to be fixed, you need workarounds. In these more enlightened days, CPU vendors are more likely to ask an operating system person before they leave out bits of the instruction set, so we hope it won't happen again! Dominic Sweetman Algorithmics Ltd The Fruit Farm, Ely Road, Chittering, CAMBS CB5 9PH, ENGLAND phone: +44 1223 706200 / fax: +44 1223 706250 / direct: +44 1223 706205 http://www.algor.co.uk ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips (take 3) 2002-02-04 9:32 ` Dominic Sweetman @ 2002-02-05 6:16 ` Jay Carlson 2002-02-05 8:28 ` Ralf Baechle 0 siblings, 1 reply; 75+ messages in thread From: Jay Carlson @ 2002-02-05 6:16 UTC (permalink / raw) To: Dominic Sweetman; +Cc: Jay Carlson, Hiroyuki Machida, hjl, linux-mips On Monday, February 4, 2002, at 04:32 AM, Dominic Sweetman wrote: > > Hiroyuki Machida (machida@sm.sony.co.jp) writes: > >> I think we can assume CPU has branch-likely insns, if CPU has MIPS >> ISA 2 or greater ISA.. > > "MIPS II" is officially the instruction set introduced for the long > lost R6000 CPU. > > But "MIPS II" is now used to mean "the 32-bit subset of MIPS III" > (which is extremely close to the same thing, but I'm never quite sure > about the last details of the R6000 - Kevin would remember better, > probably). > > OK: branch-likely is definitely part of MIPS II and MIPS32. There are > still MIPS CPUs in regular use which are based on MIPS I and don't > provide them. Generally the advantages of MIPS II are slight, so if > you want to build a kernel which will not require instruction-set > variants, it's no big deal to restrict it to MIPS I. There's load interlocks, which you can depend on starting with MIPS II. They're also present on the TX39 and VR41xx. For non-PIC code, I see around a 5-7% reduction in userland code size by compiling with -Wa,-mips2, which afaict just eliminates the generation of nops after loads. The compiler is still generating code anticipating the delays, which is good. For PIC code, I remember the benefits being in the same range. And you're fighting gas, which will generate load-delay nops in the middle of la/lw/sw macro expansions in PIC mode regardless of what mips architecture you're building for. I made some patches against an older binutils to eliminate those nops for mips2 and up, and got ANOTHER 5-7%, of course depending on actual instruction mix. Given that I tossed out the SVR4 ABI in search of code density in snow, I'm probably a little abnormal in these concerns. But other people on small platforms may care. Jay ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips (take 3) 2002-02-05 6:16 ` Jay Carlson @ 2002-02-05 8:28 ` Ralf Baechle 2002-02-05 15:10 ` Jay Carlson 0 siblings, 1 reply; 75+ messages in thread From: Ralf Baechle @ 2002-02-05 8:28 UTC (permalink / raw) To: Jay Carlson; +Cc: Dominic Sweetman, Hiroyuki Machida, hjl, linux-mips On Tue, Feb 05, 2002 at 01:16:46AM -0500, Jay Carlson wrote: > Given that I tossed out the SVR4 ABI in search of code density in snow, > I'm probably a little abnormal in these concerns. But other people on > small platforms may care. SNOW certainly was a nice invention and the definition of small is changing. Are you planning to keep up the support for SNOW? Ralf ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips (take 3) 2002-02-05 8:28 ` Ralf Baechle @ 2002-02-05 15:10 ` Jay Carlson 2002-02-05 16:06 ` Jay Carlson 0 siblings, 1 reply; 75+ messages in thread From: Jay Carlson @ 2002-02-05 15:10 UTC (permalink / raw) To: Ralf Baechle Cc: Jay Carlson, Dominic Sweetman, Hiroyuki Machida, hjl, linux-mips On Tuesday, February 5, 2002, at 03:28 AM, Ralf Baechle wrote: > On Tue, Feb 05, 2002 at 01:16:46AM -0500, Jay Carlson wrote: > >> Given that I tossed out the SVR4 ABI in search of code density in snow, >> I'm probably a little abnormal in these concerns. But other people on >> small platforms may care. > > SNOW certainly was a nice invention and the definition of small is > changing. Are you planning to keep up the support for SNOW? Yeah, although it's slow going. As usual, "which toolchain" plays a major part in the delay :-) (Quick background for the list: Because there's such a large code size penalty to PIC/abicalls, I resurrected the bad old Linux/SVR3 statically linked, dynamically loaded libraries, which are linked at absolute locations. Shane Nay took this from a cute demo to a working distribution for the Agenda VR3; Brian Webb helped. Typical code reduction is ~25-40%, eg 391k->272k.) I think I finally have a working jumptable implementation, which should allow (careful!) upgrades of libraries without triggering app rebuilds. The pain was not actually jumptables themselves; it was getting all exported data in a library shuffled around so it could live at fixed addresses. -fdata-sections let me do most of this with linker scripts. Unfortunately, g++ was still emitting "int foo;" in ".bss", so I had to fix it to generate the proper ".bss.foo". I created a toolchain builder based on the RH71 SRPMs. About halfway through this, I remembered why I hadn't upgraded from glibc 2.0 to 2.2 before---the library size doubled. So after getting a few huge statically linked executables tested to make sure the toolchain was sound, I backtracked to uclibc and the old Agenda glibc "2.0.7". I have plausible-looking builds of both, but I haven't actually run them on real hardware..... (Is there any hope of patching glibc 2.2.4 with the sglibc patches? If so, how?) I started writing a /lib/ld-snow.so.1 to get the library loading code out of the main body of executables. I'm hoping to have a beta of a full toolchain/library build tool shipped this weekend or next, and from there hook into the community efforts to automate rebuilding the Agenda VR3 from source. I figure X and a pile of C++ fltk apps will expose any lingering bugs in the toolchain and new bugs in the shared library mechanism. I'd like to be less Agenda-centric. I think this work would benefit the Helio as well as other small platforms. But as you say, the definition of "small" is changing; hopefully in few years, machines will be big enough to support the SVR4 ABI without second thoughts. I don't know; it's possible that the really low end machines will just get cheaper instead of bigger. Speaking of small, anybody have a Linux box with a CPU with working MIPS16 support? (Other than Vr41xx; I have plenty of those to test on.) I have a small test case I'd like someone to run. Jay ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips (take 3) 2002-02-05 15:10 ` Jay Carlson @ 2002-02-05 16:06 ` Jay Carlson 0 siblings, 0 replies; 75+ messages in thread From: Jay Carlson @ 2002-02-05 16:06 UTC (permalink / raw) To: Jay Carlson; +Cc: linux-mips On Tuesday, February 5, 2002, at 10:10 AM, Jay Carlson wrote: > (Quick background for the list: Because there's such a large code size > penalty to PIC/abicalls, I resurrected the bad old Linux/SVR3 > statically linked, dynamically loaded libraries, which are linked at > absolute locations. Shane Nay took this from a cute demo to a working > distribution for the Agenda VR3; Brian Webb helped. Typical code > reduction is ~25-40%, eg 391k->272k.) Oh yes, performance. Apps on the Agenda VR3 built in the snow ABI are dramatically faster/more responsive. If you don't believe me, go search the agenda-dev list and read the testimonials :-) I don't fully understand why, though. Here are my speculations; bear in mind that the VR3 and some of the other small boxes have 16-bit memory interfaces with small i/d caches. 1) Better icache efficiency. 2) Fewer loads (and stalls) to get typical work done. In PIC, you need a load per symbol reference, and that's every function call. 3) Better dcache efficiency. The GOT no longer needs to be hit for those symbol references. 4) Reduced TLB usage. The GOT pages for each module are quite hot, so now that we're no longer touching them, their 4k (ouch) TLB entries can point somewhere more useful. 5) No symbol resolution at load time. For C++ apps, this can help startup a lot. (prelinking fixes this too) 6) Better scheduling from gcc. egcs seemed to do a better job of arranging loads ahead of use when building non-pic; on the TX39, this helps even more due to non-blocking loads. I dunno. Jay ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips (take 3) 2002-02-01 23:15 ` H . J . Lu 2002-02-02 2:37 ` Hiroyuki Machida @ 2002-02-02 3:26 ` Daniel Jacobowitz 2002-02-02 18:53 ` Justin Carlson 1 sibling, 1 reply; 75+ messages in thread From: Daniel Jacobowitz @ 2002-02-02 3:26 UTC (permalink / raw) To: H . J . Lu; +Cc: Maciej W. Rozycki, Hiroyuki Machida, libc-alpha, linux-mips On Fri, Feb 01, 2002 at 03:15:13PM -0800, H . J . Lu wrote: > On Fri, Feb 01, 2002 at 06:01:26PM -0500, Daniel Jacobowitz wrote: > > On Fri, Feb 01, 2002 at 10:29:43AM -0800, H . J . Lu wrote: > > > On Fri, Feb 01, 2002 at 12:45:02PM +0100, Maciej W. Rozycki wrote: > > > > On Thu, 31 Jan 2002, H . J . Lu wrote: > > > > > > > > > > Gas will fill delay slots. Same object codes will be produced, so I > > > > > > think you don't have to do that by hand. > > > > > > > > > > It will make the code more readable. We don't have to guess what > > > > > the assembler will do. > > > > > > > > But you lose a chance for something useful being reordered to the slot. > > > > That might not necessarily be a "nop". Please don't forget of indents > > > > anyway. > > > > > > > > > > Here is a new patch. I use branch likely to get rid of nops. Please > > > tell me which indents I may have missed. > > > > Can you really assume presence of the branch-likely instruction? I > > don't think so. > > Why not? Can you show me a MIPS II or above CPU which doesn't have > branch-likely instruction? From gcc, > > /* ISA has branch likely instructions (eg. mips2). */ > /* Disable branchlikely for tx39 until compare rewrite. They haven't > been generated up to this point. */ > #define ISA_HAS_BRANCHLIKELY (mips_isa != 1 \ > /* || TARGET_MIPS3900 */) > > Did I miss something? My fault. I was indeed thinking of the tx39, which is not normally MIPS2. -- Daniel Jacobowitz Carnegie Mellon University MontaVista Software Debian GNU/Linux Developer ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips (take 3) 2002-02-02 3:26 ` Daniel Jacobowitz @ 2002-02-02 18:53 ` Justin Carlson 2002-02-02 20:03 ` H . J . Lu 2002-02-04 6:46 ` Ralf Baechle 0 siblings, 2 replies; 75+ messages in thread From: Justin Carlson @ 2002-02-02 18:53 UTC (permalink / raw) To: Daniel Jacobowitz Cc: H . J . Lu, Maciej W. Rozycki, Hiroyuki Machida, libc-alpha, linux-mips On Fri, 2002-02-01 at 22:26, Daniel Jacobowitz wrote: > On Fri, Feb 01, 2002 at 03:15:13PM -0800, H . J . Lu wrote: > > On Fri, Feb 01, 2002 at 06:01:26PM -0500, Daniel Jacobowitz wrote: > > > On Fri, Feb 01, 2002 at 10:29:43AM -0800, H . J . Lu wrote: > > > > On Fri, Feb 01, 2002 at 12:45:02PM +0100, Maciej W. Rozycki wrote: > > > > > On Thu, 31 Jan 2002, H . J . Lu wrote: > > > > > > > > > > > > Gas will fill delay slots. Same object codes will be produced, so I > > > > > > > think you don't have to do that by hand. > > > > > > > > > > > > It will make the code more readable. We don't have to guess what > > > > > > the assembler will do. > > > > > > > > > > But you lose a chance for something useful being reordered to the slot. > > > > > That might not necessarily be a "nop". Please don't forget of indents > > > > > anyway. > > > > > > > > > > > > > Here is a new patch. I use branch likely to get rid of nops. Please > > > > tell me which indents I may have missed. > > > > > > Can you really assume presence of the branch-likely instruction? I > > > don't think so. > > Actually, regardless of whether modern cpus implement it, I'd argue that avoiding the branch likely is a good idea for 2 reasons: 1) In the latest MIPS specs (mips32 and mips64) branch likelies have officially been deprecated as probable removals from the architecture in the not-too-distant future. 2) More importantly, most implementations don't use any sort of dynamic branch prediction on branch likelies. They predict taken, always, since that's the specified intent (it's a branch *likely* to be taken). For most spin locks, the normal behaviour is a fall through, not taking that branch, so you're inflicting a branch mispredict penalty on every lock grabbed without contention. Even for locks which the general case is contention, giving the processor branch predictor a chance to learn that is a Good Idea. -Justin ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips (take 3) 2002-02-02 18:53 ` Justin Carlson @ 2002-02-02 20:03 ` H . J . Lu 2002-02-02 20:49 ` Hartvig Ekner ` (2 more replies) 2002-02-04 6:46 ` Ralf Baechle 1 sibling, 3 replies; 75+ messages in thread From: H . J . Lu @ 2002-02-02 20:03 UTC (permalink / raw) To: Justin Carlson Cc: Daniel Jacobowitz, Maciej W. Rozycki, Hiroyuki Machida, libc-alpha, linux-mips, gcc On Sat, Feb 02, 2002 at 01:53:23PM -0500, Justin Carlson wrote: > > Actually, regardless of whether modern cpus implement it, I'd argue that > avoiding the branch likely is a good idea for 2 reasons: > > 1) In the latest MIPS specs (mips32 and mips64) branch likelies have > officially been deprecated as probable removals from the architecture in > the not-too-distant future. > > 2) More importantly, most implementations don't use any sort of dynamic > branch prediction on branch likelies. They predict taken, always, since > that's the specified intent (it's a branch *likely* to be taken). For > most spin locks, the normal behaviour is a fall through, not taking that > branch, so you're inflicting a branch mispredict penalty on every lock > grabbed without contention. Even for locks which the general case is > contention, giving the processor branch predictor a chance to learn that > is a Good Idea. > Does everyone agree with this? If yes, I can make a patch not to use branch likely. But on the other hand, "gcc -mips2" will generate code using branch likely. If branch likely doesn't buy you anything, shouldn't we change gcc not to generate branch likely instructions? H.J. ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips (take 3) 2002-02-02 20:03 ` H . J . Lu @ 2002-02-02 20:49 ` Hartvig Ekner 2002-02-02 20:49 ` Hartvig Ekner 2002-02-03 5:47 ` Justin Carlson [not found] ` <mailpost.1012680250.7159@news-sj1-1> 2 siblings, 1 reply; 75+ messages in thread From: Hartvig Ekner @ 2002-02-02 20:49 UTC (permalink / raw) To: H . J . Lu Cc: Justin Carlson, Daniel Jacobowitz, Maciej W. Rozycki, Hiroyuki Machida, libc-alpha, linux-mips, gcc H . J . Lu writes: > Does everyone agree with this? If yes, I can make a patch not to use > branch likely. But on the other hand, "gcc -mips2" will generate code > using branch likely. If branch likely doesn't buy you anything, > shouldn't we change gcc not to generate branch likely instructions? I would say it's still too early to actively remove them. There are many processors out there (certainly in the embedded world most of them) without branch prediction, and for all of those branch-likelys actually help performance. /Hartvig ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips (take 3) 2002-02-02 20:49 ` Hartvig Ekner @ 2002-02-02 20:49 ` Hartvig Ekner 0 siblings, 0 replies; 75+ messages in thread From: Hartvig Ekner @ 2002-02-02 20:49 UTC (permalink / raw) To: H . J . Lu Cc: Justin Carlson, Daniel Jacobowitz, Maciej W. Rozycki, Hiroyuki Machida, libc-alpha, linux-mips, gcc H . J . Lu writes: > Does everyone agree with this? If yes, I can make a patch not to use > branch likely. But on the other hand, "gcc -mips2" will generate code > using branch likely. If branch likely doesn't buy you anything, > shouldn't we change gcc not to generate branch likely instructions? I would say it's still too early to actively remove them. There are many processors out there (certainly in the embedded world most of them) without branch prediction, and for all of those branch-likelys actually help performance. /Hartvig ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips (take 3) 2002-02-02 20:03 ` H . J . Lu 2002-02-02 20:49 ` Hartvig Ekner @ 2002-02-03 5:47 ` Justin Carlson 2002-02-04 19:17 ` Paul Koning [not found] ` <mailpost.1012680250.7159@news-sj1-1> 2 siblings, 1 reply; 75+ messages in thread From: Justin Carlson @ 2002-02-03 5:47 UTC (permalink / raw) To: H . J . Lu Cc: Daniel Jacobowitz, Maciej W. Rozycki, Hiroyuki Machida, libc-alpha, linux-mips, gcc On Sat, 2002-02-02 at 15:03, H . J . Lu wrote: > On Sat, Feb 02, 2002 at 01:53:23PM -0500, Justin Carlson wrote: > > > > Actually, regardless of whether modern cpus implement it, I'd argue that > > avoiding the branch likely is a good idea for 2 reasons: > > > > 1) In the latest MIPS specs (mips32 and mips64) branch likelies have > > officially been deprecated as probable removals from the architecture in > > the not-too-distant future. > > > > 2) More importantly, most implementations don't use any sort of dynamic > > branch prediction on branch likelies. They predict taken, always, since > > that's the specified intent (it's a branch *likely* to be taken). For > > most spin locks, the normal behaviour is a fall through, not taking that > > branch, so you're inflicting a branch mispredict penalty on every lock > > grabbed without contention. Even for locks which the general case is > > contention, giving the processor branch predictor a chance to learn that > > is a Good Idea. > > > > Does everyone agree with this? If yes, I can make a patch not to use > branch likely. But on the other hand, "gcc -mips2" will generate code > using branch likely. If branch likely doesn't buy you anything, > shouldn't we change gcc not to generate branch likely instructions? > I know of at least one internal version of gcc which already has been hacked to remove generation of branch likely instructions. It actually helped performance a bit because gcc has (possibly had, this was 6-9 months ago) the bad habit of generating code like this: beqzl $1, next daddiu $2, $3, 1 next: ... This looks like a nice, compact way to do a conditional move. In practice, it's a horrendous idea due to the lack of consideration of the branch prediction. It's easier to tell the compiler not to generate branch likelies than to try to fix the code generation for this case. :) Also, I didn't say branch likely doesn't buy you anything; there are situations where it works well. Looking at the spin_lock code, though, this isn't one of them. The cases where it is a win are far enough between that my personal practice is to generally avoid them. -Justin ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips (take 3) 2002-02-03 5:47 ` Justin Carlson @ 2002-02-04 19:17 ` Paul Koning 0 siblings, 0 replies; 75+ messages in thread From: Paul Koning @ 2002-02-04 19:17 UTC (permalink / raw) To: justinca; +Cc: hjl, dan, macro, machida, libc-alpha, linux-mips, gcc >>>>> "Justin" == Justin Carlson <justinca@ri.cmu.edu> writes: Justin> On Sat, 2002-02-02 at 15:03, H . J . Lu wrote: >> Does everyone agree with this? If yes, I can make a patch not to >> use branch likely. But on the other hand, "gcc -mips2" will >> generate code using branch likely. If branch likely doesn't buy >> you anything, shouldn't we change gcc not to generate branch >> likely instructions? >> Justin> I know of at least one internal version of gcc which already Justin> has been hacked to remove generation of branch likely Justin> instructions. More precisely (if you're looking at the same one I was) -- it has a target processor type check that disables it for those CPUs where it is known to be not a good idea. Justin> Also, I didn't say branch likely doesn't buy you anything; Justin> there are situations where it works well. Looking at the Justin> spin_lock code, though, this isn't one of them. I agree, and I think that point was missed. Independent of whether a particular processor (MIPS or otherwise) has the concept of "branch likely", the design rule of spinlocks is that you try to avoid spinning (i.e., avoid lock contention) in the system design. So for that particular construct, the right answer is "branch not likely". paul ^ permalink raw reply [flat|nested] 75+ messages in thread
[parent not found: <mailpost.1012680250.7159@news-sj1-1>]
* Re: PATCH: Fix ll/sc for mips (take 3) [not found] ` <mailpost.1012680250.7159@news-sj1-1> @ 2002-02-03 23:29 ` cgd 2002-02-04 6:07 ` Ralf Baechle 2002-02-04 9:46 ` Dominic Sweetman 0 siblings, 2 replies; 75+ messages in thread From: cgd @ 2002-02-03 23:29 UTC (permalink / raw) To: hjl Cc: Justin Carlson, Daniel Jacobowitz, Maciej W. Rozycki, Hiroyuki Machida, libc-alpha, linux-mips, gcc [ Hi Justin, enjoying winter in pgh? 8-] At Sat, 2 Feb 2002 20:04:10 +0000 (UTC), "H . J . Lu" wrote: > Does everyone agree with this? If yes, I can make a patch not to use > branch likely. But on the other hand, "gcc -mips2" will generate code > using branch likely. If branch likely doesn't buy you anything, > shouldn't we change gcc not to generate branch likely instructions? Branch-likely instructions probably _do_ buy you something (at least, slightly less code size) on some CPUs, probably even some CPUs which are still being produced. FYI, somebody, i think it was Mike Stump, submitted a patch to add a flag to disable branch-likely instructions N months ago. It was discussed a little bit, not AFAIK nothing was ever done with it. To quote the MIPS32 instruction set document from the MIPS web site: > Programming Notes: > > [ ... ] > > Software is strongly encouraged to avoid the use of the Branch > Likely instructions, as they will be removed from a future revision > of the MIPS Architecture. > > Some implementations always predict the branch will be taken, so > there is a significant penalty if the branch is not taken. Software > should only use this instruction when there is a very high > probability (98% or more) that the branch will be taken. If the > branch is not likely to be taken or if the probability of a taken > branch is unknown, software is encouraged to use the BGEZAL > instruction instead. If you have a sufficiently high static likelyhood of branch-taken, it may well be worthwhile to use branch-likely even on MIPS32/MIPS64 if it'll save you code space, or if you can determine that not clogging up your branch history / predictor buffers makes your code run faster. However, it should definitely not be the default. Anyway, from where I sit, this should be resolved like: * -mips1 - -mips5 should generate them by default (for strongly-predicted branches), -mips32 & -mips64 should not. (modify that for your favorite flag names, -march or whatever. 8-) * There should be flags to override the defaults either way, so you could say -mips2 -mno-branch-likely, or -mips32 -mbranch-likely. cgd ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips (take 3) 2002-02-03 23:29 ` cgd @ 2002-02-04 6:07 ` Ralf Baechle 2002-02-04 9:46 ` Dominic Sweetman 1 sibling, 0 replies; 75+ messages in thread From: Ralf Baechle @ 2002-02-04 6:07 UTC (permalink / raw) To: cgd Cc: hjl, Justin Carlson, Daniel Jacobowitz, Maciej W. Rozycki, Hiroyuki Machida, libc-alpha, linux-mips, gcc On Sun, Feb 03, 2002 at 03:29:28PM -0800, cgd@broadcom.com wrote: > At Sat, 2 Feb 2002 20:04:10 +0000 (UTC), "H . J . Lu" wrote: > > Does everyone agree with this? If yes, I can make a patch not to use > > branch likely. But on the other hand, "gcc -mips2" will generate code > > using branch likely. If branch likely doesn't buy you anything, > > shouldn't we change gcc not to generate branch likely instructions? > > Branch-likely instructions probably _do_ buy you something (at least, > slightly less code size) on some CPUs, probably even some CPUs which > are still being produced. I benchmarked the performance improvment on R4000/R4400 by using branch likely instructions to be in the range of 1-2% in a piece of pretty "branchy" code, so we don't want to disable branch likely right entirely. Newer CPU types, in particular those featuring branch prediction tend to perform differently. I suggest to enable branch likely in gcc for those > MIPS II CPUs where they're known to improve performance or when optimizing for code size. Unfortunately gcc's knowledge about such architecture details is rather limited. Ralf ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips (take 3) 2002-02-03 23:29 ` cgd 2002-02-04 6:07 ` Ralf Baechle @ 2002-02-04 9:46 ` Dominic Sweetman 2002-02-04 16:31 ` H . J . Lu 2002-02-04 17:44 ` cgd 1 sibling, 2 replies; 75+ messages in thread From: Dominic Sweetman @ 2002-02-04 9:46 UTC (permalink / raw) To: cgd; +Cc: linux-mips cgd@broadcom.com (cgd@broadcom.com) writes: > Branch-likely instructions probably _do_ buy you something (at > least, slightly less code size) on some CPUs, probably even some > CPUs which are still being produced. Here's how branch likely is used to improve performance in a simple MIPS CPU, and why it has no effect on code size. You start off with this: loopstart: insn 1 insn 2 .... insn last branch to loopstart nop In small loops, the last instruction in the loop might well calculate the branch condition, so it can't be moved into the delay slot of the loop-closing branch. That puts a no-op into every loop iteration. With branch-likely, you can transform the loop to loopstart: insn 1 loop2: insn 2 .... insn last branch-likely loop2 insn 1 (copy) The nop is replaced by a duplicate instruction from the top of the loop. Good for performance, no effect on code size. Builders of clever modern CPUs full of branch prediction hardware, multiple instruction issue and instruction re-ordering find the coupling of the branch-likely to the following instruction makes their CPUs more complicated. That's why MIPS have warned that the instructions will be removed from some future version of the MIPS instruction set. -- Dominic Sweetman Algorithmics Ltd The Fruit Farm, Ely Road, Chittering, CAMBS CB5 9PH, ENGLAND phone: +44 1223 706200 / fax: +44 1223 706250 / direct: +44 1223 706205 http://www.algor.co.uk ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips (take 3) 2002-02-04 9:46 ` Dominic Sweetman @ 2002-02-04 16:31 ` H . J . Lu 2002-02-04 16:46 ` Dominic Sweetman 2002-02-04 17:44 ` cgd 1 sibling, 1 reply; 75+ messages in thread From: H . J . Lu @ 2002-02-04 16:31 UTC (permalink / raw) To: Dominic Sweetman; +Cc: cgd, linux-mips On Mon, Feb 04, 2002 at 09:46:45AM +0000, Dominic Sweetman wrote: > > cgd@broadcom.com (cgd@broadcom.com) writes: > > > Branch-likely instructions probably _do_ buy you something (at > > least, slightly less code size) on some CPUs, probably even some > > CPUs which are still being produced. > > Here's how branch likely is used to improve performance in a simple > MIPS CPU, and why it has no effect on code size. > > You start off with this: > > loopstart: insn 1 > insn 2 > .... > insn last > branch to loopstart > nop > > In small loops, the last instruction in the loop might well calculate > the branch condition, so it can't be moved into the delay slot of the > loop-closing branch. That puts a no-op into every loop iteration. > With branch-likely, you can transform the loop to > > loopstart: insn 1 > loop2: insn 2 > .... > insn last > branch-likely loop2 > insn 1 (copy) > > The nop is replaced by a duplicate instruction from the top > of the loop. Good for performance, no effect on code size. > > Builders of clever modern CPUs full of branch prediction hardware, > multiple instruction issue and instruction re-ordering find the > coupling of the branch-likely to the following instruction makes their > CPUs more complicated. That's why MIPS have warned that the > instructions will be removed from some future version of the MIPS > instruction set. I can change glibc not to use branch-likely without using nop. But it may require one or two instructions outside of the loop. Should I do it given what we know now? H.J. ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips (take 3) 2002-02-04 16:31 ` H . J . Lu @ 2002-02-04 16:46 ` Dominic Sweetman 2002-02-05 1:28 ` H . J . Lu 0 siblings, 1 reply; 75+ messages in thread From: Dominic Sweetman @ 2002-02-04 16:46 UTC (permalink / raw) To: H . J . Lu; +Cc: Dominic Sweetman, cgd, linux-mips H . J . Lu (hjl@lucon.org) writes: > I can change glibc not to use branch-likely without using nop. But it > may require one or two instructions outside of the loop. Should I do > it given what we know now? I would not recommend using "branch likely" in assembler coding, if that's what you're asking. Dominic ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips (take 3) 2002-02-04 16:46 ` Dominic Sweetman @ 2002-02-05 1:28 ` H . J . Lu 2002-02-05 2:58 ` Daniel Jacobowitz 0 siblings, 1 reply; 75+ messages in thread From: H . J . Lu @ 2002-02-05 1:28 UTC (permalink / raw) To: Dominic Sweetman; +Cc: GNU C Library, linux-mips On Mon, Feb 04, 2002 at 04:46:07PM +0000, Dominic Sweetman wrote: > > H . J . Lu (hjl@lucon.org) writes: > > > I can change glibc not to use branch-likely without using nop. But it > > may require one or two instructions outside of the loop. Should I do > > it given what we know now? > > I would not recommend using "branch likely" in assembler coding, if > that's what you're asking. > Here is a patch to remove branch likely. But I couldn't find a way not to fill the delay slot with nop. BTW, is that safe to remove ".set noreorder"? H.J. --- 2002-02-04 H.J. Lu <hjl@gnu.org> * sysdeps/mips/pspinlock.c (__pthread_spin_lock): Not use branch likely. * sysdeps/mips/pt-machine.h (testandset): Liekwise. (__compare_and_swap): Liekwise. 2002-02-04 H.J. Lu <hjl@gnu.org> * sysdeps/mips/atomicity.h (exchange_and_add): Not use branch likely. (atomic_add): Likewise. (compare_and_swap): Likewise. * sysdeps/unix/sysv/linux/mips/sys/tas.h (_test_and_set): Likewise. --- libc/linuxthreads/sysdeps/mips/pspinlock.c.llsc Mon Feb 4 13:45:01 2002 +++ libc/linuxthreads/sysdeps/mips/pspinlock.c Mon Feb 4 17:09:02 2002 @@ -40,7 +40,7 @@ __pthread_spin_lock (pthread_spinlock_t "bnez %1,1b\n\t" " li %2,1\n\t" "sc %2,%0\n\t" - "beqzl %2,1b\n\t" + "beqz %2,1b\n\t" " ll %1,%3\n\t" ".set pop" : "=m" (*lock), "=&r" (tmp1), "=&r" (tmp2) --- libc/linuxthreads/sysdeps/mips/pt-machine.h.llsc Mon Feb 4 13:45:01 2002 +++ libc/linuxthreads/sysdeps/mips/pt-machine.h Mon Feb 4 17:09:36 2002 @@ -66,7 +66,7 @@ __compare_and_swap (long int *p, long in " move %0,$0\n\t" "move %0,%4\n\t" "sc %0,%2\n\t" - "beqzl %0,1b\n\t" + "beqz %0,1b\n\t" " ll %1,%5\n\t" ".set pop\n" "2:\n\t" --- libc/sysdeps/mips/atomicity.h.llsc Mon Feb 4 13:45:18 2002 +++ libc/sysdeps/mips/atomicity.h Mon Feb 4 17:14:48 2002 @@ -32,15 +32,15 @@ exchange_and_add (volatile uint32_t *mem int result, tmp; __asm__ __volatile__ - ("/* Inline exchange & add */\n\t" + ("/* Inline exchange & add */\n" + "1:\n" "ll %0,%3\n" - "1:\n\t" "addu %1,%4,%0\n\t" "sc %1,%2\n\t" ".set push\n\t" ".set noreorder\n\t" - "beqzl %1,1b\n\t" - " ll %0,%3\n\t" + "beqz %1,1b\n\t" + " nop\n\t" ".set pop\n\t" "/* End exchange & add */" : "=&r"(result), "=&r"(tmp), "=m"(*mem) @@ -57,15 +57,15 @@ atomic_add (volatile uint32_t *mem, int int result; __asm__ __volatile__ - ("/* Inline atomic add */\n\t" - "ll %0,%2\n" + ("/* Inline atomic add */\n" "1:\n\t" + "ll %0,%2\n" "addu %0,%3,%0\n\t" "sc %0,%1\n\t" ".set push\n\t" ".set noreorder\n\t" - "beqzl %0,1b\n\t" - " ll %0,%2\n\t" + "beqz %0,1b\n\t" + " nop\n\t" ".set pop\n\t" "/* End atomic add */" : "=&r"(result), "=m"(*mem) @@ -89,7 +89,7 @@ compare_and_swap (volatile long int *p, " move %0,$0\n\t" "move %0,%4\n\t" "sc %0,%2\n\t" - "beqzl %0,1b\n\t" + "beqz %0,1b\n\t" " ll %1,%5\n\t" ".set pop\n" "2:\n\t" --- libc/sysdeps/unix/sysv/linux/mips/sys/tas.h.llsc Mon Feb 4 13:45:28 2002 +++ libc/sysdeps/unix/sysv/linux/mips/sys/tas.h Mon Feb 4 17:19:06 2002 @@ -42,16 +42,16 @@ _test_and_set (int *p, int v) __THROW int r, t; __asm__ __volatile__ - ("/* Inline test and set */\n\t" - "ll %0,%3\n" + ("/* Inline test and set */\n" "1:\n\t" + "ll %0,%3\n" ".set push\n\t" ".set noreorder\n\t" "beq %0,%4,2f\n\t" " move %1,%4\n\t" "sc %1,%2\n\t" - "beqzl %1,1b\n\t" - " ll %0,%3\n\t" + "beqz %1,1b\n\t" + " nop\n\t" ".set pop\n" "2:\n\t" "/* End test and set */" ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips (take 3) 2002-02-05 1:28 ` H . J . Lu @ 2002-02-05 2:58 ` Daniel Jacobowitz 2002-02-05 4:42 ` H . J . Lu 2002-02-05 19:30 ` H . J . Lu 0 siblings, 2 replies; 75+ messages in thread From: Daniel Jacobowitz @ 2002-02-05 2:58 UTC (permalink / raw) To: H . J . Lu; +Cc: Dominic Sweetman, GNU C Library, linux-mips On Mon, Feb 04, 2002 at 05:28:57PM -0800, H . J . Lu wrote: > On Mon, Feb 04, 2002 at 04:46:07PM +0000, Dominic Sweetman wrote: > > > > H . J . Lu (hjl@lucon.org) writes: > > > > > I can change glibc not to use branch-likely without using nop. But it > > > may require one or two instructions outside of the loop. Should I do > > > it given what we know now? > > > > I would not recommend using "branch likely" in assembler coding, if > > that's what you're asking. > > > > Here is a patch to remove branch likely. But I couldn't find a way > not to fill the delay slot with nop. BTW, is that safe to remove > ".set noreorder"? You mean, if there is nothing which can be put there? Yes, it's safe. > --- libc/linuxthreads/sysdeps/mips/pspinlock.c.llsc Mon Feb 4 13:45:01 2002 > +++ libc/linuxthreads/sysdeps/mips/pspinlock.c Mon Feb 4 17:09:02 2002 > @@ -40,7 +40,7 @@ __pthread_spin_lock (pthread_spinlock_t > "bnez %1,1b\n\t" > " li %2,1\n\t" > "sc %2,%0\n\t" > - "beqzl %2,1b\n\t" > + "beqz %2,1b\n\t" > " ll %1,%3\n\t" > ".set pop" > : "=m" (*lock), "=&r" (tmp1), "=&r" (tmp2) Is that really what you meant to do? The ll is now in the delay slot of the beqz. > --- libc/linuxthreads/sysdeps/mips/pt-machine.h.llsc Mon Feb 4 13:45:01 2002 > +++ libc/linuxthreads/sysdeps/mips/pt-machine.h Mon Feb 4 17:09:36 2002 > @@ -66,7 +66,7 @@ __compare_and_swap (long int *p, long in > " move %0,$0\n\t" > "move %0,%4\n\t" > "sc %0,%2\n\t" > - "beqzl %0,1b\n\t" > + "beqz %0,1b\n\t" > " ll %1,%5\n\t" > ".set pop\n" > "2:\n\t" Ditto. > @@ -89,7 +89,7 @@ compare_and_swap (volatile long int *p, > " move %0,$0\n\t" > "move %0,%4\n\t" > "sc %0,%2\n\t" > - "beqzl %0,1b\n\t" > + "beqz %0,1b\n\t" > " ll %1,%5\n\t" > ".set pop\n" > "2:\n\t" Ditto. -- Daniel Jacobowitz Carnegie Mellon University MontaVista Software Debian GNU/Linux Developer ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips (take 3) 2002-02-05 2:58 ` Daniel Jacobowitz @ 2002-02-05 4:42 ` H . J . Lu 2002-02-05 4:47 ` Daniel Jacobowitz 2002-02-05 19:30 ` H . J . Lu 1 sibling, 1 reply; 75+ messages in thread From: H . J . Lu @ 2002-02-05 4:42 UTC (permalink / raw) To: Daniel Jacobowitz; +Cc: Dominic Sweetman, GNU C Library, linux-mips On Mon, Feb 04, 2002 at 09:58:04PM -0500, Daniel Jacobowitz wrote: > > > --- libc/linuxthreads/sysdeps/mips/pspinlock.c.llsc Mon Feb 4 13:45:01 2002 > > +++ libc/linuxthreads/sysdeps/mips/pspinlock.c Mon Feb 4 17:09:02 2002 > > @@ -40,7 +40,7 @@ __pthread_spin_lock (pthread_spinlock_t > > "bnez %1,1b\n\t" > > " li %2,1\n\t" > > "sc %2,%0\n\t" > > - "beqzl %2,1b\n\t" > > + "beqz %2,1b\n\t" > > " ll %1,%3\n\t" > > ".set pop" > > : "=m" (*lock), "=&r" (tmp1), "=&r" (tmp2) > > Is that really what you meant to do? The ll is now in the delay slot > of the beqz. Yes, it is ok since we don't care what ll does if the branch is not taken. H.J. ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips (take 3) 2002-02-05 4:42 ` H . J . Lu @ 2002-02-05 4:47 ` Daniel Jacobowitz 2002-02-05 5:30 ` Justin Carlson 0 siblings, 1 reply; 75+ messages in thread From: Daniel Jacobowitz @ 2002-02-05 4:47 UTC (permalink / raw) To: H . J . Lu; +Cc: Dominic Sweetman, GNU C Library, linux-mips On Mon, Feb 04, 2002 at 08:42:47PM -0800, H . J . Lu wrote: > On Mon, Feb 04, 2002 at 09:58:04PM -0500, Daniel Jacobowitz wrote: > > > > > --- libc/linuxthreads/sysdeps/mips/pspinlock.c.llsc Mon Feb 4 13:45:01 2002 > > > +++ libc/linuxthreads/sysdeps/mips/pspinlock.c Mon Feb 4 17:09:02 2002 > > > @@ -40,7 +40,7 @@ __pthread_spin_lock (pthread_spinlock_t > > > "bnez %1,1b\n\t" > > > " li %2,1\n\t" > > > "sc %2,%0\n\t" > > > - "beqzl %2,1b\n\t" > > > + "beqz %2,1b\n\t" > > > " ll %1,%3\n\t" > > > ".set pop" > > > : "=m" (*lock), "=&r" (tmp1), "=&r" (tmp2) > > > > Is that really what you meant to do? The ll is now in the delay slot > > of the beqz. > > Yes, it is ok since we don't care what ll does if the branch is not > taken. Won't this cause some gratuitous thrashing if someone else is trying to get the spinlock at the same time? -- Daniel Jacobowitz Carnegie Mellon University MontaVista Software Debian GNU/Linux Developer ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips (take 3) 2002-02-05 4:47 ` Daniel Jacobowitz @ 2002-02-05 5:30 ` Justin Carlson 2002-02-05 8:39 ` Hartvig Ekner 0 siblings, 1 reply; 75+ messages in thread From: Justin Carlson @ 2002-02-05 5:30 UTC (permalink / raw) To: Daniel Jacobowitz; +Cc: H . J . Lu, Dominic Sweetman, GNU C Library, linux-mips On Mon, 2002-02-04 at 23:47, Daniel Jacobowitz wrote: > > Won't this cause some gratuitous thrashing if someone else is trying to > get the spinlock at the same time? > Actually, if you look at the required semantics of ll, no. A ll by itself can never cause a sc from another cpu to fail. It's part of the semantic definition to avoid potential livelock cases, e.g. A does ll B does ll, foiling A's lock attempt A does sc, which fails A does ll, foiling B's lock attempt B does sc, which fails B does ll, foiling A's lock attempt ... Instead, this case becomes: A does ll B does ll A does sc, which succeeds, even though B has done a ll B does sc which fails A does critical section work B spins on ll until A releases the lock -Justin ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips (take 3) 2002-02-05 5:30 ` Justin Carlson @ 2002-02-05 8:39 ` Hartvig Ekner 2002-02-05 8:39 ` Hartvig Ekner ` (2 more replies) 0 siblings, 3 replies; 75+ messages in thread From: Hartvig Ekner @ 2002-02-05 8:39 UTC (permalink / raw) To: Justin Carlson Cc: Daniel Jacobowitz, H . J . Lu, Dominic Sweetman, GNU C Library, linux-mips Note that the issue of a "LL" will trigger bus watching activity in the system logic (and maybe delays?) I would personally try to avoid issuing "dangling" ll's in the normal case, even though the functionality would be ok, and in some cases they are inavoidable. /Hartvig Justin Carlson writes: > > On Mon, 2002-02-04 at 23:47, Daniel Jacobowitz wrote: > > > > Won't this cause some gratuitous thrashing if someone else is trying to > > get the spinlock at the same time? > > > > Actually, if you look at the required semantics of ll, no. A ll by > itself can never cause a sc from another cpu to fail. It's part of the > semantic definition to avoid potential livelock cases, e.g. > > A does ll > B does ll, foiling A's lock attempt > A does sc, which fails > A does ll, foiling B's lock attempt > B does sc, which fails > B does ll, foiling A's lock attempt > ... > > Instead, this case becomes: > A does ll > B does ll > A does sc, which succeeds, even though B has done a ll > B does sc which fails > A does critical section work > B spins on ll until A releases the lock > > > -Justin > > -- _ _ _____ ____ Hartvig Ekner Mailto:hartvige@mips.com |\ /| | |____)(____ Direct: +45 4486 5503 | \/ | | | _____) MIPS Denmark Switch: +45 4486 5555 T E C H N O L O G I E S http://www.mips.com Fax...: +45 4486 5556 ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips (take 3) 2002-02-05 8:39 ` Hartvig Ekner @ 2002-02-05 8:39 ` Hartvig Ekner 2002-02-05 11:37 ` Maciej W. Rozycki 2002-02-05 12:12 ` Ralf Baechle 2 siblings, 0 replies; 75+ messages in thread From: Hartvig Ekner @ 2002-02-05 8:39 UTC (permalink / raw) To: Justin Carlson Cc: Daniel Jacobowitz, H . J . Lu, Dominic Sweetman, GNU C Library, linux-mips Note that the issue of a "LL" will trigger bus watching activity in the system logic (and maybe delays?) I would personally try to avoid issuing "dangling" ll's in the normal case, even though the functionality would be ok, and in some cases they are inavoidable. /Hartvig Justin Carlson writes: > > On Mon, 2002-02-04 at 23:47, Daniel Jacobowitz wrote: > > > > Won't this cause some gratuitous thrashing if someone else is trying to > > get the spinlock at the same time? > > > > Actually, if you look at the required semantics of ll, no. A ll by > itself can never cause a sc from another cpu to fail. It's part of the > semantic definition to avoid potential livelock cases, e.g. > > A does ll > B does ll, foiling A's lock attempt > A does sc, which fails > A does ll, foiling B's lock attempt > B does sc, which fails > B does ll, foiling A's lock attempt > ... > > Instead, this case becomes: > A does ll > B does ll > A does sc, which succeeds, even though B has done a ll > B does sc which fails > A does critical section work > B spins on ll until A releases the lock > > > -Justin > > -- _ _ _____ ____ Hartvig Ekner Mailto:hartvige@mips.com |\ /| | |____)(____ Direct: +45 4486 5503 | \/ | | | _____) MIPS Denmark Switch: +45 4486 5555 T E C H N O L O G I E S http://www.mips.com Fax...: +45 4486 5556 ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips (take 3) 2002-02-05 8:39 ` Hartvig Ekner 2002-02-05 8:39 ` Hartvig Ekner @ 2002-02-05 11:37 ` Maciej W. Rozycki 2002-02-05 12:12 ` Ralf Baechle 2 siblings, 0 replies; 75+ messages in thread From: Maciej W. Rozycki @ 2002-02-05 11:37 UTC (permalink / raw) To: Hartvig Ekner Cc: Justin Carlson, Daniel Jacobowitz, H . J . Lu, Dominic Sweetman, GNU C Library, linux-mips On Tue, 5 Feb 2002, Hartvig Ekner wrote: > Note that the issue of a "LL" will trigger bus watching activity in the > system logic (and maybe delays?) I would personally try to avoid issuing > "dangling" ll's in the normal case, even though the functionality would > be ok, and in some cases they are inavoidable. I'd suppose the address comparator to be active all the time, as it's the simpler approach and involves trivial asynchronous hardware and certainly no performance hit. -- + Maciej W. Rozycki, Technical University of Gdansk, Poland + +--------------------------------------------------------------+ + e-mail: macro@ds2.pg.gda.pl, PGP key available + ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips (take 3) 2002-02-05 8:39 ` Hartvig Ekner 2002-02-05 8:39 ` Hartvig Ekner 2002-02-05 11:37 ` Maciej W. Rozycki @ 2002-02-05 12:12 ` Ralf Baechle 2002-02-05 12:31 ` Maciej W. Rozycki 2 siblings, 1 reply; 75+ messages in thread From: Ralf Baechle @ 2002-02-05 12:12 UTC (permalink / raw) To: Hartvig Ekner Cc: Justin Carlson, Daniel Jacobowitz, H . J . Lu, Dominic Sweetman, GNU C Library, linux-mips On Tue, Feb 05, 2002 at 09:39:27AM +0100, Hartvig Ekner wrote: > Note that the issue of a "LL" will trigger bus watching activity in the > system logic (and maybe delays?) I would personally try to avoid issuing > "dangling" ll's in the normal case, even though the functionality would > be ok, and in some cases they are inavoidable. Some of the processor manuals explicitly note that the execution of a ll instruction may not be visible at all externally. That's the case if the address is already in a primary cache line in exclusive (ll) or dirty (sc) state. That makes even if a processor supports full coherency since there is no reason to indicate the update to any other external agent. Or am I missing something? Ralf ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips (take 3) 2002-02-05 12:12 ` Ralf Baechle @ 2002-02-05 12:31 ` Maciej W. Rozycki 2002-02-05 12:38 ` Hartvig Ekner 0 siblings, 1 reply; 75+ messages in thread From: Maciej W. Rozycki @ 2002-02-05 12:31 UTC (permalink / raw) To: Ralf Baechle Cc: Hartvig Ekner, Justin Carlson, Daniel Jacobowitz, H . J . Lu, Dominic Sweetman, GNU C Library, linux-mips On Tue, 5 Feb 2002, Ralf Baechle wrote: > Some of the processor manuals explicitly note that the execution of a > ll instruction may not be visible at all externally. That's the case if > the address is already in a primary cache line in exclusive (ll) or > dirty (sc) state. That makes even if a processor supports full coherency > since there is no reason to indicate the update to any other external > agent. Or am I missing something? By definition, apart from an ordinary load, an "ll" does only the following two additional actions: 1. Loads the LLAddr register (it's visible in CP0, at least in certain implementations) to set up the ll comparator. 2. Sets the LLbit flip-flop to set the ll state to valid initially. None of the actions should normally involve externally visible activity. -- + Maciej W. Rozycki, Technical University of Gdansk, Poland + +--------------------------------------------------------------+ + e-mail: macro@ds2.pg.gda.pl, PGP key available + ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips (take 3) 2002-02-05 12:31 ` Maciej W. Rozycki @ 2002-02-05 12:38 ` Hartvig Ekner 2002-02-05 12:38 ` Hartvig Ekner ` (2 more replies) 0 siblings, 3 replies; 75+ messages in thread From: Hartvig Ekner @ 2002-02-05 12:38 UTC (permalink / raw) To: Maciej W. Rozycki Cc: Ralf Baechle, Hartvig Ekner, Justin Carlson, Daniel Jacobowitz, H . J . Lu, Dominic Sweetman, GNU C Library, linux-mips Some of MIPS's cores do externalize the event of a "LL" and make it visible on the bus interface. Similarly, the SC is externalized and requires a go/nogo response from the system logic. Think of it as putting a shared LLAddr & LLBit outside the processor. The SC will only succeed if the internal LLBit is ok *and* the external logic gives the go-ahead as well. The reasoning behind all this is that one can then utilize LL/SC in multi CPU systems without full coherency support being required. But then again, this might not be relevant for MIPS/Linux as it will not run without full HW coherency on multiple CPUs? /Hartvig Maciej W. Rozycki writes: > > On Tue, 5 Feb 2002, Ralf Baechle wrote: > > > Some of the processor manuals explicitly note that the execution of a > > ll instruction may not be visible at all externally. That's the case if > > the address is already in a primary cache line in exclusive (ll) or > > dirty (sc) state. That makes even if a processor supports full coherency > > since there is no reason to indicate the update to any other external > > agent. Or am I missing something? > > By definition, apart from an ordinary load, an "ll" does only the > following two additional actions: > > 1. Loads the LLAddr register (it's visible in CP0, at least in certain > implementations) to set up the ll comparator. > > 2. Sets the LLbit flip-flop to set the ll state to valid initially. > > None of the actions should normally involve externally visible activity. > > -- > + Maciej W. Rozycki, Technical University of Gdansk, Poland + > +--------------------------------------------------------------+ > + e-mail: macro@ds2.pg.gda.pl, PGP key available + > > -- _ _ _____ ____ Hartvig Ekner Mailto:hartvige@mips.com |\ /| | |____)(____ Direct: +45 4486 5503 | \/ | | | _____) MIPS Denmark Switch: +45 4486 5555 T E C H N O L O G I E S http://www.mips.com Fax...: +45 4486 5556 ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips (take 3) 2002-02-05 12:38 ` Hartvig Ekner @ 2002-02-05 12:38 ` Hartvig Ekner 2002-02-05 13:28 ` Maciej W. Rozycki 2002-02-05 18:59 ` Ralf Baechle 2 siblings, 0 replies; 75+ messages in thread From: Hartvig Ekner @ 2002-02-05 12:38 UTC (permalink / raw) To: Maciej W. Rozycki Cc: Ralf Baechle, Hartvig Ekner, Justin Carlson, Daniel Jacobowitz, H . J . Lu, Dominic Sweetman, GNU C Library, linux-mips Some of MIPS's cores do externalize the event of a "LL" and make it visible on the bus interface. Similarly, the SC is externalized and requires a go/nogo response from the system logic. Think of it as putting a shared LLAddr & LLBit outside the processor. The SC will only succeed if the internal LLBit is ok *and* the external logic gives the go-ahead as well. The reasoning behind all this is that one can then utilize LL/SC in multi CPU systems without full coherency support being required. But then again, this might not be relevant for MIPS/Linux as it will not run without full HW coherency on multiple CPUs? /Hartvig Maciej W. Rozycki writes: > > On Tue, 5 Feb 2002, Ralf Baechle wrote: > > > Some of the processor manuals explicitly note that the execution of a > > ll instruction may not be visible at all externally. That's the case if > > the address is already in a primary cache line in exclusive (ll) or > > dirty (sc) state. That makes even if a processor supports full coherency > > since there is no reason to indicate the update to any other external > > agent. Or am I missing something? > > By definition, apart from an ordinary load, an "ll" does only the > following two additional actions: > > 1. Loads the LLAddr register (it's visible in CP0, at least in certain > implementations) to set up the ll comparator. > > 2. Sets the LLbit flip-flop to set the ll state to valid initially. > > None of the actions should normally involve externally visible activity. > > -- > + Maciej W. Rozycki, Technical University of Gdansk, Poland + > +--------------------------------------------------------------+ > + e-mail: macro@ds2.pg.gda.pl, PGP key available + > > -- _ _ _____ ____ Hartvig Ekner Mailto:hartvige@mips.com |\ /| | |____)(____ Direct: +45 4486 5503 | \/ | | | _____) MIPS Denmark Switch: +45 4486 5555 T E C H N O L O G I E S http://www.mips.com Fax...: +45 4486 5556 ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips (take 3) 2002-02-05 12:38 ` Hartvig Ekner 2002-02-05 12:38 ` Hartvig Ekner @ 2002-02-05 13:28 ` Maciej W. Rozycki 2002-02-05 19:28 ` Hartvig Ekner 2002-02-05 18:59 ` Ralf Baechle 2 siblings, 1 reply; 75+ messages in thread From: Maciej W. Rozycki @ 2002-02-05 13:28 UTC (permalink / raw) To: Hartvig Ekner Cc: Ralf Baechle, Justin Carlson, Daniel Jacobowitz, H . J . Lu, Dominic Sweetman, GNU C Library, linux-mips On Tue, 5 Feb 2002, Hartvig Ekner wrote: > Some of MIPS's cores do externalize the event of a "LL" and make it > visible on the bus interface. Similarly, the SC is externalized and > requires a go/nogo response from the system logic. Think of it as > putting a shared LLAddr & LLBit outside the processor. The SC will > only succeed if the internal LLBit is ok *and* the external logic gives > the go-ahead as well. OK, but an external register shouldn't need any additional CPU time to get its contents latched. > The reasoning behind all this is that one can then utilize LL/SC in > multi CPU systems without full coherency support being required. Nor should an external comparator. > But then again, this might not be relevant for MIPS/Linux as it will not > run without full HW coherency on multiple CPUs? How do you maintain coherency on such a system? To support such a model all shared area write accesses should be followed by explicit synchronization requests. It should be trivial but tedious to do for Linux, but it might not be that easy for threads. One bit I've forgotten about "ll" -- it also implies a "sync". -- + Maciej W. Rozycki, Technical University of Gdansk, Poland + +--------------------------------------------------------------+ + e-mail: macro@ds2.pg.gda.pl, PGP key available + ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips (take 3) 2002-02-05 13:28 ` Maciej W. Rozycki @ 2002-02-05 19:28 ` Hartvig Ekner 2002-02-05 19:28 ` Hartvig Ekner 0 siblings, 1 reply; 75+ messages in thread From: Hartvig Ekner @ 2002-02-05 19:28 UTC (permalink / raw) To: Maciej W. Rozycki; +Cc: linux-mips Maciej W. Rozycki writes: > How do you maintain coherency on such a system? To support such a model > all shared area write accesses should be followed by explicit > synchronization requests. It should be trivial but tedious to do for > Linux, but it might not be that easy for threads. This would have to be a loosely coupled system - something using either software coherency or uncached accesses for the shared areas. And then LL/SC for synchronization primitives. There are a fair number of SOC designs like this, even with more than two processors. /Hartvig ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips (take 3) 2002-02-05 19:28 ` Hartvig Ekner @ 2002-02-05 19:28 ` Hartvig Ekner 0 siblings, 0 replies; 75+ messages in thread From: Hartvig Ekner @ 2002-02-05 19:28 UTC (permalink / raw) To: Maciej W. Rozycki; +Cc: linux-mips Maciej W. Rozycki writes: > How do you maintain coherency on such a system? To support such a model > all shared area write accesses should be followed by explicit > synchronization requests. It should be trivial but tedious to do for > Linux, but it might not be that easy for threads. This would have to be a loosely coupled system - something using either software coherency or uncached accesses for the shared areas. And then LL/SC for synchronization primitives. There are a fair number of SOC designs like this, even with more than two processors. /Hartvig ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips (take 3) 2002-02-05 12:38 ` Hartvig Ekner 2002-02-05 12:38 ` Hartvig Ekner 2002-02-05 13:28 ` Maciej W. Rozycki @ 2002-02-05 18:59 ` Ralf Baechle 2 siblings, 0 replies; 75+ messages in thread From: Ralf Baechle @ 2002-02-05 18:59 UTC (permalink / raw) To: Hartvig Ekner Cc: Maciej W. Rozycki, Justin Carlson, Daniel Jacobowitz, H . J . Lu, Dominic Sweetman, GNU C Library, linux-mips On Tue, Feb 05, 2002 at 01:38:34PM +0100, Hartvig Ekner wrote: > Some of MIPS's cores do externalize the event of a "LL" and make it > visible on the bus interface. Similarly, the SC is externalized and > requires a go/nogo response from the system logic. Think of it as > putting a shared LLAddr & LLBit outside the processor. The SC will > only succeed if the internal LLBit is ok *and* the external logic gives > the go-ahead as well. > > The reasoning behind all this is that one can then utilize LL/SC in > multi CPU systems without full coherency support being required. > > But then again, this might not be relevant for MIPS/Linux as it will not > run without full HW coherency on multiple CPUs? Linux could easily be hacked into handle such a configuration as a cluster. Anything else would be a pretty large job. Ralf ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips (take 3) 2002-02-05 2:58 ` Daniel Jacobowitz 2002-02-05 4:42 ` H . J . Lu @ 2002-02-05 19:30 ` H . J . Lu 2002-02-05 21:54 ` H . J . Lu 1 sibling, 1 reply; 75+ messages in thread From: H . J . Lu @ 2002-02-05 19:30 UTC (permalink / raw) To: Dominic Sweetman, GNU C Library, linux-mips On Mon, Feb 04, 2002 at 09:58:04PM -0500, Daniel Jacobowitz wrote: > On Mon, Feb 04, 2002 at 05:28:57PM -0800, H . J . Lu wrote: > > On Mon, Feb 04, 2002 at 04:46:07PM +0000, Dominic Sweetman wrote: > > > > > > H . J . Lu (hjl@lucon.org) writes: > > > > > > > I can change glibc not to use branch-likely without using nop. But it > > > > may require one or two instructions outside of the loop. Should I do > > > > it given what we know now? > > > > > > I would not recommend using "branch likely" in assembler coding, if > > > that's what you're asking. > > > > > > > Here is a patch to remove branch likely. But I couldn't find a way > > not to fill the delay slot with nop. BTW, is that safe to remove > > ".set noreorder"? > > You mean, if there is nothing which can be put there? Yes, it's safe. > Here is a new patch. I removed the extra "ll" in the delay slot. H.J. --- 2002-02-05 H.J. Lu <hjl@gnu.org> * sysdeps/mips/pspinlock.c (__pthread_spin_lock): Not use branch likely. Remove ".set noreorder". * sysdeps/mips/pt-machine.h (testandset): Liekwise. (__compare_and_swap): Liekwise. 2002-02-05 H.J. Lu <hjl@gnu.org> * sysdeps/mips/atomicity.h (exchange_and_add): Not use branch likely. Remove ".set noreorder". (atomic_add): Likewise. (compare_and_swap): Likewise. * sysdeps/unix/sysv/linux/mips/sys/tas.h (_test_and_set): Likewise. --- libc/linuxthreads/sysdeps/mips/pspinlock.c.llsc Mon Feb 4 13:45:01 2002 +++ libc/linuxthreads/sysdeps/mips/pspinlock.c Tue Feb 5 11:26:22 2002 @@ -32,17 +32,13 @@ __pthread_spin_lock (pthread_spinlock_t unsigned int tmp1, tmp2; asm volatile - ("\t\t\t# spin_lock\n\t" - "ll %1,%3\n" + ("\t\t\t# spin_lock\n" "1:\n\t" - ".set push\n\t" - ".set noreorder\n\t" + "ll %1,%3\n" "bnez %1,1b\n\t" " li %2,1\n\t" "sc %2,%0\n\t" - "beqzl %2,1b\n\t" - " ll %1,%3\n\t" - ".set pop" + "beqz %2,1b\n\t" : "=m" (*lock), "=&r" (tmp1), "=&r" (tmp2) : "m" (*lock) : "memory"); --- libc/linuxthreads/sysdeps/mips/pt-machine.h.llsc Mon Feb 4 13:45:01 2002 +++ libc/linuxthreads/sysdeps/mips/pt-machine.h Tue Feb 5 11:26:51 2002 @@ -57,18 +57,14 @@ __compare_and_swap (long int *p, long in long int ret, temp; __asm__ __volatile__ - ("/* Inline compare & swap */\n\t" - "ll %1,%5\n" + ("/* Inline compare & swap */\n" "1:\n\t" - ".set push\n\t" - ".set noreorder\n\t" + "ll %1,%5\n" "bne %1,%3,2f\n\t" " move %0,$0\n\t" "move %0,%4\n\t" "sc %0,%2\n\t" - "beqzl %0,1b\n\t" - " ll %1,%5\n\t" - ".set pop\n" + "beqz %0,1b\n\t" "2:\n\t" "/* End compare & swap */" : "=&r" (ret), "=&r" (temp), "=m" (*p) --- libc/sysdeps/mips/atomicity.h.llsc Mon Feb 4 13:45:18 2002 +++ libc/sysdeps/mips/atomicity.h Tue Feb 5 11:25:09 2002 @@ -32,16 +32,12 @@ exchange_and_add (volatile uint32_t *mem int result, tmp; __asm__ __volatile__ - ("/* Inline exchange & add */\n\t" + ("/* Inline exchange & add */\n" + "1:\n" "ll %0,%3\n" - "1:\n\t" "addu %1,%4,%0\n\t" "sc %1,%2\n\t" - ".set push\n\t" - ".set noreorder\n\t" - "beqzl %1,1b\n\t" - " ll %0,%3\n\t" - ".set pop\n\t" + "beqz %1,1b\n\t" "/* End exchange & add */" : "=&r"(result), "=&r"(tmp), "=m"(*mem) : "m" (*mem), "r"(val) @@ -57,16 +53,12 @@ atomic_add (volatile uint32_t *mem, int int result; __asm__ __volatile__ - ("/* Inline atomic add */\n\t" - "ll %0,%2\n" + ("/* Inline atomic add */\n" "1:\n\t" + "ll %0,%2\n" "addu %0,%3,%0\n\t" "sc %0,%1\n\t" - ".set push\n\t" - ".set noreorder\n\t" - "beqzl %0,1b\n\t" - " ll %0,%2\n\t" - ".set pop\n\t" + "beqz %0,1b\n\t" "/* End atomic add */" : "=&r"(result), "=m"(*mem) : "m" (*mem), "r"(val) @@ -80,18 +72,14 @@ compare_and_swap (volatile long int *p, long int ret, temp; __asm__ __volatile__ - ("/* Inline compare & swap */\n\t" - "ll %1,%5\n" + ("/* Inline compare & swap */\n" "1:\n\t" - ".set push\n\t" - ".set noreorder\n\t" + "ll %1,%5\n" "bne %1,%3,2f\n\t" " move %0,$0\n\t" "move %0,%4\n\t" "sc %0,%2\n\t" - "beqzl %0,1b\n\t" - " ll %1,%5\n\t" - ".set pop\n" + "beqz %0,1b\n\t" "2:\n\t" "/* End compare & swap */" : "=&r" (ret), "=&r" (temp), "=m" (*p) --- libc/sysdeps/unix/sysv/linux/mips/sys/tas.h.llsc Mon Feb 4 13:45:28 2002 +++ libc/sysdeps/unix/sysv/linux/mips/sys/tas.h Tue Feb 5 11:25:34 2002 @@ -42,17 +42,13 @@ _test_and_set (int *p, int v) __THROW int r, t; __asm__ __volatile__ - ("/* Inline test and set */\n\t" - "ll %0,%3\n" + ("/* Inline test and set */\n" "1:\n\t" - ".set push\n\t" - ".set noreorder\n\t" + "ll %0,%3\n" "beq %0,%4,2f\n\t" " move %1,%4\n\t" "sc %1,%2\n\t" - "beqzl %1,1b\n\t" - " ll %0,%3\n\t" - ".set pop\n" + "beqz %1,1b\n\t" "2:\n\t" "/* End test and set */" : "=&r" (r), "=&r" (t), "=m" (*p) ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips (take 3) 2002-02-05 19:30 ` H . J . Lu @ 2002-02-05 21:54 ` H . J . Lu 2002-02-06 10:32 ` Ralf Baechle 2002-02-06 11:37 ` PATCH: Fix ll/sc for mips (take 3) Maciej W. Rozycki 0 siblings, 2 replies; 75+ messages in thread From: H . J . Lu @ 2002-02-05 21:54 UTC (permalink / raw) To: Dominic Sweetman, GNU C Library, linux-mips, binutils On Tue, Feb 05, 2002 at 11:30:17AM -0800, H . J . Lu wrote: > On Mon, Feb 04, 2002 at 09:58:04PM -0500, Daniel Jacobowitz wrote: > > On Mon, Feb 04, 2002 at 05:28:57PM -0800, H . J . Lu wrote: > > > On Mon, Feb 04, 2002 at 04:46:07PM +0000, Dominic Sweetman wrote: > > > > > > > > H . J . Lu (hjl@lucon.org) writes: > > > > > > > > > I can change glibc not to use branch-likely without using nop. But it > > > > > may require one or two instructions outside of the loop. Should I do > > > > > it given what we know now? > > > > > > > > I would not recommend using "branch likely" in assembler coding, if > > > > that's what you're asking. > > > > > > > > > > Here is a patch to remove branch likely. But I couldn't find a way > > > not to fill the delay slot with nop. BTW, is that safe to remove > > > ".set noreorder"? > > > > You mean, if there is nothing which can be put there? Yes, it's safe. > > > > Here is a new patch. I removed the extra "ll" in the delay slot. > This patch is wrong. The assmebler turns that into 0x2ab0f724 <__pthread_alt_lock+212>: ll v1,0(s1) 0x2ab0f728 <__pthread_alt_lock+216>: bne v1,s0,0x2ab0f744 <__pthread_alt_lock+244> 0x2ab0f72c <__pthread_alt_lock+220>: nop 0x2ab0f730 <__pthread_alt_lock+224>: move a1,zero 0x2ab0f734 <__pthread_alt_lock+228>: move a1,v0 0x2ab0f738 <__pthread_alt_lock+232>: sc a1,0(s1) 0x2ab0f73c <__pthread_alt_lock+236>: beqz a1,0x2ab0f724 <__pthread_alt_lock+212> 0x2ab0f740 <__pthread_alt_lock+240>: nop If I do __asm__ __volatile__ ("/* Inline compare & swap */\n" "1:\n\t" "ll %1,%5\n\t" "move %0,$0\n\t" "bne %1,%3,2f\n\t" "move %0,%4\n\t" "sc %0,%2\n\t" "beqz %0,1b\n\t" "2:\n\t" "/* End compare & swap */" : "=&r" (ret), "=&r" (temp), "=m" (*p) : "r" (oldval), "r" (newval), "m" (*p) : "memory"); The assembler will do 0xd724 <__pthread_alt_lock+212>: ll v1,0(s1) 0xd728 <__pthread_alt_lock+216>: move a1,zero 0xd72c <__pthread_alt_lock+220>: bne v1,s0,0xd744 <__pthread_alt_lock+244> 0xd730 <__pthread_alt_lock+224>: nop 0xd734 <__pthread_alt_lock+228>: move a1,v0 0xd738 <__pthread_alt_lock+232>: sc a1,0(s1) 0xd73c <__pthread_alt_lock+236>: beqz a1,0xd724 <__pthread_alt_lock+212> 0xd740 <__pthread_alt_lock+240>: nop There is an extra "nop" in the delay slot. I don't think gas is smart enough to fill the delay slot. I will put back those ".set noredor". H.J. ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips (take 3) 2002-02-05 21:54 ` H . J . Lu @ 2002-02-06 10:32 ` Ralf Baechle 2002-02-06 20:45 ` Why does -g turn off filling the delat slot? H . J . Lu 2002-02-06 11:37 ` PATCH: Fix ll/sc for mips (take 3) Maciej W. Rozycki 1 sibling, 1 reply; 75+ messages in thread From: Ralf Baechle @ 2002-02-06 10:32 UTC (permalink / raw) To: H . J . Lu; +Cc: Dominic Sweetman, GNU C Library, linux-mips, binutils On Tue, Feb 05, 2002 at 01:54:07PM -0800, H . J . Lu wrote: > __asm__ __volatile__ > ("/* Inline compare & swap */\n" > "1:\n\t" > "ll %1,%5\n\t" > "move %0,$0\n\t" > "bne %1,%3,2f\n\t" > "move %0,%4\n\t" > "sc %0,%2\n\t" > "beqz %0,1b\n\t" > "2:\n\t" > "/* End compare & swap */" > : "=&r" (ret), "=&r" (temp), "=m" (*p) > : "r" (oldval), "r" (newval), "m" (*p) > : "memory"); > > The assembler will do > > 0xd724 <__pthread_alt_lock+212>: ll v1,0(s1) > 0xd728 <__pthread_alt_lock+216>: move a1,zero > 0xd72c <__pthread_alt_lock+220>: bne v1,s0,0xd744 <__pthread_alt_lock+244> > 0xd730 <__pthread_alt_lock+224>: nop > 0xd734 <__pthread_alt_lock+228>: move a1,v0 > 0xd738 <__pthread_alt_lock+232>: sc a1,0(s1) > 0xd73c <__pthread_alt_lock+236>: beqz a1,0xd724 <__pthread_alt_lock+212> > 0xd740 <__pthread_alt_lock+240>: nop > > There is an extra "nop" in the delay slot. I don't think gas is smart > enough to fill the delay slot. I will put back those ".set noredor". The solution is to move the move instruction in front of the branch instruction. The assembler will then move it into the delay slot: __asm__ __volatile__ ("/* Inline compare & swap */\n" "1:\n\t" "ll %1,%5\n\t" "move %0,$0\n\t" "move %0,%4\n\t" "bne %1,%3,2f\n\t" "sc %0,%2\n\t" "beqz %0,1b\n\t" "2:\n\t" "/* End compare & swap */" : "=&r" (ret), "=&r" (temp), "=m" (*p) : "r" (oldval), "r" (newval), "m" (*p) : "memory"); Also this function looks like a good candidate for inlining (Is it actually inlined? Haven't checked ...) where depending on it's use the address of *p is loaded twice from the GOT, so changing the code to: __asm__ __volatile__ ("/* Inline compare & swap */\n" "1:\n\t" "ll %1,(%5)\n\t" "move %0,$0\n\t" "move %0,%4\n\t" "bne %1,%3,2f\n\t" "sc %0,(%2)\n\t" "beqz %0,1b\n\t" "2:\n\t" "/* End compare & swap */" : "=&r" (ret), "=&r" (temp), "=r" (p) : "r" (oldval), "r" (newval), "r" (p) : "memory"); will avoid having to pay that PIC bloat twice and get you around the gas inefficiency of putting in too many nops into PIC code. Ralf ^ permalink raw reply [flat|nested] 75+ messages in thread
* Why does -g turn off filling the delat slot? 2002-02-06 10:32 ` Ralf Baechle @ 2002-02-06 20:45 ` H . J . Lu 2002-02-06 21:00 ` PATCH: Modify the mips gas behavior for -g -O H . J . Lu 0 siblings, 1 reply; 75+ messages in thread From: H . J . Lu @ 2002-02-06 20:45 UTC (permalink / raw) To: Ralf Baechle; +Cc: Dominic Sweetman, GNU C Library, linux-mips, binutils On Wed, Feb 06, 2002 at 11:32:59AM +0100, Ralf Baechle wrote: > > > > There is an extra "nop" in the delay slot. I don't think gas is smart > > enough to fill the delay slot. I will put back those ".set noredor". > > The solution is to move the move instruction in front of the branch > instruction. The assembler will then move it into the delay slot: > I found out why it didn't work for me. The problem is -g turns off filling the delay slot. The mips as has case 'g': if (arg == NULL) mips_debug = 2; else mips_debug = atoi (arg); /* When the MIPS assembler sees -g or -g2, it does not do optimizations which limit full symbolic debugging. We take that to be equivalent to -O0. */ if (mips_debug == 2) mips_optimize = 1; break; It doesn't matter of you pass -O to as or not. I'd like to override it if -O is seen. H.J. ^ permalink raw reply [flat|nested] 75+ messages in thread
* PATCH: Modify the mips gas behavior for -g -O 2002-02-06 20:45 ` Why does -g turn off filling the delat slot? H . J . Lu @ 2002-02-06 21:00 ` H . J . Lu 2002-02-06 21:16 ` Eric Christopher 0 siblings, 1 reply; 75+ messages in thread From: H . J . Lu @ 2002-02-06 21:00 UTC (permalink / raw) To: echristo; +Cc: linux-mips, binutils On Wed, Feb 06, 2002 at 12:45:38PM -0800, H . J . Lu wrote: > On Wed, Feb 06, 2002 at 11:32:59AM +0100, Ralf Baechle wrote: > > > > > > There is an extra "nop" in the delay slot. I don't think gas is smart > > > enough to fill the delay slot. I will put back those ".set noredor". > > > > The solution is to move the move instruction in front of the branch > > instruction. The assembler will then move it into the delay slot: > > > > I found out why it didn't work for me. The problem is -g turns off > filling the delay slot. The mips as has > > case 'g': > if (arg == NULL) > mips_debug = 2; > else > mips_debug = atoi (arg); > /* When the MIPS assembler sees -g or -g2, it does not do > optimizations which limit full symbolic debugging. We take > that to be equivalent to -O0. */ > if (mips_debug == 2) > mips_optimize = 1; > break; > > It doesn't matter of you pass -O to as or not. I'd like to override it > if -O is seen. > > Here is a patch which does what I want. Any comments? Eric, can you approve http://sources.redhat.com/ml/binutils/2002-02/msg00028.html Thanks. H.J. ---- 2002-02-06 H.J. Lu (hjl@gnu.org) * config/tc-mips.c (mips_optimize): Initialize to -2. (md_begin): Set mips_optimize to -mips_optimize if it is less than 0. (md_parse_option): Set mips_optimize to 1 for -g only if it is less than 0. --- gas/config/tc-mips.c.opt Sun Feb 3 23:47:26 2002 +++ gas/config/tc-mips.c Wed Feb 6 12:55:39 2002 @@ -431,7 +431,7 @@ static int mips_frame_reg_valid = 0; unneeded NOPs and swap branch instructions when possible. A value of 1 means to not swap branches. A value of 0 means to always insert NOPs. */ -static int mips_optimize = 2; +static int mips_optimize = -2; /* Debugging level. -g sets this to 2. -gN sets this to N. -g0 is equivalent to seeing no -g option at all. */ @@ -1020,6 +1020,9 @@ md_begin () int target_cpu_had_mips16 = 0; const struct mips_cpu_info *ci; + if (mips_optimize < 0) + mips_optimize = -mips_optimize; + /* GP relative stuff not working for PE */ if (strncmp (TARGET_OS, "pe", 2) == 0 && g_switch_value != 0) @@ -9794,7 +9797,7 @@ md_parse_option (c, arg) /* When the MIPS assembler sees -g or -g2, it does not do optimizations which limit full symbolic debugging. We take that to be equivalent to -O0. */ - if (mips_debug == 2) + if (mips_debug == 2 && mips_optimize < 0) mips_optimize = 1; break; ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Modify the mips gas behavior for -g -O 2002-02-06 21:00 ` PATCH: Modify the mips gas behavior for -g -O H . J . Lu @ 2002-02-06 21:16 ` Eric Christopher 2002-02-06 21:40 ` Ian Lance Taylor 0 siblings, 1 reply; 75+ messages in thread From: Eric Christopher @ 2002-02-06 21:16 UTC (permalink / raw) To: H . J . Lu; +Cc: linux-mips, binutils > Here is a patch which does what I want. Any comments? > Does anyone care if we have MIPS compatibility? I remember seeing this a few years ago and wondering why we were doing it this way. I remember at one time debuggers had problems with optimized code, but gdb has come a long way since then. Is there any reason to have this code in there at all now, i.e. should we just go off of optimization level and not debug level at all? > Eric, can you approve > > http://sources.redhat.com/ml/binutils/2002-02/msg00028.html > If this is the one for header file information, then go ahead. We need to come up with a better way of doing this though. -eric -- I will not use abbrev. ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Modify the mips gas behavior for -g -O 2002-02-06 21:16 ` Eric Christopher @ 2002-02-06 21:40 ` Ian Lance Taylor 2002-02-06 21:46 ` Eric Christopher 2002-02-06 22:00 ` PATCH: Define SUBTARGET_ASM_DEBUGGING_SPEC for Linux/mips H . J . Lu 0 siblings, 2 replies; 75+ messages in thread From: Ian Lance Taylor @ 2002-02-06 21:40 UTC (permalink / raw) To: Eric Christopher; +Cc: H . J . Lu, linux-mips, binutils Eric Christopher <echristo@redhat.com> writes: > > Here is a patch which does what I want. Any comments? > > > > Does anyone care if we have MIPS compatibility? I remember seeing this a > few years ago and wondering why we were doing it this way. I remember at > one time debuggers had problems with optimized code, but gdb has come a > long way since then. Is there any reason to have this code in there at > all now, i.e. should we just go off of optimization level and not debug > level at all? In general the reason for this sort of compatibility is for easier gcc support. It's normally more convenient if the GNU assembler and the native assembler present the same interface, so that you don't have to use --with-gnu-assembler to get the right result when configuring gcc. It's far more common to invoke the assembler via the compiler than to invoke it directly, so making it work correctly when invoked via the compiler is normally best. H.J., my question on this patch would be: why is -g being passed to the assembler? Ian ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Modify the mips gas behavior for -g -O 2002-02-06 21:40 ` Ian Lance Taylor @ 2002-02-06 21:46 ` Eric Christopher 2002-02-06 22:00 ` PATCH: Define SUBTARGET_ASM_DEBUGGING_SPEC for Linux/mips H . J . Lu 1 sibling, 0 replies; 75+ messages in thread From: Eric Christopher @ 2002-02-06 21:46 UTC (permalink / raw) To: Ian Lance Taylor; +Cc: H . J . Lu, linux-mips, binutils > In general the reason for this sort of compatibility is for easier gcc > support. It's normally more convenient if the GNU assembler and the > native assembler present the same interface, so that you don't have to > use --with-gnu-assembler to get the right result when configuring gcc. > Ok. That's fair enough. I was hoping you'd pipe up here. :) -eric -- I will not use abbrev. ^ permalink raw reply [flat|nested] 75+ messages in thread
* PATCH: Define SUBTARGET_ASM_DEBUGGING_SPEC for Linux/mips 2002-02-06 21:40 ` Ian Lance Taylor 2002-02-06 21:46 ` Eric Christopher @ 2002-02-06 22:00 ` H . J . Lu 2002-02-07 8:24 ` Eric Christopher 1 sibling, 1 reply; 75+ messages in thread From: H . J . Lu @ 2002-02-06 22:00 UTC (permalink / raw) To: Ian Lance Taylor; +Cc: Eric Christopher, linux-mips, binutils, gcc-patches On Wed, Feb 06, 2002 at 01:40:20PM -0800, Ian Lance Taylor wrote: > > H.J., my question on this patch would be: why is -g being passed to > the assembler? > Here is a patch similar to Irix 6. H.J. ---- 2002-02-06 H.J. Lu <hjl@gnu.org> * config/mips/linux.h (SUBTARGET_ASM_DEBUGGING_SPEC): Defined. --- gcc/config/mips/linux.h.gas Thu Nov 15 12:21:17 2001 +++ gcc/config/mips/linux.h Wed Feb 6 13:57:47 2002 @@ -170,6 +170,9 @@ Boston, MA 02111-1307, USA. */ %{!fno-PIC:%{!fno-pic:-KPIC}} \ %{fno-PIC:-non_shared} %{fno-pic:-non_shared}" +#undef SUBTARGET_ASM_DEBUGGING_SPEC +#define SUBTARGET_ASM_DEBUGGING_SPEC "-g0" + /* We don't need those nonsenses. */ #undef INVOKE__main #undef CTOR_LIST_BEGIN ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Define SUBTARGET_ASM_DEBUGGING_SPEC for Linux/mips 2002-02-06 22:00 ` PATCH: Define SUBTARGET_ASM_DEBUGGING_SPEC for Linux/mips H . J . Lu @ 2002-02-07 8:24 ` Eric Christopher 0 siblings, 0 replies; 75+ messages in thread From: Eric Christopher @ 2002-02-07 8:24 UTC (permalink / raw) To: H . J . Lu; +Cc: Ian Lance Taylor, linux-mips, binutils, gcc-patches > Here is a patch similar to Irix 6. > > > H.J. > ---- > 2002-02-06 H.J. Lu <hjl@gnu.org> > > * config/mips/linux.h (SUBTARGET_ASM_DEBUGGING_SPEC): Defined. > Have at. :) -eric -- I will not use abbrev. ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips (take 3) 2002-02-05 21:54 ` H . J . Lu 2002-02-06 10:32 ` Ralf Baechle @ 2002-02-06 11:37 ` Maciej W. Rozycki 1 sibling, 0 replies; 75+ messages in thread From: Maciej W. Rozycki @ 2002-02-06 11:37 UTC (permalink / raw) To: H . J . Lu; +Cc: Dominic Sweetman, GNU C Library, linux-mips, binutils On Tue, 5 Feb 2002, H . J . Lu wrote: > 0xd724 <__pthread_alt_lock+212>: ll v1,0(s1) > 0xd728 <__pthread_alt_lock+216>: move a1,zero > 0xd72c <__pthread_alt_lock+220>: bne v1,s0,0xd744 <__pthread_alt_lock+244> > 0xd730 <__pthread_alt_lock+224>: nop > 0xd734 <__pthread_alt_lock+228>: move a1,v0 > 0xd738 <__pthread_alt_lock+232>: sc a1,0(s1) > 0xd73c <__pthread_alt_lock+236>: beqz a1,0xd724 <__pthread_alt_lock+212> > 0xd740 <__pthread_alt_lock+240>: nop > > There is an extra "nop" in the delay slot. I don't think gas is smart > enough to fill the delay slot. I will put back those ".set noredor". The code uses the same count of cycles either way. As you may see above, gas was smart enough to fill the "ll" load delay slot. Maybe it should prefer filling the branch delay slot instead due to smaller code... -- + Maciej W. Rozycki, Technical University of Gdansk, Poland + +--------------------------------------------------------------+ + e-mail: macro@ds2.pg.gda.pl, PGP key available + ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips (take 3) 2002-02-04 9:46 ` Dominic Sweetman 2002-02-04 16:31 ` H . J . Lu @ 2002-02-04 17:44 ` cgd 1 sibling, 0 replies; 75+ messages in thread From: cgd @ 2002-02-04 17:44 UTC (permalink / raw) To: Dominic Sweetman; +Cc: linux-mips thanks for the explanation. just goes to show that i should probably sit down and read your book from cover to cover one day, rather than just recommending it to the newbies. 8-) chris ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips (take 3) 2002-02-02 18:53 ` Justin Carlson 2002-02-02 20:03 ` H . J . Lu @ 2002-02-04 6:46 ` Ralf Baechle 1 sibling, 0 replies; 75+ messages in thread From: Ralf Baechle @ 2002-02-04 6:46 UTC (permalink / raw) To: Justin Carlson Cc: Daniel Jacobowitz, H . J . Lu, Maciej W. Rozycki, Hiroyuki Machida, libc-alpha, linux-mips On Sat, Feb 02, 2002 at 01:53:23PM -0500, Justin Carlson wrote: > 2) More importantly, most implementations don't use any sort of dynamic > branch prediction on branch likelies. They predict taken, always, since > that's the specified intent (it's a branch *likely* to be taken). CPU guys hate branch likely and would probably love if whoever invented them hires at Intel ;-) > For most spin locks, the normal behaviour is a fall through, not taking > that branch, so you're inflicting a branch mispredict penalty on every > lock grabbed without contention. Even for locks which the general case > is contention, giving the processor branch predictor a chance to learn > that is a Good Idea. I was thinking about spinlocks like retry: la addrreg, retry ll reg, lockvar ... sc reg, lockvar teq $0, reg That would depend on the price of a trap instruction when it's not taken and the probability of the lock being congested. Ralf ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips 2002-02-01 7:17 ` H . J . Lu 2002-02-01 11:45 ` Maciej W. Rozycki @ 2002-02-04 7:01 ` Ralf Baechle 2002-02-04 14:54 ` Maciej W. Rozycki 1 sibling, 1 reply; 75+ messages in thread From: Ralf Baechle @ 2002-02-04 7:01 UTC (permalink / raw) To: H . J . Lu; +Cc: Hiroyuki Machida, macro, libc-alpha, linux-mips On Thu, Jan 31, 2002 at 11:17:14PM -0800, H . J . Lu wrote: > > Gas will fill delay slots. Same object codes will be produced, so I > > think you don't have to do that by hand. > > It will make the code more readable. We don't have to guess what > the assembler will do. Generally speaking a MIPS assembler is free to do arbitrary reordering. In the past there have been non-GNU assembler that were doing more massive reordering than gcc does ... Using .set noreorder means you dump the assembler's intelligence and take full responsibility for dealing with all interlocks (or the lack thereof) and other performance issues yourself. Ralf ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips 2002-02-04 7:01 ` PATCH: Fix ll/sc for mips Ralf Baechle @ 2002-02-04 14:54 ` Maciej W. Rozycki 0 siblings, 0 replies; 75+ messages in thread From: Maciej W. Rozycki @ 2002-02-04 14:54 UTC (permalink / raw) To: Ralf Baechle; +Cc: H . J . Lu, Hiroyuki Machida, libc-alpha, linux-mips On Mon, 4 Feb 2002, Ralf Baechle wrote: > > It will make the code more readable. We don't have to guess what > > the assembler will do. > > Generally speaking a MIPS assembler is free to do arbitrary reordering. > In the past there have been non-GNU assembler that were doing more massive > reordering than gcc does ... Using .set noreorder means you dump the > assembler's intelligence and take full responsibility for dealing with > all interlocks (or the lack thereof) and other performance issues > yourself. That's why I'm still uneasy about the patch or, specifically, its _test_and_set hunk. It's best to avoid pretending we have the knowledge beyond what an assembler has, as it's often not the case -- consider all the options an assembler can accept for code variations. Using "noreorder" is really justified if you need to generate an exact opcode stream for some reason (perhaps for a trampoline with a fixed format) or you know you have the knowledege beyond what an assembler has, e.g. you know an instruction can (or must) really be placed in a delay slot even though a dependency analysis performed by an assembler shows otherwise. Also beware of implicit macros which may expand into multiple instructions -- their semantics when placed in a delay slot may differ from what one may expect! -- + Maciej W. Rozycki, Technical University of Gdansk, Poland + +--------------------------------------------------------------+ + e-mail: macro@ds2.pg.gda.pl, PGP key available + ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips 2002-02-01 3:35 ` Hiroyuki Machida 2002-02-01 4:02 ` [libc-alpha] " Kaz Kylheku 2002-02-01 7:17 ` H . J . Lu @ 2002-02-01 11:50 ` Maciej W. Rozycki 2002-02-01 17:40 ` H . J . Lu 2 siblings, 1 reply; 75+ messages in thread From: Maciej W. Rozycki @ 2002-02-01 11:50 UTC (permalink / raw) To: Hiroyuki Machida; +Cc: hjl, libc-alpha, linux-mips On Fri, 1 Feb 2002, Hiroyuki Machida wrote: > > It will fail if *p is not same as oldval. > > Please note that "sc" may fail even if nobody write the > variable. (See P.211 "8.4.2 Load-Linked/Sotre-Conditional" of "See > MIPS RUN" for more detail.) > So, after your patch applied, compare_and_swap() may fail, even if > *p is equal to oldval. That's exactly what I meant -- "sc" may fail to execute the store, while "cmpxchgl" may not. -- + Maciej W. Rozycki, Technical University of Gdansk, Poland + +--------------------------------------------------------------+ + e-mail: macro@ds2.pg.gda.pl, PGP key available + ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips 2002-02-01 11:50 ` Maciej W. Rozycki @ 2002-02-01 17:40 ` H . J . Lu 2002-02-01 21:41 ` Maciej W. Rozycki 0 siblings, 1 reply; 75+ messages in thread From: H . J . Lu @ 2002-02-01 17:40 UTC (permalink / raw) To: Maciej W. Rozycki; +Cc: Hiroyuki Machida, libc-alpha, linux-mips On Fri, Feb 01, 2002 at 12:50:48PM +0100, Maciej W. Rozycki wrote: > On Fri, 1 Feb 2002, Hiroyuki Machida wrote: > > > > It will fail if *p is not same as oldval. > > > > Please note that "sc" may fail even if nobody write the > > variable. (See P.211 "8.4.2 Load-Linked/Sotre-Conditional" of "See > > MIPS RUN" for more detail.) > > So, after your patch applied, compare_and_swap() may fail, even if > > *p is equal to oldval. > > That's exactly what I meant -- "sc" may fail to execute the store, while > "cmpxchgl" may not. > Here is the updated patch. H.J. --- 2002-01-31 H.J. Lu <hjl@gnu.org> * sysdeps/mips/pspinlock.c (__pthread_spin_lock): Use a different register in the delayed slot. * sysdeps/mips/pt-machine.h (testandset): Call _test_and_set. (__compare_and_swap): Return 0 only when failed to compare. 2002-01-31 H.J. Lu <hjl@gnu.org> * sysdeps/mips/atomicity.h (compare_and_swap): Return 0 only when failed to compare. * sysdeps/unix/sysv/linux/mips/sys/tas.h (_test_and_set): Fill the delay slot. --- glibc-2.2.4/linuxthreads/sysdeps/mips/pspinlock.c.llsc Thu Jan 31 10:49:37 2002 +++ glibc-2.2.4/linuxthreads/sysdeps/mips/pspinlock.c Thu Jan 31 10:49:37 2002 @@ -29,20 +29,21 @@ int __pthread_spin_lock (pthread_spinlock_t *lock) { - unsigned int tmp; + unsigned int tmp1, tmp2; asm volatile ("\t\t\t# spin_lock\n\t" - "1:\n\t" - "ll %1,%2\n\t" ".set push\n\t" - ".set noreorder\n\t" + ".set noreorder\n" + "1:\n\t" + "ll %1,%3\n\t" "bnez %1,1b\n\t" - " li %1,1\n\t" - ".set pop\n\t" - "sc %1,%0\n\t" - "beqz %1,1b" - : "=m" (*lock), "=&r" (tmp) + "li %2,1\n\t" + "sc %2,%0\n\t" + "beqz %2,1b\n\t" + "nop\n\t" + ".set pop" + : "=m" (*lock), "=&r" (tmp1), "=&r" (tmp2) : "m" (*lock) : "memory"); --- glibc-2.2.4/linuxthreads/sysdeps/mips/pt-machine.h.llsc Sat Dec 9 08:55:34 2000 +++ glibc-2.2.4/linuxthreads/sysdeps/mips/pt-machine.h Fri Feb 1 09:19:44 2002 @@ -33,41 +33,11 @@ /* Spinlock implementation; required. */ -#if (_MIPS_ISA >= _MIPS_ISA_MIPS2) - -PT_EI long int -testandset (int *spinlock) -{ - long int ret, temp; - - __asm__ __volatile__ - ("/* Inline spinlock test & set */\n\t" - "1:\n\t" - "ll %0,%3\n\t" - ".set push\n\t" - ".set noreorder\n\t" - "bnez %0,2f\n\t" - " li %1,1\n\t" - ".set pop\n\t" - "sc %1,%2\n\t" - "beqz %1,1b\n" - "2:\n\t" - "/* End spinlock test & set */" - : "=&r" (ret), "=&r" (temp), "=m" (*spinlock) - : "m" (*spinlock) - : "memory"); - - return ret; -} - -#else /* !(_MIPS_ISA >= _MIPS_ISA_MIPS2) */ - PT_EI long int testandset (int *spinlock) { return _test_and_set (spinlock, 1); } -#endif /* !(_MIPS_ISA >= _MIPS_ISA_MIPS2) */ /* Get some notion of the current stack. Need not be exactly the top @@ -84,22 +54,24 @@ register char * stack_pointer __asm__ (" PT_EI int __compare_and_swap (long int *p, long int oldval, long int newval) { - long int ret; + long int ret, temp; __asm__ __volatile__ - ("/* Inline compare & swap */\n\t" + ("/* Inline compare & swap */\n" "1:\n\t" - "ll %0,%4\n\t" - ".set push\n" + "ll %1,%5\n\t" + ".set push\n\t" ".set noreorder\n\t" - "bne %0,%2,2f\n\t" - " move %0,%3\n\t" - ".set pop\n\t" - "sc %0,%1\n\t" - "beqz %0,1b\n" + "bne %1,%3,2f\n\t" + "move %0,$0\n\t" + "move %0,%4\n\t" + "sc %0,%2\n\t" + "beqz %0,1b\n\t" + "nop\n\t" + ".set pop\n" "2:\n\t" "/* End compare & swap */" - : "=&r" (ret), "=m" (*p) + : "=&r" (ret), "=&r" (temp), "=m" (*p) : "r" (oldval), "r" (newval), "m" (*p) : "memory"); --- glibc-2.2.4/sysdeps/mips/atomicity.h.llsc Mon Jul 9 11:58:19 2001 +++ glibc-2.2.4/sysdeps/mips/atomicity.h Fri Feb 1 09:21:41 2002 @@ -32,12 +32,16 @@ exchange_and_add (volatile uint32_t *mem int result, tmp; __asm__ __volatile__ - ("/* Inline exchange & add */\n\t" + ("/* Inline exchange & add */\n" "1:\n\t" "ll %0,%3\n\t" "addu %1,%4,%0\n\t" "sc %1,%2\n\t" + ".set push\n\t" + ".set noreorder\n\t" "beqz %1,1b\n\t" + "nop\n\t" + ".set pop\n\t" "/* End exchange & add */" : "=&r"(result), "=&r"(tmp), "=m"(*mem) : "m" (*mem), "r"(val) @@ -53,12 +57,16 @@ atomic_add (volatile uint32_t *mem, int int result; __asm__ __volatile__ - ("/* Inline atomic add */\n\t" + ("/* Inline atomic add */\n" "1:\n\t" "ll %0,%2\n\t" "addu %0,%3,%0\n\t" "sc %0,%1\n\t" + ".set push\n\t" + ".set noreorder\n\t" "beqz %0,1b\n\t" + "nop\n\t" + ".set pop\n\t" "/* End atomic add */" : "=&r"(result), "=m"(*mem) : "m" (*mem), "r"(val) @@ -69,22 +77,24 @@ static inline int __attribute__ ((unused)) compare_and_swap (volatile long int *p, long int oldval, long int newval) { - long int ret; + long int ret, temp; __asm__ __volatile__ - ("/* Inline compare & swap */\n\t" + ("/* Inline compare & swap */\n" "1:\n\t" - "ll %0,%4\n\t" - ".set push\n" + "ll %1,%5\n\t" + ".set push\n\t" ".set noreorder\n\t" - "bne %0,%2,2f\n\t" - "move %0,%3\n\t" - ".set pop\n\t" - "sc %0,%1\n\t" - "beqz %0,1b\n" + "bne %1,%3,2f\n\t" + "move %0,$0\n\t" + "move %0,%4\n\t" + "sc %0,%2\n\t" + "beqz %0,1b\n\t" + "nop\n\t" + ".set pop\n" "2:\n\t" "/* End compare & swap */" - : "=&r" (ret), "=m" (*p) + : "=&r" (ret), "=&r" (temp), "=m" (*p) : "r" (oldval), "r" (newval), "m" (*p) : "memory"); --- glibc-2.2.4/sysdeps/unix/sysv/linux/mips/sys/tas.h.llsc Mon Jul 9 11:58:45 2001 +++ glibc-2.2.4/sysdeps/unix/sysv/linux/mips/sys/tas.h Thu Jan 31 10:49:37 2002 @@ -42,16 +42,17 @@ _test_and_set (int *p, int v) __THROW int r, t; __asm__ __volatile__ - ("1:\n\t" + (".set push\n\t" + ".set noreorder\n" + "1:\n\t" "ll %0,%3\n\t" - ".set push\n\t" - ".set noreorder\n\t" "beq %0,%4,2f\n\t" - " move %1,%4\n\t" - ".set pop\n\t" + "move %1,%4\n\t" "sc %1,%2\n\t" - "beqz %1,1b\n" - "2:\n" + "beqz %1,1b\n\t" + "nop\n" + "2:\n\t" + ".set pop" : "=&r" (r), "=&r" (t), "=m" (*p) : "m" (*p), "r" (v) : "memory"); ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips 2002-02-01 17:40 ` H . J . Lu @ 2002-02-01 21:41 ` Maciej W. Rozycki 2002-02-01 22:47 ` H . J . Lu 0 siblings, 1 reply; 75+ messages in thread From: Maciej W. Rozycki @ 2002-02-01 21:41 UTC (permalink / raw) To: H . J . Lu; +Cc: Hiroyuki Machida, libc-alpha, linux-mips On Fri, 1 Feb 2002, H . J . Lu wrote: > Here is the updated patch. Please do indent instructions in branch delay slots like it's done in other code (and here originally as well). It much improves the perception of what exactly is going on. -- + Maciej W. Rozycki, Technical University of Gdansk, Poland + +--------------------------------------------------------------+ + e-mail: macro@ds2.pg.gda.pl, PGP key available + ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips 2002-02-01 21:41 ` Maciej W. Rozycki @ 2002-02-01 22:47 ` H . J . Lu 2002-02-02 11:06 ` Maciej W. Rozycki 2002-02-03 2:29 ` Ulrich Drepper 0 siblings, 2 replies; 75+ messages in thread From: H . J . Lu @ 2002-02-01 22:47 UTC (permalink / raw) To: Maciej W. Rozycki; +Cc: Hiroyuki Machida, libc-alpha, linux-mips On Fri, Feb 01, 2002 at 10:41:17PM +0100, Maciej W. Rozycki wrote: > On Fri, 1 Feb 2002, H . J . Lu wrote: > > > Here is the updated patch. > > Please do indent instructions in branch delay slots like it's done in > other code (and here originally as well). It much improves the perception > of what exactly is going on. > Like this? H.J. ---- 2002-02-01 H.J. Lu <hjl@gnu.org> * sysdeps/mips/pspinlock.c (__pthread_spin_lock): Use a different register in the delayed slot. Use branch likely. * sysdeps/mips/pt-machine.h (testandset): Call _test_and_set. (__compare_and_swap): Return 0 only when failed to compare. Use branch likely. 2002-02-01 H.J. Lu <hjl@gnu.org> * sysdeps/mips/atomicity.h (exchange_and_add): Use branch likely. (atomic_add): Likewise. (compare_and_swap): Return 0 only when failed to compare. Use branch likely. * sysdeps/unix/sysv/linux/mips/sys/tas.h (_test_and_set): Use branch likely. --- glibc-2.2.4/linuxthreads/sysdeps/mips/pspinlock.c.llsc Fri Feb 1 10:37:16 2002 +++ glibc-2.2.4/linuxthreads/sysdeps/mips/pspinlock.c Fri Feb 1 14:42:46 2002 @@ -29,20 +29,21 @@ int __pthread_spin_lock (pthread_spinlock_t *lock) { - unsigned int tmp; + unsigned int tmp1, tmp2; asm volatile ("\t\t\t# spin_lock\n\t" + "ll %1,%3\n" "1:\n\t" - "ll %1,%2\n\t" ".set push\n\t" ".set noreorder\n\t" "bnez %1,1b\n\t" - " li %1,1\n\t" - ".set pop\n\t" - "sc %1,%0\n\t" - "beqz %1,1b" - : "=m" (*lock), "=&r" (tmp) + " li %2,1\n\t" + "sc %2,%0\n\t" + "beqzl %2,1b\n\t" + " ll %1,%3\n\t" + ".set pop" + : "=m" (*lock), "=&r" (tmp1), "=&r" (tmp2) : "m" (*lock) : "memory"); --- glibc-2.2.4/linuxthreads/sysdeps/mips/pt-machine.h.llsc Sat Dec 9 08:55:34 2000 +++ glibc-2.2.4/linuxthreads/sysdeps/mips/pt-machine.h Fri Feb 1 14:42:20 2002 @@ -33,41 +33,11 @@ /* Spinlock implementation; required. */ -#if (_MIPS_ISA >= _MIPS_ISA_MIPS2) - -PT_EI long int -testandset (int *spinlock) -{ - long int ret, temp; - - __asm__ __volatile__ - ("/* Inline spinlock test & set */\n\t" - "1:\n\t" - "ll %0,%3\n\t" - ".set push\n\t" - ".set noreorder\n\t" - "bnez %0,2f\n\t" - " li %1,1\n\t" - ".set pop\n\t" - "sc %1,%2\n\t" - "beqz %1,1b\n" - "2:\n\t" - "/* End spinlock test & set */" - : "=&r" (ret), "=&r" (temp), "=m" (*spinlock) - : "m" (*spinlock) - : "memory"); - - return ret; -} - -#else /* !(_MIPS_ISA >= _MIPS_ISA_MIPS2) */ - PT_EI long int testandset (int *spinlock) { return _test_and_set (spinlock, 1); } -#endif /* !(_MIPS_ISA >= _MIPS_ISA_MIPS2) */ /* Get some notion of the current stack. Need not be exactly the top @@ -84,22 +54,24 @@ register char * stack_pointer __asm__ (" PT_EI int __compare_and_swap (long int *p, long int oldval, long int newval) { - long int ret; + long int ret, temp; __asm__ __volatile__ ("/* Inline compare & swap */\n\t" + "ll %1,%5\n" "1:\n\t" - "ll %0,%4\n\t" - ".set push\n" + ".set push\n\t" ".set noreorder\n\t" - "bne %0,%2,2f\n\t" - " move %0,%3\n\t" - ".set pop\n\t" - "sc %0,%1\n\t" - "beqz %0,1b\n" + "bne %1,%3,2f\n\t" + " move %0,$0\n\t" + "move %0,%4\n\t" + "sc %0,%2\n\t" + "beqzl %0,1b\n\t" + " ll %1,%5\n\t" + ".set pop\n" "2:\n\t" "/* End compare & swap */" - : "=&r" (ret), "=m" (*p) + : "=&r" (ret), "=&r" (temp), "=m" (*p) : "r" (oldval), "r" (newval), "m" (*p) : "memory"); --- glibc-2.2.4/sysdeps/mips/atomicity.h.llsc Mon Jul 9 11:58:19 2001 +++ glibc-2.2.4/sysdeps/mips/atomicity.h Fri Feb 1 14:43:34 2002 @@ -33,11 +33,15 @@ exchange_and_add (volatile uint32_t *mem __asm__ __volatile__ ("/* Inline exchange & add */\n\t" + "ll %0,%3\n" "1:\n\t" - "ll %0,%3\n\t" "addu %1,%4,%0\n\t" "sc %1,%2\n\t" - "beqz %1,1b\n\t" + ".set push\n\t" + ".set noreorder\n\t" + "beqzl %1,1b\n\t" + " ll %0,%3\n\t" + ".set pop\n\t" "/* End exchange & add */" : "=&r"(result), "=&r"(tmp), "=m"(*mem) : "m" (*mem), "r"(val) @@ -54,11 +58,15 @@ atomic_add (volatile uint32_t *mem, int __asm__ __volatile__ ("/* Inline atomic add */\n\t" + "ll %0,%2\n" "1:\n\t" - "ll %0,%2\n\t" "addu %0,%3,%0\n\t" "sc %0,%1\n\t" - "beqz %0,1b\n\t" + ".set push\n\t" + ".set noreorder\n\t" + "beqzl %0,1b\n\t" + " ll %0,%2\n\t" + ".set pop\n\t" "/* End atomic add */" : "=&r"(result), "=m"(*mem) : "m" (*mem), "r"(val) @@ -69,22 +77,24 @@ static inline int __attribute__ ((unused)) compare_and_swap (volatile long int *p, long int oldval, long int newval) { - long int ret; + long int ret, temp; __asm__ __volatile__ ("/* Inline compare & swap */\n\t" + "ll %1,%5\n" "1:\n\t" - "ll %0,%4\n\t" - ".set push\n" + ".set push\n\t" ".set noreorder\n\t" - "bne %0,%2,2f\n\t" - "move %0,%3\n\t" - ".set pop\n\t" - "sc %0,%1\n\t" - "beqz %0,1b\n" + "bne %1,%3,2f\n\t" + " move %0,$0\n\t" + "move %0,%4\n\t" + "sc %0,%2\n\t" + "beqzl %0,1b\n\t" + " ll %1,%5\n\t" + ".set pop\n" "2:\n\t" "/* End compare & swap */" - : "=&r" (ret), "=m" (*p) + : "=&r" (ret), "=&r" (temp), "=m" (*p) : "r" (oldval), "r" (newval), "m" (*p) : "memory"); --- glibc-2.2.4/sysdeps/unix/sysv/linux/mips/sys/tas.h.llsc Mon Jul 9 11:58:45 2001 +++ glibc-2.2.4/sysdeps/unix/sysv/linux/mips/sys/tas.h Fri Feb 1 14:43:52 2002 @@ -42,16 +42,19 @@ _test_and_set (int *p, int v) __THROW int r, t; __asm__ __volatile__ - ("1:\n\t" - "ll %0,%3\n\t" + ("/* Inline test and set */\n\t" + "ll %0,%3\n" + "1:\n\t" ".set push\n\t" ".set noreorder\n\t" "beq %0,%4,2f\n\t" " move %1,%4\n\t" - ".set pop\n\t" "sc %1,%2\n\t" - "beqz %1,1b\n" - "2:\n" + "beqzl %1,1b\n\t" + " ll %0,%3\n\t" + ".set pop\n" + "2:\n\t" + "/* End test and set */" : "=&r" (r), "=&r" (t), "=m" (*p) : "m" (*p), "r" (v) : "memory"); ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips 2002-02-01 22:47 ` H . J . Lu @ 2002-02-02 11:06 ` Maciej W. Rozycki 2002-02-03 2:29 ` Ulrich Drepper 1 sibling, 0 replies; 75+ messages in thread From: Maciej W. Rozycki @ 2002-02-02 11:06 UTC (permalink / raw) To: H . J . Lu; +Cc: Hiroyuki Machida, libc-alpha, linux-mips On Fri, 1 Feb 2002, H . J . Lu wrote: > > Please do indent instructions in branch delay slots like it's done in > > other code (and here originally as well). It much improves the perception > > of what exactly is going on. > > Like this? Exactly. Thanks. -- + Maciej W. Rozycki, Technical University of Gdansk, Poland + +--------------------------------------------------------------+ + e-mail: macro@ds2.pg.gda.pl, PGP key available + ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips 2002-02-01 22:47 ` H . J . Lu 2002-02-02 11:06 ` Maciej W. Rozycki @ 2002-02-03 2:29 ` Ulrich Drepper 2002-02-03 2:29 ` Ulrich Drepper 1 sibling, 1 reply; 75+ messages in thread From: Ulrich Drepper @ 2002-02-03 2:29 UTC (permalink / raw) To: H . J . Lu; +Cc: Maciej W. Rozycki, Hiroyuki Machida, libc-alpha, linux-mips "H . J . Lu" <hjl@lucon.org> writes: > Like this? > [...] >From what I've seen this patch was generally agreed on. I've checked it in. -- ---------------. ,-. 1325 Chesapeake Terrace Ulrich Drepper \ ,-------------------' \ Sunnyvale, CA 94089 USA Red Hat `--' drepper at redhat.com `------------------------ ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: PATCH: Fix ll/sc for mips 2002-02-03 2:29 ` Ulrich Drepper @ 2002-02-03 2:29 ` Ulrich Drepper 0 siblings, 0 replies; 75+ messages in thread From: Ulrich Drepper @ 2002-02-03 2:29 UTC (permalink / raw) To: H . J . Lu; +Cc: Maciej W. Rozycki, Hiroyuki Machida, libc-alpha, linux-mips "H . J . Lu" <hjl@lucon.org> writes: > Like this? > [...] From what I've seen this patch was generally agreed on. I've checked it in. -- ---------------. ,-. 1325 Chesapeake Terrace Ulrich Drepper \ ,-------------------' \ Sunnyvale, CA 94089 USA Red Hat `--' drepper at redhat.com `------------------------ ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [libc-alpha] Re: PATCH: Fix ll/sc for mips 2002-01-31 22:17 ` Maciej W. Rozycki 2002-01-31 22:41 ` H . J . Lu @ 2002-01-31 23:33 ` Kaz Kylheku 2002-01-31 23:33 ` Kaz Kylheku 1 sibling, 1 reply; 75+ messages in thread From: Kaz Kylheku @ 2002-01-31 23:33 UTC (permalink / raw) To: Maciej W. Rozycki; +Cc: H . J . Lu, GNU C Library, linux-mips On Thu, 31 Jan 2002, Maciej W. Rozycki wrote: > Date: Thu, 31 Jan 2002 23:17:21 +0100 (MET) > From: Maciej W. Rozycki <macro@ds2.pg.gda.pl> > To: H . J . Lu <hjl@lucon.org> > Cc: GNU C Library <libc-alpha@sources.redhat.com>, linux-mips@oss.sgi.com > Subject: [libc-alpha] Re: PATCH: Fix ll/sc for mips > > On Thu, 31 Jan 2002, H . J . Lu wrote: > > > (__compare_and_swap): Return 0 when failed to compare or swap. > [...] > > * sysdeps/mips/atomicity.h (compare_and_swap): Return 0 when > > failed to compare or swap. > > Looking at the i486 implementation these are not expected to fail. > Unless I am missing something... That's what ``compare'' means in ``compare and swap''. You lock the memory location at some hardware level and then compare the location to the specified value. If there is a match, you change the memory location to the new value. Otherwise you don't, and indicate that you didn't. That's what ``fail'' means. The comparison failed, and the operation failed to install the new value. The caller must detect this and handle that. Algorithms based on atomic compare-and-swap take failure into account. ^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [libc-alpha] Re: PATCH: Fix ll/sc for mips 2002-01-31 23:33 ` [libc-alpha] " Kaz Kylheku @ 2002-01-31 23:33 ` Kaz Kylheku 0 siblings, 0 replies; 75+ messages in thread From: Kaz Kylheku @ 2002-01-31 23:33 UTC (permalink / raw) To: Maciej W. Rozycki; +Cc: H . J . Lu, GNU C Library, linux-mips On Thu, 31 Jan 2002, Maciej W. Rozycki wrote: > Date: Thu, 31 Jan 2002 23:17:21 +0100 (MET) > From: Maciej W. Rozycki <macro@ds2.pg.gda.pl> > To: H . J . Lu <hjl@lucon.org> > Cc: GNU C Library <libc-alpha@sources.redhat.com>, linux-mips@oss.sgi.com > Subject: [libc-alpha] Re: PATCH: Fix ll/sc for mips > > On Thu, 31 Jan 2002, H . J . Lu wrote: > > > (__compare_and_swap): Return 0 when failed to compare or swap. > [...] > > * sysdeps/mips/atomicity.h (compare_and_swap): Return 0 when > > failed to compare or swap. > > Looking at the i486 implementation these are not expected to fail. > Unless I am missing something... That's what ``compare'' means in ``compare and swap''. You lock the memory location at some hardware level and then compare the location to the specified value. If there is a match, you change the memory location to the new value. Otherwise you don't, and indicate that you didn't. That's what ``fail'' means. The comparison failed, and the operation failed to install the new value. The caller must detect this and handle that. Algorithms based on atomic compare-and-swap take failure into account. ^ permalink raw reply [flat|nested] 75+ messages in thread
end of thread, other threads:[~2002-02-07 8:36 UTC | newest]
Thread overview: 75+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2002-01-31 20:35 PATCH: Fix ll/sc for mips H . J . Lu
2002-01-31 22:17 ` Maciej W. Rozycki
2002-01-31 22:41 ` H . J . Lu
2002-02-01 3:35 ` Hiroyuki Machida
2002-02-01 4:02 ` [libc-alpha] " Kaz Kylheku
2002-02-01 4:02 ` Kaz Kylheku
2002-02-01 4:59 ` Hiroyuki Machida
2002-02-01 7:00 ` H . J . Lu
2002-02-01 11:12 ` Hiroyuki Machida
2002-02-01 10:49 ` Andreas Schwab
2002-02-01 11:23 ` Geoff Keating
2002-02-01 7:10 ` H . J . Lu
2002-02-01 7:17 ` H . J . Lu
2002-02-01 11:45 ` Maciej W. Rozycki
2002-02-01 18:29 ` PATCH: Fix ll/sc for mips (take 3) H . J . Lu
2002-02-01 23:01 ` Daniel Jacobowitz
2002-02-01 23:15 ` H . J . Lu
2002-02-02 2:37 ` Hiroyuki Machida
2002-02-04 9:32 ` Dominic Sweetman
2002-02-05 6:16 ` Jay Carlson
2002-02-05 8:28 ` Ralf Baechle
2002-02-05 15:10 ` Jay Carlson
2002-02-05 16:06 ` Jay Carlson
2002-02-02 3:26 ` Daniel Jacobowitz
2002-02-02 18:53 ` Justin Carlson
2002-02-02 20:03 ` H . J . Lu
2002-02-02 20:49 ` Hartvig Ekner
2002-02-02 20:49 ` Hartvig Ekner
2002-02-03 5:47 ` Justin Carlson
2002-02-04 19:17 ` Paul Koning
[not found] ` <mailpost.1012680250.7159@news-sj1-1>
2002-02-03 23:29 ` cgd
2002-02-04 6:07 ` Ralf Baechle
2002-02-04 9:46 ` Dominic Sweetman
2002-02-04 16:31 ` H . J . Lu
2002-02-04 16:46 ` Dominic Sweetman
2002-02-05 1:28 ` H . J . Lu
2002-02-05 2:58 ` Daniel Jacobowitz
2002-02-05 4:42 ` H . J . Lu
2002-02-05 4:47 ` Daniel Jacobowitz
2002-02-05 5:30 ` Justin Carlson
2002-02-05 8:39 ` Hartvig Ekner
2002-02-05 8:39 ` Hartvig Ekner
2002-02-05 11:37 ` Maciej W. Rozycki
2002-02-05 12:12 ` Ralf Baechle
2002-02-05 12:31 ` Maciej W. Rozycki
2002-02-05 12:38 ` Hartvig Ekner
2002-02-05 12:38 ` Hartvig Ekner
2002-02-05 13:28 ` Maciej W. Rozycki
2002-02-05 19:28 ` Hartvig Ekner
2002-02-05 19:28 ` Hartvig Ekner
2002-02-05 18:59 ` Ralf Baechle
2002-02-05 19:30 ` H . J . Lu
2002-02-05 21:54 ` H . J . Lu
2002-02-06 10:32 ` Ralf Baechle
2002-02-06 20:45 ` Why does -g turn off filling the delat slot? H . J . Lu
2002-02-06 21:00 ` PATCH: Modify the mips gas behavior for -g -O H . J . Lu
2002-02-06 21:16 ` Eric Christopher
2002-02-06 21:40 ` Ian Lance Taylor
2002-02-06 21:46 ` Eric Christopher
2002-02-06 22:00 ` PATCH: Define SUBTARGET_ASM_DEBUGGING_SPEC for Linux/mips H . J . Lu
2002-02-07 8:24 ` Eric Christopher
2002-02-06 11:37 ` PATCH: Fix ll/sc for mips (take 3) Maciej W. Rozycki
2002-02-04 17:44 ` cgd
2002-02-04 6:46 ` Ralf Baechle
2002-02-04 7:01 ` PATCH: Fix ll/sc for mips Ralf Baechle
2002-02-04 14:54 ` Maciej W. Rozycki
2002-02-01 11:50 ` Maciej W. Rozycki
2002-02-01 17:40 ` H . J . Lu
2002-02-01 21:41 ` Maciej W. Rozycki
2002-02-01 22:47 ` H . J . Lu
2002-02-02 11:06 ` Maciej W. Rozycki
2002-02-03 2:29 ` Ulrich Drepper
2002-02-03 2:29 ` Ulrich Drepper
2002-01-31 23:33 ` [libc-alpha] " Kaz Kylheku
2002-01-31 23:33 ` Kaz Kylheku
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox