netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] make atomic_t volatile on all architectures
@ 2007-08-08 23:07 Chris Snook
  2007-08-08 23:18 ` Jesper Juhl
                   ` (3 more replies)
  0 siblings, 4 replies; 34+ messages in thread
From: Chris Snook @ 2007-08-08 23:07 UTC (permalink / raw)
  To: akpm, torvalds, ak, heiko.carstens
  Cc: davem, linux-kernel, netdev, schwidefsky, wensong, horms, wjiang,
	cfriesen, zlynx

From: Chris Snook <csnook@redhat.com>

Some architectures currently do not declare the contents of an atomic_t to be
volatile.  This causes confusion since atomic_read() might not actually read
anything if an optimizing compiler re-uses a value stored in a register, which
can break code that loops until something external changes the value of an
atomic_t.  Avoiding such bugs requires using barrier(), which causes re-loads
of all registers used in the loop, thus hurting performance instead of helping
it, particularly on architectures where it's unnecessary.  Since we generally
want to re-read the contents of an atomic variable on every access anyway,
let's standardize the behavior across all architectures and avoid the
performance and correctness problems of requiring the use of barrier() in
loops that expect atomic_t variables to change externally.  This is relevant
even on non-smp architectures, since drivers may use atomic operations in
interrupt handlers.

Signed-off-by: Chris Snook <csnook@redhat.com>


diff -urp linux-2.6.23-rc2-orig/include/asm-blackfin/atomic.h linux-2.6.23-rc2/include/asm-blackfin/atomic.h
--- linux-2.6.23-rc2-orig/include/asm-blackfin/atomic.h	2007-07-08 19:32:17.000000000 -0400
+++ linux-2.6.23-rc2/include/asm-blackfin/atomic.h	2007-08-08 18:18:43.000000000 -0400
@@ -13,8 +13,13 @@
  * Tony Kou (tonyko@lineo.ca)   Lineo Inc.   2001
  */
 
+/*
+ * Make atomic_t volatile to remove the need for barriers in loops that
+ * wait for an outside event.  We generally want to re-load the atomic_t
+ * variable each time anyway, but don't need to re-load everything else.
+ */
 typedef struct {
-	int counter;
+	volatile int counter;
 } atomic_t;
 #define ATOMIC_INIT(i)	{ (i) }
 
diff -urp linux-2.6.23-rc2-orig/include/asm-frv/atomic.h linux-2.6.23-rc2/include/asm-frv/atomic.h
--- linux-2.6.23-rc2-orig/include/asm-frv/atomic.h	2007-07-08 19:32:17.000000000 -0400
+++ linux-2.6.23-rc2/include/asm-frv/atomic.h	2007-08-08 18:21:55.000000000 -0400
@@ -35,8 +35,13 @@
 #define smp_mb__before_atomic_inc()	barrier()
 #define smp_mb__after_atomic_inc()	barrier()
 
+/*
+ * Make atomic_t volatile to remove the need for barriers in loops that
+ * wait for an outside event.  We generally want to re-load the atomic_t
+ * variable each time anyway, but don't need to re-load everything else.
+ */
 typedef struct {
-	int counter;
+	volatile int counter;
 } atomic_t;
 
 #define ATOMIC_INIT(i)		{ (i) }
diff -urp linux-2.6.23-rc2-orig/include/asm-h8300/atomic.h linux-2.6.23-rc2/include/asm-h8300/atomic.h
--- linux-2.6.23-rc2-orig/include/asm-h8300/atomic.h	2007-07-08 19:32:17.000000000 -0400
+++ linux-2.6.23-rc2/include/asm-h8300/atomic.h	2007-08-08 18:24:02.000000000 -0400
@@ -6,7 +6,12 @@
  * resource counting etc..
  */
 
-typedef struct { int counter; } atomic_t;
+/*
+ * Make atomic_t volatile to remove the need for barriers in loops that
+ * wait for an outside event.  We generally want to re-load the atomic_t
+ * variable each time anyway, but don't need to re-load everything else.
+ */
+typedef struct { volatile int counter; } atomic_t;
 #define ATOMIC_INIT(i)	{ (i) }
 
 #define atomic_read(v)		((v)->counter)
diff -urp linux-2.6.23-rc2-orig/include/asm-i386/atomic.h linux-2.6.23-rc2/include/asm-i386/atomic.h
--- linux-2.6.23-rc2-orig/include/asm-i386/atomic.h	2007-07-08 19:32:17.000000000 -0400
+++ linux-2.6.23-rc2/include/asm-i386/atomic.h	2007-08-08 18:26:09.000000000 -0400
@@ -14,8 +14,12 @@
  * Make sure gcc doesn't try to be clever and move things around
  * on us. We need to use _exactly_ the address the user gave us,
  * not some alias that contains the same information.
+ *
+ * Make atomic_t volatile to remove the need for barriers in loops that
+ * wait for an outside event.  We generally want to re-load the atomic_t
+ * variable each time anyway, but don't need to re-load everything else.
  */
-typedef struct { int counter; } atomic_t;
+typedef struct { volatile int counter; } atomic_t;
 
 #define ATOMIC_INIT(i)	{ (i) }
 
diff -urp linux-2.6.23-rc2-orig/include/asm-m68k/atomic.h linux-2.6.23-rc2/include/asm-m68k/atomic.h
--- linux-2.6.23-rc2-orig/include/asm-m68k/atomic.h	2007-07-08 19:32:17.000000000 -0400
+++ linux-2.6.23-rc2/include/asm-m68k/atomic.h	2007-08-08 18:28:58.000000000 -0400
@@ -13,7 +13,12 @@
  * We do not have SMP m68k systems, so we don't have to deal with that.
  */
 
-typedef struct { int counter; } atomic_t;
+/*
+ * Make atomic_t volatile to remove the need for barriers in loops that
+ * wait for an outside event.  We generally want to re-load the atomic_t
+ * variable each time anyway, but don't need to re-load everything else.
+ */
+typedef struct { volatile int counter; } atomic_t;
 #define ATOMIC_INIT(i)	{ (i) }
 
 #define atomic_read(v)		((v)->counter)
diff -urp linux-2.6.23-rc2-orig/include/asm-m68knommu/atomic.h linux-2.6.23-rc2/include/asm-m68knommu/atomic.h
--- linux-2.6.23-rc2-orig/include/asm-m68knommu/atomic.h	2007-07-08 19:32:17.000000000 -0400
+++ linux-2.6.23-rc2/include/asm-m68knommu/atomic.h	2007-08-08 18:30:32.000000000 -0400
@@ -12,7 +12,12 @@
  * We do not have SMP m68k systems, so we don't have to deal with that.
  */
 
-typedef struct { int counter; } atomic_t;
+/*
+ * Make atomic_t volatile to remove the need for barriers in loops that
+ * wait for an outside event.  We generally want to re-load the atomic_t
+ * variable each time anyway, but don't need to re-load everything else.
+ */
+typedef struct { volatile int counter; } atomic_t;
 #define ATOMIC_INIT(i)	{ (i) }
 
 #define atomic_read(v)		((v)->counter)
diff -urp linux-2.6.23-rc2-orig/include/asm-s390/atomic.h linux-2.6.23-rc2/include/asm-s390/atomic.h
--- linux-2.6.23-rc2-orig/include/asm-s390/atomic.h	2007-08-08 17:48:53.000000000 -0400
+++ linux-2.6.23-rc2/include/asm-s390/atomic.h	2007-08-08 18:40:17.000000000 -0400
@@ -23,8 +23,13 @@
  * S390 uses 'Compare And Swap' for atomicity in SMP enviroment
  */
 
+/*
+ * Make atomic_t volatile to remove the need for barriers in loops that
+ * wait for an outside event.  We generally want to re-load the atomic_t
+ * variable each time anyway, but don't need to re-load everything else.
+ */
 typedef struct {
-	int counter;
+	volatile int counter;
 } __attribute__ ((aligned (4))) atomic_t;
 #define ATOMIC_INIT(i)  { (i) }
 
diff -urp linux-2.6.23-rc2-orig/include/asm-v850/atomic.h linux-2.6.23-rc2/include/asm-v850/atomic.h
--- linux-2.6.23-rc2-orig/include/asm-v850/atomic.h	2007-07-08 19:32:17.000000000 -0400
+++ linux-2.6.23-rc2/include/asm-v850/atomic.h	2007-08-08 18:42:37.000000000 -0400
@@ -21,7 +21,12 @@
 #error SMP not supported
 #endif
 
-typedef struct { int counter; } atomic_t;
+/*
+ * Make atomic_t volatile to remove the need for barriers in loops that
+ * wait for an outside event.  We generally want to re-load the atomic_t
+ * variable each time anyway, but don't need to re-load everything else.
+ */
+typedef struct { volatile int counter; } atomic_t;
 
 #define ATOMIC_INIT(i)	{ (i) }
 
diff -urp linux-2.6.23-rc2-orig/include/asm-x86_64/atomic.h linux-2.6.23-rc2/include/asm-x86_64/atomic.h
--- linux-2.6.23-rc2-orig/include/asm-x86_64/atomic.h	2007-07-08 19:32:17.000000000 -0400
+++ linux-2.6.23-rc2/include/asm-x86_64/atomic.h	2007-08-08 18:44:18.000000000 -0400
@@ -21,8 +21,12 @@
  * Make sure gcc doesn't try to be clever and move things around
  * on us. We need to use _exactly_ the address the user gave us,
  * not some alias that contains the same information.
+ *
+ * Make atomic_t volatile to remove the need for barriers in loops that
+ * wait for an outside event.  We generally want to re-load the atomic_t
+ * variable each time anyway, but don't need to re-load everything else.
  */
-typedef struct { int counter; } atomic_t;
+typedef struct { volatile int counter; } atomic_t;
 
 #define ATOMIC_INIT(i)	{ (i) }
 

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] make atomic_t volatile on all architectures
  2007-08-08 23:07 [PATCH] make atomic_t volatile on all architectures Chris Snook
@ 2007-08-08 23:18 ` Jesper Juhl
  2007-08-08 23:31   ` Chris Snook
  2007-08-08 23:25 ` Lennert Buytenhek
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 34+ messages in thread
From: Jesper Juhl @ 2007-08-08 23:18 UTC (permalink / raw)
  To: Chris Snook
  Cc: akpm, torvalds, ak, heiko.carstens, davem, linux-kernel, netdev,
	schwidefsky, wensong, horms, wjiang, cfriesen, zlynx

On 09/08/2007, Chris Snook <csnook@redhat.com> wrote:
> From: Chris Snook <csnook@redhat.com>
>
> Some architectures currently do not declare the contents of an atomic_t to be
> volatile.  This causes confusion since atomic_read() might not actually read
> anything if an optimizing compiler re-uses a value stored in a register, which
> can break code that loops until something external changes the value of an
> atomic_t.  Avoiding such bugs requires using barrier(), which causes re-loads
> of all registers used in the loop, thus hurting performance instead of helping
> it, particularly on architectures where it's unnecessary.  Since we generally
> want to re-read the contents of an atomic variable on every access anyway,
> let's standardize the behavior across all architectures and avoid the
> performance and correctness problems of requiring the use of barrier() in
> loops that expect atomic_t variables to change externally.  This is relevant
> even on non-smp architectures, since drivers may use atomic operations in
> interrupt handlers.
>
> Signed-off-by: Chris Snook <csnook@redhat.com>
>

Hmm, I thought we were trying to move away from volatile since it is
very weakly defined and towards explicit barriers and locks...
Points to --> Documentation/volatile-considered-harmful.txt


-- 
Jesper Juhl <jesper.juhl@gmail.com>
Don't top-post  http://www.catb.org/~esr/jargon/html/T/top-post.html
Plain text mails only, please      http://www.expita.com/nomime.html

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] make atomic_t volatile on all architectures
  2007-08-08 23:07 [PATCH] make atomic_t volatile on all architectures Chris Snook
  2007-08-08 23:18 ` Jesper Juhl
@ 2007-08-08 23:25 ` Lennert Buytenhek
  2007-08-08 23:35   ` Chris Snook
  2007-08-09  1:03 ` Herbert Xu
  2007-08-09  4:18 ` Linus Torvalds
  3 siblings, 1 reply; 34+ messages in thread
From: Lennert Buytenhek @ 2007-08-08 23:25 UTC (permalink / raw)
  To: Chris Snook
  Cc: akpm, torvalds, ak, heiko.carstens, davem, linux-kernel, netdev,
	schwidefsky, wensong, horms, wjiang, cfriesen, zlynx

On Wed, Aug 08, 2007 at 07:07:33PM -0400, Chris Snook wrote:

> From: Chris Snook <csnook@redhat.com>
> 
> Some architectures currently do not declare the contents of an atomic_t to be
> volatile.  This causes confusion since atomic_read() might not actually read
> anything if an optimizing compiler re-uses a value stored in a register, which
> can break code that loops until something external changes the value of an
> atomic_t.  Avoiding such bugs requires using barrier(), which causes re-loads
> of all registers used in the loop, thus hurting performance instead of helping
> it, particularly on architectures where it's unnecessary.  Since we generally
> want to re-read the contents of an atomic variable on every access anyway,
> let's standardize the behavior across all architectures and avoid the
> performance and correctness problems of requiring the use of barrier() in
> loops that expect atomic_t variables to change externally.  This is relevant
> even on non-smp architectures, since drivers may use atomic operations in
> interrupt handlers.
> 
> Signed-off-by: Chris Snook <csnook@redhat.com>

Documentation/atomic_ops.txt would need updating:

	[...]

	One very important aspect of these two routines is that they DO NOT
	require any explicit memory barriers.  They need only perform the
	atomic_t counter update in an SMP safe manner.

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] make atomic_t volatile on all architectures
  2007-08-08 23:18 ` Jesper Juhl
@ 2007-08-08 23:31   ` Chris Snook
  2007-08-08 23:51     ` Jesper Juhl
  0 siblings, 1 reply; 34+ messages in thread
From: Chris Snook @ 2007-08-08 23:31 UTC (permalink / raw)
  To: Jesper Juhl
  Cc: akpm, torvalds, ak, heiko.carstens, davem, linux-kernel, netdev,
	schwidefsky, wensong, horms, wjiang, cfriesen, zlynx

Jesper Juhl wrote:
> On 09/08/2007, Chris Snook <csnook@redhat.com> wrote:
>> From: Chris Snook <csnook@redhat.com>
>>
>> Some architectures currently do not declare the contents of an atomic_t to be
>> volatile.  This causes confusion since atomic_read() might not actually read
>> anything if an optimizing compiler re-uses a value stored in a register, which
>> can break code that loops until something external changes the value of an
>> atomic_t.  Avoiding such bugs requires using barrier(), which causes re-loads
>> of all registers used in the loop, thus hurting performance instead of helping
>> it, particularly on architectures where it's unnecessary.  Since we generally
>> want to re-read the contents of an atomic variable on every access anyway,
>> let's standardize the behavior across all architectures and avoid the
>> performance and correctness problems of requiring the use of barrier() in
>> loops that expect atomic_t variables to change externally.  This is relevant
>> even on non-smp architectures, since drivers may use atomic operations in
>> interrupt handlers.
>>
>> Signed-off-by: Chris Snook <csnook@redhat.com>
>>
> 
> Hmm, I thought we were trying to move away from volatile since it is
> very weakly defined and towards explicit barriers and locks...
> Points to --> Documentation/volatile-considered-harmful.txt

This is a special case.  Usually, the use of volatile is just lazy.  In 
this case, it's probably necessary on at least some architectures, so we 
can't remove it everywhere unless we want to rewrite atomic.h completely 
in inline assembler for several architectures, and painstakingly verify 
all that work.  I would hope it's obvious that having consistent 
behavior on all architectures, or even at all compiler optimization 
levels within an architecture, can be agreed to be good.  Additionally, 
atomic_t variables are a rare exception, in that we pretty much always 
want to work with the latest value in RAM on every access.  Making this 
atomic will allow us to remove a bunch of barriers which do nothing but 
slow things down on most architectures.

I agree that the use of atomic_t in .c files is generally bad, but in 
certain special cases, hiding it inside defined data types may be worth 
the slight impurity, just as we sometimes tolerate lines longer than 80 
columns when "fixing" it makes things unreadable.

	-- Chris

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] make atomic_t volatile on all architectures
  2007-08-08 23:25 ` Lennert Buytenhek
@ 2007-08-08 23:35   ` Chris Snook
  0 siblings, 0 replies; 34+ messages in thread
From: Chris Snook @ 2007-08-08 23:35 UTC (permalink / raw)
  To: Lennert Buytenhek
  Cc: akpm, torvalds, ak, heiko.carstens, davem, linux-kernel, netdev,
	schwidefsky, wensong, horms, wjiang, cfriesen, zlynx

Lennert Buytenhek wrote:
> On Wed, Aug 08, 2007 at 07:07:33PM -0400, Chris Snook wrote:
> 
>> From: Chris Snook <csnook@redhat.com>
>>
>> Some architectures currently do not declare the contents of an atomic_t to be
>> volatile.  This causes confusion since atomic_read() might not actually read
>> anything if an optimizing compiler re-uses a value stored in a register, which
>> can break code that loops until something external changes the value of an
>> atomic_t.  Avoiding such bugs requires using barrier(), which causes re-loads
>> of all registers used in the loop, thus hurting performance instead of helping
>> it, particularly on architectures where it's unnecessary.  Since we generally
>> want to re-read the contents of an atomic variable on every access anyway,
>> let's standardize the behavior across all architectures and avoid the
>> performance and correctness problems of requiring the use of barrier() in
>> loops that expect atomic_t variables to change externally.  This is relevant
>> even on non-smp architectures, since drivers may use atomic operations in
>> interrupt handlers.
>>
>> Signed-off-by: Chris Snook <csnook@redhat.com>
> 
> Documentation/atomic_ops.txt would need updating:
> 
> 	[...]
> 
> 	One very important aspect of these two routines is that they DO NOT
> 	require any explicit memory barriers.  They need only perform the
> 	atomic_t counter update in an SMP safe manner.

Thanks, I was looking for that.  I'll re-send shortly with my comment 
moved there.  People are already using atomic_t in a manner that implies 
the use of memory barriers and interrupt-safety, which is what the patch 
enforces.

	-- Chris

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] make atomic_t volatile on all architectures
  2007-08-08 23:31   ` Chris Snook
@ 2007-08-08 23:51     ` Jesper Juhl
  0 siblings, 0 replies; 34+ messages in thread
From: Jesper Juhl @ 2007-08-08 23:51 UTC (permalink / raw)
  To: Chris Snook
  Cc: akpm, torvalds, ak, heiko.carstens, davem, linux-kernel, netdev,
	schwidefsky, wensong, horms, wjiang, cfriesen, zlynx

On 09/08/2007, Chris Snook <csnook@redhat.com> wrote:
> Jesper Juhl wrote:
> > On 09/08/2007, Chris Snook <csnook@redhat.com> wrote:
> >> From: Chris Snook <csnook@redhat.com>
> >>
> >> Some architectures currently do not declare the contents of an atomic_t to be
> >> volatile.  This causes confusion since atomic_read() might not actually read
> >> anything if an optimizing compiler re-uses a value stored in a register, which
> >> can break code that loops until something external changes the value of an
> >> atomic_t.  Avoiding such bugs requires using barrier(), which causes re-loads
> >> of all registers used in the loop, thus hurting performance instead of helping
> >> it, particularly on architectures where it's unnecessary.  Since we generally
> >> want to re-read the contents of an atomic variable on every access anyway,
> >> let's standardize the behavior across all architectures and avoid the
> >> performance and correctness problems of requiring the use of barrier() in
> >> loops that expect atomic_t variables to change externally.  This is relevant
> >> even on non-smp architectures, since drivers may use atomic operations in
> >> interrupt handlers.
> >>
> >> Signed-off-by: Chris Snook <csnook@redhat.com>
> >>
> >
> > Hmm, I thought we were trying to move away from volatile since it is
> > very weakly defined and towards explicit barriers and locks...
> > Points to --> Documentation/volatile-considered-harmful.txt
>
> This is a special case.

I had a feeling it might be.

> Usually, the use of volatile is just lazy.  In
> this case, it's probably necessary on at least some architectures, so we
> can't remove it everywhere unless we want to rewrite atomic.h completely
> in inline assembler for several architectures, and painstakingly verify
> all that work.

Sounds quite unpleasant and actively harmful - keeping stuff in plain
readable C makes it easier to handle by most people.

> I would hope it's obvious that having consistent
> behavior on all architectures, or even at all compiler optimization
> levels within an architecture, can be agreed to be good.

Agreed, consistency is good.

>  Additionally,
> atomic_t variables are a rare exception, in that we pretty much always
> want to work with the latest value in RAM on every access.  Making this
> atomic will allow us to remove a bunch of barriers which do nothing but
> slow things down on most architectures.
>
I believe you meant to say "volatile" and not "atomic" above.  But
yes, if volatile is sufficiently defined to ensure it does what we
want in this case and using it means we can actually speed up the
kernel, then it does indeed sound reasonable. Especially since it's
confined to the implementation of atomic_t which most people shouldn't
be looking at anyway (but simply use) and when using an atomic_t it's
pretty reasonable to expect that it doesn't need any additional
locking or barriers since it's supposed to be *atomic*.

> I agree that the use of atomic_t in .c files is generally bad, but in
> certain special cases, hiding it inside defined data types may be worth
> the slight impurity, just as we sometimes tolerate lines longer than 80
> columns when "fixing" it makes things unreadable.
>
Can't argue with that.

It seems you've thought it through.  I just wanted to be sure that
this approach was actually the one that makes the most sense, and
you've convinced me of that :-)

-- 
Jesper Juhl <jesper.juhl@gmail.com>
Don't top-post  http://www.catb.org/~esr/jargon/html/T/top-post.html
Plain text mails only, please      http://www.expita.com/nomime.html

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] make atomic_t volatile on all architectures
  2007-08-08 23:07 [PATCH] make atomic_t volatile on all architectures Chris Snook
  2007-08-08 23:18 ` Jesper Juhl
  2007-08-08 23:25 ` Lennert Buytenhek
@ 2007-08-09  1:03 ` Herbert Xu
  2007-08-09  1:48   ` David Miller
  2007-08-09  7:47   ` Chris Snook
  2007-08-09  4:18 ` Linus Torvalds
  3 siblings, 2 replies; 34+ messages in thread
From: Herbert Xu @ 2007-08-09  1:03 UTC (permalink / raw)
  To: Chris Snook
  Cc: akpm, torvalds, ak, heiko.carstens, davem, linux-kernel, netdev,
	schwidefsky, wensong, horms, wjiang, cfriesen, zlynx

Chris Snook <csnook@redhat.com> wrote:
> 
> Some architectures currently do not declare the contents of an atomic_t to be
> volatile.  This causes confusion since atomic_read() might not actually read
> anything if an optimizing compiler re-uses a value stored in a register, which
> can break code that loops until something external changes the value of an
> atomic_t.  Avoiding such bugs requires using barrier(), which causes re-loads

Such loops should always use something like cpu_relax() which comes
with a barrier.

> of all registers used in the loop, thus hurting performance instead of helping
> it, particularly on architectures where it's unnecessary.  Since we generally

Do you have an example of such a loop where performance is hurt by this?

The IPVS code that led to this patch was simply broken and has been
fixed to use cpu_relax().

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] make atomic_t volatile on all architectures
  2007-08-09  1:03 ` Herbert Xu
@ 2007-08-09  1:48   ` David Miller
  2007-08-09  3:47     ` Paul E. McKenney
  2007-08-09  7:47   ` Chris Snook
  1 sibling, 1 reply; 34+ messages in thread
From: David Miller @ 2007-08-09  1:48 UTC (permalink / raw)
  To: herbert
  Cc: csnook, akpm, torvalds, ak, heiko.carstens, linux-kernel, netdev,
	schwidefsky, wensong, horms, wjiang, cfriesen, zlynx

From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 09 Aug 2007 09:03:27 +0800

> Such loops should always use something like cpu_relax() which comes
> with a barrier.

This is an excellent point.

And it needs to be weighed with the error prone'ness Andrew mentioned.
There probably is a middle ground somewhere.

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] make atomic_t volatile on all architectures
  2007-08-09  1:48   ` David Miller
@ 2007-08-09  3:47     ` Paul E. McKenney
  0 siblings, 0 replies; 34+ messages in thread
From: Paul E. McKenney @ 2007-08-09  3:47 UTC (permalink / raw)
  To: David Miller
  Cc: herbert, csnook, akpm, torvalds, ak, heiko.carstens, linux-kernel,
	netdev, schwidefsky, wensong, horms, wjiang, cfriesen, zlynx

On Wed, Aug 08, 2007 at 06:48:24PM -0700, David Miller wrote:
> From: Herbert Xu <herbert@gondor.apana.org.au>
> Date: Thu, 09 Aug 2007 09:03:27 +0800
> 
> > Such loops should always use something like cpu_relax() which comes
> > with a barrier.
> 
> This is an excellent point.
> 
> And it needs to be weighed with the error prone'ness Andrew mentioned.
> There probably is a middle ground somewhere.

OK...  I'll bite.  ACCESS_ONCE(), see http://lkml.org/lkml/2007/7/11/664.

This would allow ACCESS_ONCE(atomic_read(&x)) to be used where refetching
would be problematic, but allow the compiler free rein in cases where
refetching is OK.

The ACCESS_ONCE() primitive of course has its limitations as well, but
you did ask for a middle ground.  ;-)

							Thanx, Paul

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] make atomic_t volatile on all architectures
  2007-08-08 23:07 [PATCH] make atomic_t volatile on all architectures Chris Snook
                   ` (2 preceding siblings ...)
  2007-08-09  1:03 ` Herbert Xu
@ 2007-08-09  4:18 ` Linus Torvalds
  2007-08-09  4:59   ` Jerry Jiang
  2007-08-09  7:31   ` Chris Snook
  3 siblings, 2 replies; 34+ messages in thread
From: Linus Torvalds @ 2007-08-09  4:18 UTC (permalink / raw)
  To: Chris Snook
  Cc: akpm, ak, heiko.carstens, davem, linux-kernel, netdev,
	schwidefsky, wensong, horms, wjiang, cfriesen, zlynx



On Wed, 8 Aug 2007, Chris Snook wrote:
> 
> Some architectures currently do not declare the contents of an atomic_t to be
> volatile.  This causes confusion since atomic_read() might not actually read
> anything if an optimizing compiler re-uses a value stored in a register, which
> can break code that loops until something external changes the value of an
> atomic_t.

I'd be *much* happier with "atomic_read()" doing the "volatile" instead.

The fact is, volatile on data structures is a bug. It's a wart in the C 
language. It shouldn't be used. 

Volatile accesses in *code* can be ok, and if we have "atomic_read()" 
expand to a "*(volatile int *)&(x)->value", then I'd be ok with that.

But marking data structures volatile just makes the compiler screw up 
totally, and makes code for initialization sequences etc much worse.

		Linus

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] make atomic_t volatile on all architectures
  2007-08-09  4:18 ` Linus Torvalds
@ 2007-08-09  4:59   ` Jerry Jiang
  2007-08-09  7:31   ` Chris Snook
  1 sibling, 0 replies; 34+ messages in thread
From: Jerry Jiang @ 2007-08-09  4:59 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Chris Snook, akpm, ak, heiko.carstens, davem, linux-kernel,
	netdev, schwidefsky, wensong, horms, cfriesen, zlynx

On Wed, 8 Aug 2007 21:18:25 -0700 (PDT)
Linus Torvalds <torvalds@linux-foundation.org> wrote:

> 
> 
> On Wed, 8 Aug 2007, Chris Snook wrote:
> > 
> > Some architectures currently do not declare the contents of an atomic_t to be
> > volatile.  This causes confusion since atomic_read() might not actually read
> > anything if an optimizing compiler re-uses a value stored in a register, which
> > can break code that loops until something external changes the value of an
> > atomic_t.
> 
> I'd be *much* happier with "atomic_read()" doing the "volatile" instead.
> 
> The fact is, volatile on data structures is a bug. It's a wart in the C 
> language. It shouldn't be used. 

Why? It's a wart! Is it due to unclear C standard on volatile related point?

Why the *volatile-accesses-in-code* is acceptable, does C standard make it clear?

-- Jerry

> 
> Volatile accesses in *code* can be ok, and if we have "atomic_read()" 
> expand to a "*(volatile int *)&(x)->value", then I'd be ok with that.
> 
> But marking data structures volatile just makes the compiler screw up 
> totally, and makes code for initialization sequences etc much worse.
> 
> 		Linus

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] make atomic_t volatile on all architectures
  2007-08-09  4:18 ` Linus Torvalds
  2007-08-09  4:59   ` Jerry Jiang
@ 2007-08-09  7:31   ` Chris Snook
  2007-08-09  8:14     ` Heiko Carstens
  2007-08-09 17:36     ` Chuck Ebbert
  1 sibling, 2 replies; 34+ messages in thread
From: Chris Snook @ 2007-08-09  7:31 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: akpm, ak, heiko.carstens, davem, linux-kernel, netdev,
	schwidefsky, wensong, horms, wjiang, cfriesen, zlynx

Linus Torvalds wrote:
> 
> On Wed, 8 Aug 2007, Chris Snook wrote:
>> Some architectures currently do not declare the contents of an atomic_t to be
>> volatile.  This causes confusion since atomic_read() might not actually read
>> anything if an optimizing compiler re-uses a value stored in a register, which
>> can break code that loops until something external changes the value of an
>> atomic_t.
> 
> I'd be *much* happier with "atomic_read()" doing the "volatile" instead.
> 
> The fact is, volatile on data structures is a bug. It's a wart in the C 
> language. It shouldn't be used. 
> 
> Volatile accesses in *code* can be ok, and if we have "atomic_read()" 
> expand to a "*(volatile int *)&(x)->value", then I'd be ok with that.
> 
> But marking data structures volatile just makes the compiler screw up 
> totally, and makes code for initialization sequences etc much worse.
> 
> 		Linus

Fair enough.  Casting to (volatile int *) will give us the behavior people 
expect when using atomic_t without needing to use inefficient barriers.

While we have the hood up, should we convert all the atomic_t's to non-volatile 
and put volatile casts in all the atomic_reads?  I don't know enough about the 
various arches to say with confidence that those changes alone will preserve 
existing behavior.  We might need some arch-specific tweaking of the atomic 
operations.

	-- Chris

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] make atomic_t volatile on all architectures
  2007-08-09  1:03 ` Herbert Xu
  2007-08-09  1:48   ` David Miller
@ 2007-08-09  7:47   ` Chris Snook
  2007-08-09  8:30     ` Herbert Xu
  1 sibling, 1 reply; 34+ messages in thread
From: Chris Snook @ 2007-08-09  7:47 UTC (permalink / raw)
  To: Herbert Xu
  Cc: akpm, torvalds, ak, heiko.carstens, davem, linux-kernel, netdev,
	schwidefsky, wensong, horms, wjiang, cfriesen, zlynx

Herbert Xu wrote:
> Chris Snook <csnook@redhat.com> wrote:
>> Some architectures currently do not declare the contents of an atomic_t to be
>> volatile.  This causes confusion since atomic_read() might not actually read
>> anything if an optimizing compiler re-uses a value stored in a register, which
>> can break code that loops until something external changes the value of an
>> atomic_t.  Avoiding such bugs requires using barrier(), which causes re-loads
> 
> Such loops should always use something like cpu_relax() which comes
> with a barrier.

If they're not doing anything, sure.  Plenty of loops actually do some sort of 
real work while waiting for their halt condition, possibly even work which is 
necessary for their halt condition to occur, and you definitely don't want to be 
doing cpu_relax() in this case.  On register-rich architectures you can do quite 
a lot of work without needing to reuse the register containing the result of the 
atomic_read().  Those are precisely the architectures where barrier() hurts the 
most.

>> of all registers used in the loop, thus hurting performance instead of helping
>> it, particularly on architectures where it's unnecessary.  Since we generally
> 
> Do you have an example of such a loop where performance is hurt by this?

Not handy.  Perhaps more interesting are cases where we access the same atomic_t 
twice in a hot path.  If we can remove some of those barriers, those hot paths 
will get faster.

Performance was only part of the motivation.  The IPVS bug was an example of how 
atomic_t is assumed (not always correctly) to work, and other recent discussions 
on this list have made it clear that most people assume atomic_read() actually 
reads something every time, and don't even think to consult the documentation 
until they find out the hard way that it does not.  I'm not saying we should 
encourage lazy programming, but in this case the assumption is reasonable 
because that's how people actually use atomic_t, and making this behavior 
uniform across all architectures makes it more convenient to do things the right 
way, which we should encourage.

> The IPVS code that led to this patch was simply broken and has been
> fixed to use cpu_relax().

I agree, busy-waiting should be done with cpu_relax(), if at all.  I'm more 
concerned about cases that are not busy-waiting, but could still get compiled 
with the same optimization.

	-- Chris

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] make atomic_t volatile on all architectures
  2007-08-09  7:31   ` Chris Snook
@ 2007-08-09  8:14     ` Heiko Carstens
  2007-08-09 17:36     ` Chuck Ebbert
  1 sibling, 0 replies; 34+ messages in thread
From: Heiko Carstens @ 2007-08-09  8:14 UTC (permalink / raw)
  To: Chris Snook
  Cc: Linus Torvalds, akpm, ak, davem, linux-kernel, netdev,
	schwidefsky, wensong, horms, wjiang, cfriesen, zlynx

On Thu, Aug 09, 2007 at 03:31:10AM -0400, Chris Snook wrote:
>  Linus Torvalds wrote:
> > I'd be *much* happier with "atomic_read()" doing the "volatile" instead.
> > The fact is, volatile on data structures is a bug. It's a wart in the C 
> > language. It shouldn't be used. Volatile accesses in *code* can be ok, and 
> > if we have "atomic_read()" expand to a "*(volatile int *)&(x)->value", then 
> > I'd be ok with that.
> > But marking data structures volatile just makes the compiler screw up 
> > totally, and makes code for initialization sequences etc much worse.
> > 		Linus
> 
>  Fair enough.  Casting to (volatile int *) will give us the behavior people 
>  expect when using atomic_t without needing to use inefficient barriers.
> 
>  While we have the hood up, should we convert all the atomic_t's to 
>  non-volatile and put volatile casts in all the atomic_reads?  I don't know 
>  enough about the various arches to say with confidence that those changes 
>  alone will preserve existing behavior.  We might need some arch-specific 
>  tweaking of the atomic operations.

If you write that patch could you include the atomic64 variants as well,
please? Besides that just post the patch to linux-arch and maintainers
should speak up.

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] make atomic_t volatile on all architectures
  2007-08-09  7:47   ` Chris Snook
@ 2007-08-09  8:30     ` Herbert Xu
  2007-08-09 11:44       ` Chris Snook
  0 siblings, 1 reply; 34+ messages in thread
From: Herbert Xu @ 2007-08-09  8:30 UTC (permalink / raw)
  To: Chris Snook
  Cc: akpm, torvalds, ak, heiko.carstens, davem, linux-kernel, netdev,
	schwidefsky, wensong, horms, wjiang, cfriesen, zlynx

On Thu, Aug 09, 2007 at 03:47:57AM -0400, Chris Snook wrote:
>
> If they're not doing anything, sure.  Plenty of loops actually do some sort 
> of real work while waiting for their halt condition, possibly even work 
> which is necessary for their halt condition to occur, and you definitely 
> don't want to be doing cpu_relax() in this case.  On register-rich 
> architectures you can do quite a lot of work without needing to reuse the 
> register containing the result of the atomic_read().  Those are precisely 
> the architectures where barrier() hurts the most.

I have a problem with this argument.  The same loop could be
using a non-atomic as long as the updaters are serialised.  Would
you suggest that we turn such non-atomics into volatiles too?

Any loop that's waiting for an external halt condition either
has to schedule away (which is a barrier) or you'd be busy
waiting in which case you should use cpu_relax.

Do you have an example where this isn't the case?

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] make atomic_t volatile on all architectures
       [not found]   ` <8Q8rD-hh-7@gated-at.bofh.it>
@ 2007-08-09  9:10     ` Bodo Eggert
  2007-08-09  9:18       ` Jerry Jiang
  0 siblings, 1 reply; 34+ messages in thread
From: Bodo Eggert @ 2007-08-09  9:10 UTC (permalink / raw)
  To: Jerry Jiang, Linus Torvalds, Chris Snook, akpm, ak,
	heiko.carstens

Jerry Jiang <wjiang@resilience.com> wrote:
> On Wed, 8 Aug 2007 21:18:25 -0700 (PDT)
>> On Wed, 8 Aug 2007, Chris Snook wrote:

>> > Some architectures currently do not declare the contents of an atomic_t to
>> > be
>> > volatile.  This causes confusion since atomic_read() might not actually
>> > read anything if an optimizing compiler re-uses a value stored in a
>> > register, which can break code that loops until something external changes
>> > the value of an atomic_t.
>> 
>> I'd be *much* happier with "atomic_read()" doing the "volatile" instead.
>> 
>> The fact is, volatile on data structures is a bug. It's a wart in the C
>> language. It shouldn't be used.
> 
> Why? It's a wart! Is it due to unclear C standard on volatile related point?
> 
> Why the *volatile-accesses-in-code* is acceptable, does C standard make it
> clear?

http://lwn.net/Articles/233482/
-- 
Fun things to slip into your budget
Heisenberg Compensator upgrade kit

Friß, Spammer: uWfuXeviZ@x.7eggert.dyndns.org

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] make atomic_t volatile on all architectures
  2007-08-09  9:10     ` Bodo Eggert
@ 2007-08-09  9:18       ` Jerry Jiang
  2007-08-09 15:00         ` Linus Torvalds
  0 siblings, 1 reply; 34+ messages in thread
From: Jerry Jiang @ 2007-08-09  9:18 UTC (permalink / raw)
  To: 7eggert
  Cc: Linus Torvalds, Chris Snook, akpm, ak, heiko.carstens, davem,
	linux-kernel, netdev, schwidefsky, wensong, horms, cfriesen,
	zlynx

On Thu, 09 Aug 2007 11:10:16 +0200
Bodo Eggert <7eggert@gmx.de> wrote:

> > 
> > Why the *volatile-accesses-in-code* is acceptable, does C standard make it
> > clear?
> 
> http://lwn.net/Articles/233482/

I have read this article before, but What Linus said only focusing on
the conclusion-- The semantics of it are so unclear as
to be totally useless.

and still not to said "Why the *volatile-accesses-in-code* is
acceptable"

-- Jerry


> -- 
> Fun things to slip into your budget
> Heisenberg Compensator upgrade kit
> 
> Friß, Spammer: uWfuXeviZ@x.7eggert.dyndns.org

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] make atomic_t volatile on all architectures
  2007-08-09  8:30     ` Herbert Xu
@ 2007-08-09 11:44       ` Chris Snook
  0 siblings, 0 replies; 34+ messages in thread
From: Chris Snook @ 2007-08-09 11:44 UTC (permalink / raw)
  To: Herbert Xu
  Cc: akpm, torvalds, ak, heiko.carstens, davem, linux-kernel, netdev,
	schwidefsky, wensong, horms, wjiang, cfriesen, zlynx

Herbert Xu wrote:
> On Thu, Aug 09, 2007 at 03:47:57AM -0400, Chris Snook wrote:
>> If they're not doing anything, sure.  Plenty of loops actually do some sort 
>> of real work while waiting for their halt condition, possibly even work 
>> which is necessary for their halt condition to occur, and you definitely 
>> don't want to be doing cpu_relax() in this case.  On register-rich 
>> architectures you can do quite a lot of work without needing to reuse the 
>> register containing the result of the atomic_read().  Those are precisely 
>> the architectures where barrier() hurts the most.
> 
> I have a problem with this argument.  The same loop could be
> using a non-atomic as long as the updaters are serialised.  Would
> you suggest that we turn such non-atomics into volatiles too?

No.  I'm simply saying that when people call atomic_read, they expect a read to 
occur.  When this doesn't happen, people get confused.  Merely referencing a 
variable doesn't carry the same connotation.

Anyway, I'm taking Linus's advice and putting the cast in atomic_read(), rather 
than the counter declaration itself.  Everything else uses __asm__ __volatile__, 
or calls atomic_read() with interrupts disabled.  This ensures that 
atomic_read() works as expected across all architectures, without the cruft the 
compiler generates when you declare the variable itself volatile.

> Any loop that's waiting for an external halt condition either
> has to schedule away (which is a barrier) or you'd be busy
> waiting in which case you should use cpu_relax.

Not necessarily.  Some code uses atomic_t for a sort of lightweight semaphore. 
If your loop is actually doing real work, perhaps in a softirq handler 
negotiating shared resources with a hard irq handler, you're not busy-waiting.

> Do you have an example where this isn't the case?

a) No, and that's sort of the point.  We shouldn't have to audit the whole 
kernel to find cases where a misleadingly-named function is doing what its users 
are not expecting.  If we can make it always do the right thing without any 
substantial drawbacks, we should.

b) Loops are just one case, which came to mind because of the IPVS bug recently 
discussed.  I recall seeing some scheduler code recently which does an 
atomic_read() twice on the same variable, with a barrier in between.  It's in a 
very hot path, so if we can remove that barrier, we save a bunch of register 
loads.  When you're context switching every microsecond in SCHED_RR, that really 
matters.

	-- Chris

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] make atomic_t volatile on all architectures
  2007-08-09  9:18       ` Jerry Jiang
@ 2007-08-09 15:00         ` Linus Torvalds
  0 siblings, 0 replies; 34+ messages in thread
From: Linus Torvalds @ 2007-08-09 15:00 UTC (permalink / raw)
  To: Jerry Jiang
  Cc: 7eggert, Chris Snook, akpm, ak, heiko.carstens, davem,
	linux-kernel, netdev, schwidefsky, wensong, horms, cfriesen,
	zlynx



On Thu, 9 Aug 2007, Jerry Jiang wrote:
> 
> and still not to said "Why the *volatile-accesses-in-code* is
> acceptable"

I don't think volatile is necessarily wonderful in code _either_. So I 
think the "atomic_read()" issue would be even better off if we just made 
sure everybody behaves well and has the right barriers.

So the difference between "volatile in code" and "volatile on data 
structures" is that the latter is *always* wrong (and wrong for very 
fundamental reasons). 

But "volatile in code" can be perfectly fine. If you hide the volatile in 
an accessor function, and just specify that the rules for that function is 
that it always loads from memory but doesn't imply any memory barriers, 
than that is fine. And I think those kinds of semantics may well be 
perfectly sane for "atomic_read()".

So I think a volatile access inside "atomic_read()" at least has 
well-defined semantics. Are they the best possible semantics? I dunno. I 
can imagine that it's perfectly fine, but on the other hand, it would be 
interesting to see any code for which it matters - it's entirely possible 
that the code itself is the problem, and should instead have a 
"cpu_relax()" or similar that implies a barrier.

			Linus

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] make atomic_t volatile on all architectures
  2007-08-09  7:31   ` Chris Snook
  2007-08-09  8:14     ` Heiko Carstens
@ 2007-08-09 17:36     ` Chuck Ebbert
  2007-08-09 17:55       ` Linus Torvalds
  2007-08-09 17:57       ` Martin Schwidefsky
  1 sibling, 2 replies; 34+ messages in thread
From: Chuck Ebbert @ 2007-08-09 17:36 UTC (permalink / raw)
  To: Chris Snook
  Cc: Linus Torvalds, akpm, ak, heiko.carstens, davem, linux-kernel,
	netdev, schwidefsky, wensong, horms, wjiang, cfriesen, zlynx

On 08/09/2007 03:31 AM, Chris Snook wrote:
> 
> Fair enough.  Casting to (volatile int *) will give us the behavior
> people expect when using atomic_t without needing to use inefficient
> barriers.
> 

You can use this forget() macro to make the compiler reread a variable:

#define forget(var) asm volatile ("" : "=m"(var))

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] make atomic_t volatile on all architectures
  2007-08-09 17:36     ` Chuck Ebbert
@ 2007-08-09 17:55       ` Linus Torvalds
  2007-08-09 18:20         ` Martin Schwidefsky
  2007-08-09 17:57       ` Martin Schwidefsky
  1 sibling, 1 reply; 34+ messages in thread
From: Linus Torvalds @ 2007-08-09 17:55 UTC (permalink / raw)
  To: Chuck Ebbert
  Cc: Chris Snook, akpm, ak, heiko.carstens, davem, linux-kernel,
	netdev, schwidefsky, wensong, horms, wjiang, cfriesen, zlynx



On Thu, 9 Aug 2007, Chuck Ebbert wrote:
> 
> You can use this forget() macro to make the compiler reread a variable:
> 
> #define forget(var) asm volatile ("" : "=m"(var))

No. That will also make the compiler "forget" any previous writes to it, 
so it changes behaviour.

You'd have to use "+m".

		Linus

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] make atomic_t volatile on all architectures
  2007-08-09 17:36     ` Chuck Ebbert
  2007-08-09 17:55       ` Linus Torvalds
@ 2007-08-09 17:57       ` Martin Schwidefsky
  1 sibling, 0 replies; 34+ messages in thread
From: Martin Schwidefsky @ 2007-08-09 17:57 UTC (permalink / raw)
  To: Chuck Ebbert
  Cc: Chris Snook, Linus Torvalds, akpm, ak, heiko.carstens, davem,
	linux-kernel, netdev, wensong, horms, wjiang, cfriesen, zlynx

On Thu, 2007-08-09 at 13:36 -0400, Chuck Ebbert wrote:
> > Fair enough.  Casting to (volatile int *) will give us the behavior
> > people expect when using atomic_t without needing to use inefficient
> > barriers.
> > 
> 
> You can use this forget() macro to make the compiler reread a
> variable:
> 
> #define forget(var) asm volatile ("" : "=m"(var))

You need to specify a "m"(var) input constraint as well. Without it the
compiler might remove the initialization of var. E.g.

void fn(void)
{
	int var = 0;
	forget(var);
	/* now var can have any value. */
}

-- 
blue skies,
  Martin.

"Reality continues to ruin my life." - Calvin.



^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] make atomic_t volatile on all architectures
  2007-08-09 17:55       ` Linus Torvalds
@ 2007-08-09 18:20         ` Martin Schwidefsky
  2007-08-12  5:53           ` Segher Boessenkool
  0 siblings, 1 reply; 34+ messages in thread
From: Martin Schwidefsky @ 2007-08-09 18:20 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Chuck Ebbert, Chris Snook, akpm, ak, heiko.carstens, davem,
	linux-kernel, netdev, wensong, horms, wjiang, cfriesen, zlynx

On Thu, 2007-08-09 at 10:55 -0700, Linus Torvalds wrote:
> > You can use this forget() macro to make the compiler reread a variable:
> > 
> > #define forget(var) asm volatile ("" : "=m"(var))
> 
> No. That will also make the compiler "forget" any previous writes to it, 
> so it changes behaviour.
> 
> You'd have to use "+m".

Yes, though I would use "=m" on the output list and "m" on the input
list. The reason is that I've seen gcc fall on its face with an ICE on
s390 due to "+m". The explanation I've got from our compiler people was
quite esoteric, as far as I remember gcc splits "+m" to an input operand
and an output operand. Now it can happen that the compiler chooses two
different registers to access the same memory location. "+m" requires
that the two memory references are identical which causes the ICE if
they are not. I do not know if the current compilers still do this. Has
anyone else seen this happen ?

-- 
blue skies,
  Martin.

"Reality continues to ruin my life." - Calvin.



^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] make atomic_t volatile on all architectures
  2007-08-09 18:20         ` Martin Schwidefsky
@ 2007-08-12  5:53           ` Segher Boessenkool
  2007-08-12  6:09             ` Linus Torvalds
  2007-08-12  9:47             ` Martin Schwidefsky
  0 siblings, 2 replies; 34+ messages in thread
From: Segher Boessenkool @ 2007-08-12  5:53 UTC (permalink / raw)
  To: schwidefsky
  Cc: wjiang, Linus Torvalds, wensong, heiko.carstens, linux-kernel, ak,
	cfriesen, netdev, horms, akpm, Chuck Ebbert, davem, zlynx,
	Chris Snook

>> You'd have to use "+m".
>
> Yes, though I would use "=m" on the output list and "m" on the input
> list. The reason is that I've seen gcc fall on its face with an ICE on
> s390 due to "+m". The explanation I've got from our compiler people was
> quite esoteric, as far as I remember gcc splits "+m" to an input 
> operand
> and an output operand. Now it can happen that the compiler chooses two
> different registers to access the same memory location. "+m" requires
> that the two memory references are identical which causes the ICE if
> they are not.

The problem is very nicely described here, last paragraph:
<http://gcc.gnu.org/ml/gcc-patches/2007-07/msg01816.html>

It's not a problem anymore in (very) recent GCC, although
that of course won't help you in the kernel (yet).

> I do not know if the current compilers still do this. Has
> anyone else seen this happen ?

In recent GCC, it's actually documented:

	 The ordinary output operands must be write-only; GCC will assume that
	the values in these operands before the instruction are dead and need
	not be generated.  Extended asm supports input-output or read-write
	operands.  Use the constraint character `+' to indicate such an operand
	and list it with the output operands.  You should only use read-write
	operands when the constraints for the operand (or the operand in which
	only some of the bits are to be changed) allow a register.

Note that last line.


Segher


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] make atomic_t volatile on all architectures
  2007-08-12  5:53           ` Segher Boessenkool
@ 2007-08-12  6:09             ` Linus Torvalds
  2007-08-12  9:48               ` Martin Schwidefsky
  2007-08-12 10:27               ` Segher Boessenkool
  2007-08-12  9:47             ` Martin Schwidefsky
  1 sibling, 2 replies; 34+ messages in thread
From: Linus Torvalds @ 2007-08-12  6:09 UTC (permalink / raw)
  To: Segher Boessenkool
  Cc: schwidefsky, wjiang, wensong, heiko.carstens, linux-kernel, ak,
	cfriesen, netdev, horms, akpm, Chuck Ebbert, davem, zlynx,
	Chris Snook



On Sun, 12 Aug 2007, Segher Boessenkool wrote:
>
> Note that last line.

Segher, how about you just accept that Linux uses gcc as per reality, and 
that sometimes the reality is different from your expectations?

"+m" works. We use it. It's better than the alternatives. Pointing to 
stale documentation doesn't change anything.

The same is true of accesses through a volatile pointer. The kernel has 
done that for a *loong* time (all MMIO IO is done that way), and it's 
totally pointless to say that "volatile" isn't guaranteed to do what it 
does. It works, and quite frankly, if it didn't work, it would be a gcc 
BUG.

And again, this is not a "C standard" issue. It's a "sane implementation" 
issue. Linux expects the compiler to be sane. If it isn't, that's not our 
problem. gcc *is* sane, and I don't see why you constantly act as if it 
wasn't.

		Linus

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] make atomic_t volatile on all architectures
  2007-08-12  5:53           ` Segher Boessenkool
  2007-08-12  6:09             ` Linus Torvalds
@ 2007-08-12  9:47             ` Martin Schwidefsky
  2007-08-12 10:35               ` Segher Boessenkool
  1 sibling, 1 reply; 34+ messages in thread
From: Martin Schwidefsky @ 2007-08-12  9:47 UTC (permalink / raw)
  To: Segher Boessenkool
  Cc: wjiang, Linus Torvalds, wensong, heiko.carstens, linux-kernel, ak,
	cfriesen, netdev, horms, akpm, Chuck Ebbert, davem, zlynx,
	Chris Snook

On Sun, 2007-08-12 at 07:53 +0200, Segher Boessenkool wrote:
> > Yes, though I would use "=m" on the output list and "m" on the input
> > list. The reason is that I've seen gcc fall on its face with an ICE on
> > s390 due to "+m". The explanation I've got from our compiler people was
> > quite esoteric, as far as I remember gcc splits "+m" to an input 
> > operand
> > and an output operand. Now it can happen that the compiler chooses two
> > different registers to access the same memory location. "+m" requires
> > that the two memory references are identical which causes the ICE if
> > they are not.
> 
> The problem is very nicely described here, last paragraph:
> <http://gcc.gnu.org/ml/gcc-patches/2007-07/msg01816.html>
> 
> It's not a problem anymore in (very) recent GCC, although
> that of course won't help you in the kernel (yet).

So you are saying that gcc 3.x still has this problem ?

> > I do not know if the current compilers still do this. Has
> > anyone else seen this happen ?
> 
> In recent GCC, it's actually documented:
> 
> 	 The ordinary output operands must be write-only; GCC will assume that
> 	the values in these operands before the instruction are dead and need
> 	not be generated.  Extended asm supports input-output or read-write
> 	operands.  Use the constraint character `+' to indicate such an operand
> 	and list it with the output operands.  You should only use read-write
> 	operands when the constraints for the operand (or the operand in which
> 	only some of the bits are to be changed) allow a register.
> 
> Note that last line.

I see, thanks for the info. 

-- 
blue skies,
  Martin.

"Reality continues to ruin my life." - Calvin.



^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] make atomic_t volatile on all architectures
  2007-08-12  6:09             ` Linus Torvalds
@ 2007-08-12  9:48               ` Martin Schwidefsky
  2007-08-12  9:54                 ` Linus Torvalds
  2007-08-12 16:30                 ` Segher Boessenkool
  2007-08-12 10:27               ` Segher Boessenkool
  1 sibling, 2 replies; 34+ messages in thread
From: Martin Schwidefsky @ 2007-08-12  9:48 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Segher Boessenkool, wjiang, wensong, heiko.carstens, linux-kernel,
	ak, cfriesen, netdev, horms, akpm, Chuck Ebbert, davem, zlynx,
	Chris Snook

On Sat, 2007-08-11 at 23:09 -0700, Linus Torvalds wrote: 
> Segher, how about you just accept that Linux uses gcc as per reality, and 
> that sometimes the reality is different from your expectations?
> 
> "+m" works. We use it. It's better than the alternatives. Pointing to 
> stale documentation doesn't change anything.

Well, perhaps on i386. I've seen some older versions of the s390 gcc die
with an ICE because I have used "+m" in some kernel inline assembly. I'm
happy to hear that this issue is fixed in recent gcc. Now I'll have to
find out if this is already true with gcc 3.x. The duplication "=m" and
"m" with the same constraint is rather annoying.

-- 
blue skies,
  Martin.

"Reality continues to ruin my life." - Calvin.




^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] make atomic_t volatile on all architectures
  2007-08-12  9:48               ` Martin Schwidefsky
@ 2007-08-12  9:54                 ` Linus Torvalds
  2007-08-12 16:30                 ` Segher Boessenkool
  1 sibling, 0 replies; 34+ messages in thread
From: Linus Torvalds @ 2007-08-12  9:54 UTC (permalink / raw)
  To: Martin Schwidefsky
  Cc: Segher Boessenkool, wjiang, wensong, heiko.carstens, linux-kernel,
	ak, cfriesen, netdev, horms, akpm, Chuck Ebbert, davem, zlynx,
	Chris Snook



On Sun, 12 Aug 2007, Martin Schwidefsky wrote:
> 
> The duplication "=m" and "m" with the same constraint is rather 
> annoying.

It's not only annoying, it causes gcc to generate bad code too. At least 
certain versions of gcc will generate the address *twice*, even if there 
is obviously only one address used.

If you have problems with "+m", you are often actually better off using 
just "m" and then adding a memory clobber. But that has other code 
generation downsides.

			Linus

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] make atomic_t volatile on all architectures
  2007-08-12  6:09             ` Linus Torvalds
  2007-08-12  9:48               ` Martin Schwidefsky
@ 2007-08-12 10:27               ` Segher Boessenkool
  2007-08-12 17:59                 ` Linus Torvalds
  1 sibling, 1 reply; 34+ messages in thread
From: Segher Boessenkool @ 2007-08-12 10:27 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: wjiang, wensong, heiko.carstens, linux-kernel, ak, cfriesen,
	netdev, horms, akpm, schwidefsky, Chuck Ebbert, davem, zlynx,
	Chris Snook

>> Note that last line.
>
> Segher, how about you just accept that Linux uses gcc as per reality, 
> and
> that sometimes the reality is different from your expectations?
>
> "+m" works.

It works _most of the time_.  Ask Martin.  Oh you don't even have to,
he told you two mails ago.  My last mail simply pointed out that this
isn't a GCC bug, but merely documented behaviour.  Well okay, it was
documented only after people found problems with it; it is an edge
case and pretty hard to trigger (impossible to trigger on RISC
architectures, and not very likely on x86 either, for different
reasons).

Since "realistic" people keep using "+m" anyhow, current GCC has added
a workaround that splits "+m" into an "=m" and an "m" constraint.  It
likely will be accepted as a permanent feature.  However, that 
workaround
doesn't yet exist on some of the compilers Linux still allows building
with (but the problem does).

> We use it. It's better than the alternatives.

How is this better than simply writing the slightly more verbose
asm("..." : "=m"(XX) : "m"(XX)) ?  It's a few more characters.
asm() is hard to write (correctly) anyway.  I don't see the problem.

> Pointing to stale documentation doesn't change anything.

It's not "stale" documentation, it's freshly built from GCC mainline.
It's also not "stale" in the sense that it isn't updated; in fact,
the email I pointed to is from a thread where a change in this exact
paragraph in the documentation was suggested (a couple of weeks ago).

> The same is true of accesses through a volatile pointer. The kernel has
> done that for a *loong* time (all MMIO IO is done that way),

(All MMIO on certain architectures).  This however is a completely
different case; there _is_ no underlying C object declared anywhere,
so no way can GCC prove that that object isn't volatile, so it _has_
to assume it is.

Also, in case of e.g. readb(), if the compiler can prove the resulting
value isn't used it doesn't have to perform the access at all -- the
documentation (again, not "stale" documentation) points this out quite
literally.

> and it's
> totally pointless to say that "volatile" isn't guaranteed to do what it
> does.

I actually asked a couple of other GCC developers last night, and
they all agreed with me.  If you look at historic GCC problem reports
(I pointed at a few earlier in these threads) you would see that the
situation is far from clear.

> It works, and quite frankly, if it didn't work, it would be a gcc
> BUG.

How can it be a bug, if the semantics you think are guaranteed are
guaranteed in your (and others') minds only?

Of course, GCC likes to cater to its users, and tries to make things
work the way the programmer probably intended.  However it a) cannot
*actually* read your mind, it just looks that way; b) it _also_ has
to implement the _correct_ semantics of volatile; c) since this isn't
part of any C version (no, not GNU99 or similar either), sometimes
GCC developers do not think about what some people expect the compiler
should do with such code, but just implement the requirements they
*know* exist.  And that's when "BUGs" happen.

If you want GCC to do what you want it to do instead of what it does,
why not file an enhancement request?  <http://gcc.gnu.org/bugzilla>.

To save you the trouble, I just did: <gcc.gnu.org/PR33053>.  Let's
see what comes from it.

> And again, this is not a "C standard" issue. It's a "sane 
> implementation"
> issue.

Maybe what you consider sane isn't as clear-cut as you think?  I'm
not alone with this opinion, as the mailing lists archives readily
show.

> Linux expects the compiler to be sane.

And the compiler expects the code it is asked to translate to
be reasonably sane.  Nothing new here.  The compiler is more
forgiving than the Linux code, IMHO; that's why I suggested
using the obviously correct asm implementation of the atomic
get/set, rather than using the not-so-obviously-correct and
error-prone volatile thing.  The semantics of the asm() are
exactly the semantics you expect from an atomic get/set, while
that of the volatile implementation are *not*; and on top of
that it is very well-tested well-understood technology.

> If it isn't, that's not our problem.

What, if the compiler doesn't work as you expect, like a silent
miscompilation, or even only the compiler ICEs, that is not a
problem for users and/or developers of Linux?  Interesting
opinion.  I'd rather stay clear of known pitfalls.

> gcc *is* sane,

Yes, it is.

> and I don't see why you constantly act as if it wasn't.

I guess I didn't express myself clearly enough then.  Hopefully
some other people _did_ understand what I meant.  In very harsh
words: "not all (proposed) kernel code is sane".


Segher


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] make atomic_t volatile on all architectures
  2007-08-12  9:47             ` Martin Schwidefsky
@ 2007-08-12 10:35               ` Segher Boessenkool
  0 siblings, 0 replies; 34+ messages in thread
From: Segher Boessenkool @ 2007-08-12 10:35 UTC (permalink / raw)
  To: schwidefsky
  Cc: wjiang, Linus Torvalds, wensong, heiko.carstens, linux-kernel, ak,
	cfriesen, netdev, horms, akpm, Chuck Ebbert, davem, zlynx,
	Chris Snook

>>> Yes, though I would use "=m" on the output list and "m" on the input
>>> list. The reason is that I've seen gcc fall on its face with an ICE 
>>> on
>>> s390 due to "+m". The explanation I've got from our compiler people 
>>> was
>>> quite esoteric, as far as I remember gcc splits "+m" to an input
>>> operand
>>> and an output operand. Now it can happen that the compiler chooses 
>>> two
>>> different registers to access the same memory location. "+m" requires
>>> that the two memory references are identical which causes the ICE if
>>> they are not.
>>
>> The problem is very nicely described here, last paragraph:
>> <http://gcc.gnu.org/ml/gcc-patches/2007-07/msg01816.html>
>>
>> It's not a problem anymore in (very) recent GCC, although
>> that of course won't help you in the kernel (yet).
>
> So you are saying that gcc 3.x still has this problem ?

Yes.  A warning ("read-write constraint does not allow a register")
was added for GCC-3.4, but the fix/workaround is more recent
(4.2 I believe, perhaps it was backported to the 4.1 branch).

>>> I do not know if the current compilers still do this. Has
>>> anyone else seen this happen ?
>>
>> In recent GCC, it's actually documented:
>>
>> 	 The ordinary output operands must be write-only; GCC will assume 
>> that
>> 	the values in these operands before the instruction are dead and need
>> 	not be generated.  Extended asm supports input-output or read-write
>> 	operands.  Use the constraint character `+' to indicate such an 
>> operand
>> 	and list it with the output operands.  You should only use read-write
>> 	operands when the constraints for the operand (or the operand in 
>> which
>> 	only some of the bits are to be changed) allow a register.
>>
>> Note that last line.
>
> I see, thanks for the info.

My pleasure.


Segher

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] make atomic_t volatile on all architectures
  2007-08-12  9:48               ` Martin Schwidefsky
  2007-08-12  9:54                 ` Linus Torvalds
@ 2007-08-12 16:30                 ` Segher Boessenkool
  2007-08-12 18:11                   ` Linus Torvalds
  1 sibling, 1 reply; 34+ messages in thread
From: Segher Boessenkool @ 2007-08-12 16:30 UTC (permalink / raw)
  To: schwidefsky
  Cc: wjiang, Linus Torvalds, wensong, heiko.carstens, linux-kernel, ak,
	cfriesen, netdev, horms, akpm, Chuck Ebbert, davem, zlynx,
	Chris Snook

>> "+m" works. We use it. It's better than the alternatives. Pointing to
>> stale documentation doesn't change anything.
>
> Well, perhaps on i386. I've seen some older versions of the s390 gcc 
> die
> with an ICE because I have used "+m" in some kernel inline assembly. 
> I'm
> happy to hear that this issue is fixed in recent gcc. Now I'll have to
> find out if this is already true with gcc 3.x.

It was fixed (that is, "+m" is translated into a separate read
and write by GCC itself) in GCC-4.0.0, I just learnt.

> The duplication "=m" and "m" with the same constraint is rather 
> annoying.

Yeah.  Compiler errors are more annoying though I dare say ;-)


Segher


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] make atomic_t volatile on all architectures
  2007-08-12 10:27               ` Segher Boessenkool
@ 2007-08-12 17:59                 ` Linus Torvalds
  0 siblings, 0 replies; 34+ messages in thread
From: Linus Torvalds @ 2007-08-12 17:59 UTC (permalink / raw)
  To: Segher Boessenkool
  Cc: wjiang, wensong, heiko.carstens, linux-kernel, ak, cfriesen,
	netdev, horms, akpm, schwidefsky, Chuck Ebbert, davem, zlynx,
	Chris Snook



On Sun, 12 Aug 2007, Segher Boessenkool wrote:
> 
> It works _most of the time_.

It used to have problems. Gcc has had problems in various areas.

>			  Ask Martin.  Oh you don't even have to,
> he told you two mails ago.  My last mail simply pointed out that this
> isn't a GCC bug, but merely documented behaviour.

It was *not* documented behaviour, and there were gcc people who actively 
*encouraged* use of "+m", and actually fixed gcc.

The documentation is buggy.

Live with it.

			Linus

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] make atomic_t volatile on all architectures
  2007-08-12 16:30                 ` Segher Boessenkool
@ 2007-08-12 18:11                   ` Linus Torvalds
  2007-08-12 19:13                     ` Segher Boessenkool
  0 siblings, 1 reply; 34+ messages in thread
From: Linus Torvalds @ 2007-08-12 18:11 UTC (permalink / raw)
  To: Segher Boessenkool
  Cc: schwidefsky, wjiang, wensong, heiko.carstens, linux-kernel, ak,
	cfriesen, netdev, horms, akpm, Chuck Ebbert, davem, zlynx,
	Chris Snook



On Sun, 12 Aug 2007, Segher Boessenkool wrote:
> 
> Yeah.  Compiler errors are more annoying though I dare say ;-)

Actually, compile-time errors are fine, and easy to work around. *Much* 
more annoying is when gcc actively generates subtly bad code. We've had 
use-after-free issues due to incorrect gcc liveness calculations etc, and 
inline asm has beeen one of the more common causes - exactly because the 
kernel is one of the few users (along with glibc) that uses it at all.

Now *those* are hard to find - the code works most of the time, but the 
compiler has inserted a really subtle race condition into the code 
(deallocated a local stack entry before last use). We had that with our 
semaphore code at some point.

		Linus

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] make atomic_t volatile on all architectures
  2007-08-12 18:11                   ` Linus Torvalds
@ 2007-08-12 19:13                     ` Segher Boessenkool
  0 siblings, 0 replies; 34+ messages in thread
From: Segher Boessenkool @ 2007-08-12 19:13 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: wjiang, wensong, heiko.carstens, linux-kernel, ak, cfriesen,
	netdev, horms, akpm, schwidefsky, Chuck Ebbert, davem, zlynx,
	Chris Snook

>> Yeah.  Compiler errors are more annoying though I dare say ;-)
>
> Actually, compile-time errors are fine,

Yes, they don't cause data corruption or anything like that,
but I still don't think the 390 people want to ship a kernel
that doesn't build -- and it seems they still need to support
GCC versions before 4.0.  Up until the day they can finally
drop 3.x, they'll have to...

> and easy to work around.

...use the simple workaround, annoying as it may be, at least
it works.  That's all I'm saying.

> *Much*
> more annoying is when gcc actively generates subtly bad code.

Quite so.

> We've had
> use-after-free issues due to incorrect gcc liveness calculations etc, 
> and
> inline asm has beeen one of the more common causes - exactly because 
> the
> kernel is one of the few users (along with glibc) that uses it at all.

The good news is that things are getting better since more and
more of the RTL transformations are ripped out.

> Now *those* are hard to find - the code works most of the time, but the
> compiler has inserted a really subtle race condition into the code
> (deallocated a local stack entry before last use). We had that with our
> semaphore code at some point.

Yeah, magnifying glass + lots of time kind of bug.  Lovely :-/


Segher


^ permalink raw reply	[flat|nested] 34+ messages in thread

end of thread, other threads:[~2007-08-12 19:14 UTC | newest]

Thread overview: 34+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-08-08 23:07 [PATCH] make atomic_t volatile on all architectures Chris Snook
2007-08-08 23:18 ` Jesper Juhl
2007-08-08 23:31   ` Chris Snook
2007-08-08 23:51     ` Jesper Juhl
2007-08-08 23:25 ` Lennert Buytenhek
2007-08-08 23:35   ` Chris Snook
2007-08-09  1:03 ` Herbert Xu
2007-08-09  1:48   ` David Miller
2007-08-09  3:47     ` Paul E. McKenney
2007-08-09  7:47   ` Chris Snook
2007-08-09  8:30     ` Herbert Xu
2007-08-09 11:44       ` Chris Snook
2007-08-09  4:18 ` Linus Torvalds
2007-08-09  4:59   ` Jerry Jiang
2007-08-09  7:31   ` Chris Snook
2007-08-09  8:14     ` Heiko Carstens
2007-08-09 17:36     ` Chuck Ebbert
2007-08-09 17:55       ` Linus Torvalds
2007-08-09 18:20         ` Martin Schwidefsky
2007-08-12  5:53           ` Segher Boessenkool
2007-08-12  6:09             ` Linus Torvalds
2007-08-12  9:48               ` Martin Schwidefsky
2007-08-12  9:54                 ` Linus Torvalds
2007-08-12 16:30                 ` Segher Boessenkool
2007-08-12 18:11                   ` Linus Torvalds
2007-08-12 19:13                     ` Segher Boessenkool
2007-08-12 10:27               ` Segher Boessenkool
2007-08-12 17:59                 ` Linus Torvalds
2007-08-12  9:47             ` Martin Schwidefsky
2007-08-12 10:35               ` Segher Boessenkool
2007-08-09 17:57       ` Martin Schwidefsky
     [not found] <8Q2Pg-8uV-23@gated-at.bofh.it>
     [not found] ` <8Q7Fa-7rJ-1@gated-at.bofh.it>
     [not found]   ` <8Q8rD-hh-7@gated-at.bofh.it>
2007-08-09  9:10     ` Bodo Eggert
2007-08-09  9:18       ` Jerry Jiang
2007-08-09 15:00         ` Linus Torvalds

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).