* [PATCH RFC] locking: Add volatile to arch_spinlock_t structures
@ 2014-12-04 6:20 Paul E. McKenney
[not found] ` <CA+55aFzn-6asfWHB8SAvz4GJWz7uEriujgVLfaqoo_VPtNBLuA@mail.gmail.com>
0 siblings, 1 reply; 11+ messages in thread
From: Paul E. McKenney @ 2014-12-04 6:20 UTC (permalink / raw)
To: linux-kernel; +Cc: linux-arch, dvyukov, dave, mingo, peterz, torvalds
More concern about compilers...
Most architectures mark the fields in their arch_spinlock_t structures
as volatile, which forces the compiler to respect the integrity of
those fields. Without volatile markings, the compiler is within its
rights to overwrite these fields, for example, by using a wide store
to update an adjacent field as long as it fixes them up later. This
sort of thing could come as a rather nasty surprise to any task attempting
to concurrently acquire that lock.
For example, on x86 for smallish NR_CPUS, arch_spinlock_t is 16 bits wide.
If there were three adjacent 16-bit fields in the structure containing
the arch_spinlock_t, a compiler might reasonably use 64-bit accesses
for those three fields. After a 64-bit load, the arch_spinlock_t's
value would be available in a register, so that the compiler might use
a 64-bit store followed by a 16-bit fixup store to update the three
adjacent 16-bit fields.
This commit therefore adds volatile to the arch_spinlock_t and
arch_rwlock_t fields that don't already have them.
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: linux-arch@vger.kernel.org
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/arch/arm/include/asm/spinlock_types.h b/arch/arm/include/asm/spinlock_types.h
index 47663fcb10ad..7d3b6ecb5301 100644
--- a/arch/arm/include/asm/spinlock_types.h
+++ b/arch/arm/include/asm/spinlock_types.h
@@ -9,14 +9,14 @@
typedef struct {
union {
- u32 slock;
+ volatile u32 slock;
struct __raw_tickets {
#ifdef __ARMEB__
- u16 next;
- u16 owner;
+ volatile u16 next;
+ volatile u16 owner;
#else
- u16 owner;
- u16 next;
+ volatile u16 owner;
+ volatile u16 next;
#endif
} tickets;
};
@@ -25,7 +25,7 @@ typedef struct {
#define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } }
typedef struct {
- u32 lock;
+ volatile u32 lock;
} arch_rwlock_t;
#define __ARCH_RW_LOCK_UNLOCKED { 0 }
diff --git a/arch/arm64/include/asm/spinlock_types.h b/arch/arm64/include/asm/spinlock_types.h
index b8d383665f56..0f841378f0f3 100644
--- a/arch/arm64/include/asm/spinlock_types.h
+++ b/arch/arm64/include/asm/spinlock_types.h
@@ -24,11 +24,11 @@
typedef struct {
#ifdef __AARCH64EB__
- u16 next;
- u16 owner;
+ volatile u16 next;
+ volatile u16 owner;
#else
- u16 owner;
- u16 next;
+ volatile u16 owner;
+ volatile u16 next;
#endif
} __aligned(4) arch_spinlock_t;
diff --git a/arch/mips/include/asm/spinlock_types.h b/arch/mips/include/asm/spinlock_types.h
index 9b2528e612c0..10f04a0482dd 100644
--- a/arch/mips/include/asm/spinlock_types.h
+++ b/arch/mips/include/asm/spinlock_types.h
@@ -14,14 +14,14 @@ typedef union {
* bits 0..15 : serving_now
* bits 16..31 : ticket
*/
- u32 lock;
+ volatile u32 lock;
struct {
#ifdef __BIG_ENDIAN
- u16 ticket;
- u16 serving_now;
+ volatile u16 ticket;
+ volatile u16 serving_now;
#else
- u16 serving_now;
- u16 ticket;
+ volatile u16 serving_now;
+ volatile u16 ticket;
#endif
} h;
} arch_spinlock_t;
diff --git a/arch/mn10300/include/asm/spinlock_types.h b/arch/mn10300/include/asm/spinlock_types.h
index 653dc519b405..04fd2c622f81 100644
--- a/arch/mn10300/include/asm/spinlock_types.h
+++ b/arch/mn10300/include/asm/spinlock_types.h
@@ -6,13 +6,13 @@
#endif
typedef struct arch_spinlock {
- unsigned int slock;
+ volatile unsigned int slock;
} arch_spinlock_t;
#define __ARCH_SPIN_LOCK_UNLOCKED { 0 }
typedef struct {
- unsigned int lock;
+ volatile unsigned int lock;
} arch_rwlock_t;
#define __ARCH_RW_LOCK_UNLOCKED { RW_LOCK_BIAS }
diff --git a/arch/s390/include/asm/spinlock_types.h b/arch/s390/include/asm/spinlock_types.h
index d84b6939237c..0ccdda3b6842 100644
--- a/arch/s390/include/asm/spinlock_types.h
+++ b/arch/s390/include/asm/spinlock_types.h
@@ -6,14 +6,14 @@
#endif
typedef struct {
- unsigned int lock;
+ volatile unsigned int lock;
} __attribute__ ((aligned (4))) arch_spinlock_t;
#define __ARCH_SPIN_LOCK_UNLOCKED { .lock = 0, }
typedef struct {
- unsigned int lock;
- unsigned int owner;
+ volatile unsigned int lock;
+ volatile unsigned int owner;
} arch_rwlock_t;
#define __ARCH_RW_LOCK_UNLOCKED { 0 }
diff --git a/arch/tile/include/asm/spinlock_types.h b/arch/tile/include/asm/spinlock_types.h
index a71f59b49c50..29f70a14b979 100644
--- a/arch/tile/include/asm/spinlock_types.h
+++ b/arch/tile/include/asm/spinlock_types.h
@@ -23,14 +23,14 @@
/* Low 15 bits are "next"; high 15 bits are "current". */
typedef struct arch_spinlock {
- unsigned int lock;
+ volatile unsigned int lock;
} arch_spinlock_t;
#define __ARCH_SPIN_LOCK_UNLOCKED { 0 }
/* High bit is "writer owns"; low 31 bits are a count of readers. */
typedef struct arch_rwlock {
- unsigned int lock;
+ volatile unsigned int lock;
} arch_rwlock_t;
#define __ARCH_RW_LOCK_UNLOCKED { 0 }
@@ -39,9 +39,9 @@ typedef struct arch_rwlock {
typedef struct arch_spinlock {
/* Next ticket number to hand out. */
- int next_ticket;
+ volatile int next_ticket;
/* The ticket number that currently owns this lock. */
- int current_ticket;
+ volatile int current_ticket;
} arch_spinlock_t;
#define __ARCH_SPIN_LOCK_UNLOCKED { 0, 0 }
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h
index 5f9d7572d82b..fead0ca82468 100644
--- a/arch/x86/include/asm/spinlock_types.h
+++ b/arch/x86/include/asm/spinlock_types.h
@@ -25,9 +25,9 @@ typedef u32 __ticketpair_t;
typedef struct arch_spinlock {
union {
- __ticketpair_t head_tail;
+ volatile __ticketpair_t head_tail;
struct __raw_tickets {
- __ticket_t head, tail;
+ volatile __ticket_t head, tail;
} tickets;
};
} arch_spinlock_t;
^ permalink raw reply related [flat|nested] 11+ messages in thread[parent not found: <CA+55aFzn-6asfWHB8SAvz4GJWz7uEriujgVLfaqoo_VPtNBLuA@mail.gmail.com>]
* Re: [PATCH RFC] locking: Add volatile to arch_spinlock_t structures [not found] ` <CA+55aFzn-6asfWHB8SAvz4GJWz7uEriujgVLfaqoo_VPtNBLuA@mail.gmail.com> @ 2014-12-04 6:57 ` Paul E. McKenney [not found] ` <CA+55aFxGCxQNE4HzRP_Uk2FmFDn_+Hfm6GBz75S+5+SDeODVJQ@mail.gmail.com> 1 sibling, 0 replies; 11+ messages in thread From: Paul E. McKenney @ 2014-12-04 6:57 UTC (permalink / raw) To: Linus Torvalds Cc: Ingo Molnar, linux-arch, dave, peterz, dvyukov, linux-kernel One point of the patch is "one more thing to watch for" in generated code, namely temporary clobbering of synchronization variables, locks included, due to overzealous optimization. If this happens to the kernel, I guess that the other quick workaround is to add alignment directives or padding. (And yes, I have seen reports of non-kernel examples where gcc actually does this sort of thing.) But I don't feel strongly about this patch, not yet anyway, so will set it aside for the time being. Thanx, Paul On Wed, Dec 03, 2014 at 10:31:06PM -0800, Linus Torvalds wrote: > NAK. Volatile on data structures (as opposed to in code) is a disease. This > is wrong. > > We very much pass spinlock values around. They aren't volatile. The only > thing volatile is when you load the value from the lock itself. See for > example the stuff we do when we load a reflock from memory. Once we've > loaded the value into registers it's no longer volatile, and putting > violators into the data structures is actively *wrong*. > > So no, no, no. C got this wrong. Volatile data structures are a fundamental > mistake and a bug. The only good volatile is in an access (ie like > ACCESS_ONCE() and anything else is making things worse. > > We're not catering to crazy compiler people that can't understand that > fact. I'm not even seeing the point of your patch, since it seems to be > entirely about "let's make excuses for crap compilers" > > Linus > On Dec 3, 2014 10:20 PM, "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> > wrote: > > > More concern about compilers... > > > > Most architectures mark the fields in their arch_spinlock_t structures > > as volatile, which forces the compiler to respect the integrity of > > those fields. Without volatile markings, the compiler is within its > > rights to overwrite these fields, for example, by using a wide store > > to update an adjacent field as long as it fixes them up later. This > > sort of thing could come as a rather nasty surprise to any task attempting > > to concurrently acquire that lock. > > > > For example, on x86 for smallish NR_CPUS, arch_spinlock_t is 16 bits wide. > > If there were three adjacent 16-bit fields in the structure containing > > the arch_spinlock_t, a compiler might reasonably use 64-bit accesses > > for those three fields. After a 64-bit load, the arch_spinlock_t's > > value would be available in a register, so that the compiler might use > > a 64-bit store followed by a 16-bit fixup store to update the three > > adjacent 16-bit fields. > > > > This commit therefore adds volatile to the arch_spinlock_t and > > arch_rwlock_t fields that don't already have them. > > > > Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> > > Cc: linux-arch@vger.kernel.org > > Cc: Dmitry Vyukov <dvyukov@google.com> > > Cc: Davidlohr Bueso <dave@stgolabs.net> > > Cc: Ingo Molnar <mingo@kernel.org> > > Cc: Peter Zijlstra <peterz@infradead.org> > > Cc: Linus Torvalds <torvalds@linux-foundation.org> > > > > diff --git a/arch/arm/include/asm/spinlock_types.h > > b/arch/arm/include/asm/spinlock_types.h > > index 47663fcb10ad..7d3b6ecb5301 100644 > > --- a/arch/arm/include/asm/spinlock_types.h > > +++ b/arch/arm/include/asm/spinlock_types.h > > @@ -9,14 +9,14 @@ > > > > typedef struct { > > union { > > - u32 slock; > > + volatile u32 slock; > > struct __raw_tickets { > > #ifdef __ARMEB__ > > - u16 next; > > - u16 owner; > > + volatile u16 next; > > + volatile u16 owner; > > #else > > - u16 owner; > > - u16 next; > > + volatile u16 owner; > > + volatile u16 next; > > #endif > > } tickets; > > }; > > @@ -25,7 +25,7 @@ typedef struct { > > #define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } } > > > > typedef struct { > > - u32 lock; > > + volatile u32 lock; > > } arch_rwlock_t; > > > > #define __ARCH_RW_LOCK_UNLOCKED { 0 } > > diff --git a/arch/arm64/include/asm/spinlock_types.h > > b/arch/arm64/include/asm/spinlock_types.h > > index b8d383665f56..0f841378f0f3 100644 > > --- a/arch/arm64/include/asm/spinlock_types.h > > +++ b/arch/arm64/include/asm/spinlock_types.h > > @@ -24,11 +24,11 @@ > > > > typedef struct { > > #ifdef __AARCH64EB__ > > - u16 next; > > - u16 owner; > > + volatile u16 next; > > + volatile u16 owner; > > #else > > - u16 owner; > > - u16 next; > > + volatile u16 owner; > > + volatile u16 next; > > #endif > > } __aligned(4) arch_spinlock_t; > > > > diff --git a/arch/mips/include/asm/spinlock_types.h > > b/arch/mips/include/asm/spinlock_types.h > > index 9b2528e612c0..10f04a0482dd 100644 > > --- a/arch/mips/include/asm/spinlock_types.h > > +++ b/arch/mips/include/asm/spinlock_types.h > > @@ -14,14 +14,14 @@ typedef union { > > * bits 0..15 : serving_now > > * bits 16..31 : ticket > > */ > > - u32 lock; > > + volatile u32 lock; > > struct { > > #ifdef __BIG_ENDIAN > > - u16 ticket; > > - u16 serving_now; > > + volatile u16 ticket; > > + volatile u16 serving_now; > > #else > > - u16 serving_now; > > - u16 ticket; > > + volatile u16 serving_now; > > + volatile u16 ticket; > > #endif > > } h; > > } arch_spinlock_t; > > diff --git a/arch/mn10300/include/asm/spinlock_types.h > > b/arch/mn10300/include/asm/spinlock_types.h > > index 653dc519b405..04fd2c622f81 100644 > > --- a/arch/mn10300/include/asm/spinlock_types.h > > +++ b/arch/mn10300/include/asm/spinlock_types.h > > @@ -6,13 +6,13 @@ > > #endif > > > > typedef struct arch_spinlock { > > - unsigned int slock; > > + volatile unsigned int slock; > > } arch_spinlock_t; > > > > #define __ARCH_SPIN_LOCK_UNLOCKED { 0 } > > > > typedef struct { > > - unsigned int lock; > > + volatile unsigned int lock; > > } arch_rwlock_t; > > > > #define __ARCH_RW_LOCK_UNLOCKED { RW_LOCK_BIAS } > > diff --git a/arch/s390/include/asm/spinlock_types.h > > b/arch/s390/include/asm/spinlock_types.h > > index d84b6939237c..0ccdda3b6842 100644 > > --- a/arch/s390/include/asm/spinlock_types.h > > +++ b/arch/s390/include/asm/spinlock_types.h > > @@ -6,14 +6,14 @@ > > #endif > > > > typedef struct { > > - unsigned int lock; > > + volatile unsigned int lock; > > } __attribute__ ((aligned (4))) arch_spinlock_t; > > > > #define __ARCH_SPIN_LOCK_UNLOCKED { .lock = 0, } > > > > typedef struct { > > - unsigned int lock; > > - unsigned int owner; > > + volatile unsigned int lock; > > + volatile unsigned int owner; > > } arch_rwlock_t; > > > > #define __ARCH_RW_LOCK_UNLOCKED { 0 } > > diff --git a/arch/tile/include/asm/spinlock_types.h > > b/arch/tile/include/asm/spinlock_types.h > > index a71f59b49c50..29f70a14b979 100644 > > --- a/arch/tile/include/asm/spinlock_types.h > > +++ b/arch/tile/include/asm/spinlock_types.h > > @@ -23,14 +23,14 @@ > > > > /* Low 15 bits are "next"; high 15 bits are "current". */ > > typedef struct arch_spinlock { > > - unsigned int lock; > > + volatile unsigned int lock; > > } arch_spinlock_t; > > > > #define __ARCH_SPIN_LOCK_UNLOCKED { 0 } > > > > /* High bit is "writer owns"; low 31 bits are a count of readers. */ > > typedef struct arch_rwlock { > > - unsigned int lock; > > + volatile unsigned int lock; > > } arch_rwlock_t; > > > > #define __ARCH_RW_LOCK_UNLOCKED { 0 } > > @@ -39,9 +39,9 @@ typedef struct arch_rwlock { > > > > typedef struct arch_spinlock { > > /* Next ticket number to hand out. */ > > - int next_ticket; > > + volatile int next_ticket; > > /* The ticket number that currently owns this lock. */ > > - int current_ticket; > > + volatile int current_ticket; > > } arch_spinlock_t; > > > > #define __ARCH_SPIN_LOCK_UNLOCKED { 0, 0 } > > diff --git a/arch/x86/include/asm/spinlock_types.h > > b/arch/x86/include/asm/spinlock_types.h > > index 5f9d7572d82b..fead0ca82468 100644 > > --- a/arch/x86/include/asm/spinlock_types.h > > +++ b/arch/x86/include/asm/spinlock_types.h > > @@ -25,9 +25,9 @@ typedef u32 __ticketpair_t; > > > > typedef struct arch_spinlock { > > union { > > - __ticketpair_t head_tail; > > + volatile __ticketpair_t head_tail; > > struct __raw_tickets { > > - __ticket_t head, tail; > > + volatile __ticket_t head, tail; > > } tickets; > > }; > > } arch_spinlock_t; > > > > ^ permalink raw reply [flat|nested] 11+ messages in thread
[parent not found: <CA+55aFxGCxQNE4HzRP_Uk2FmFDn_+Hfm6GBz75S+5+SDeODVJQ@mail.gmail.com>]
* Re: [PATCH RFC] locking: Add volatile to arch_spinlock_t structures [not found] ` <CA+55aFxGCxQNE4HzRP_Uk2FmFDn_+Hfm6GBz75S+5+SDeODVJQ@mail.gmail.com> @ 2014-12-04 7:02 ` Paul E. McKenney 2014-12-04 18:02 ` Linus Torvalds 0 siblings, 1 reply; 11+ messages in thread From: Paul E. McKenney @ 2014-12-04 7:02 UTC (permalink / raw) To: Linus Torvalds Cc: linux-arch, Davidlohr Bueso, dvyukov, peterz, linux-kernel, Ingo Molnar On Wed, Dec 03, 2014 at 10:40:45PM -0800, Linus Torvalds wrote: > On Dec 3, 2014 10:31 PM, "Linus Torvalds" <torvalds@linux-foundation.org> > wrote: > > > > So no, no, no. C got this wrong. Volatile data structures are a > fundamental mistake and a bug. > > BTW, I'm not at all interested in language lawyering and people who say > "but but we can do x". A compiler that modifies adjacent fields because the > standard leaves is open is a crap compiler, and we won't use it, or disable > the broken optimization. It is wrong from a concurrency standpoint anyway, > and adding broken volatiles is just making things worse. Understood, for example, adjacent fields protected by different locks as one example, where adjacent-field overwriting completely breaks even very conservatively designed code. Should be entertaining! ;-) Thanx, Paul ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH RFC] locking: Add volatile to arch_spinlock_t structures 2014-12-04 7:02 ` Paul E. McKenney @ 2014-12-04 18:02 ` Linus Torvalds 2014-12-04 18:02 ` Linus Torvalds 2014-12-04 18:36 ` Paul E. McKenney 0 siblings, 2 replies; 11+ messages in thread From: Linus Torvalds @ 2014-12-04 18:02 UTC (permalink / raw) To: Paul McKenney Cc: linux-arch@vger.kernel.org, Davidlohr Bueso, Dmitry Vyukov, Peter Zijlstra, Linux Kernel Mailing List, Ingo Molnar On Wed, Dec 3, 2014 at 11:02 PM, Paul E. McKenney <paulmck@linux.vnet.ibm.com> wrote: > On Wed, Dec 03, 2014 at 10:40:45PM -0800, Linus Torvalds wrote: >> On Dec 3, 2014 10:31 PM, "Linus Torvalds" <torvalds@linux-foundation.org> >> wrote: >> > >> > So no, no, no. C got this wrong. Volatile data structures are a >> fundamental mistake and a bug. >> >> BTW, I'm not at all interested in language lawyering and people who say >> "but but we can do x". A compiler that modifies adjacent fields because the >> standard leaves is open is a crap compiler, and we won't use it, or disable >> the broken optimization. It is wrong from a concurrency standpoint anyway, >> and adding broken volatiles is just making things worse. > > Understood, for example, adjacent fields protected by different locks > as one example, where adjacent-field overwriting completely breaks even > very conservatively designed code. Exactly. Compilers that "optimize" things to touch fields that aren't touched by the source code are simply inherently buggy shit. I'm not at all interested in catering to their insanity. It doesn't matter one whit if they can point to the legacy C "virtual machine" definition and say that those accesses are invisible in the virtual machine. They are not invisible in real life, and it is entirely possible that two adjacent variables or fields are protected by different locks - even in non-kernel code. Claiming that they need to be marked volatile is a symptom of a diseased compiler writer. Now, the one exception to this is generally bitfields, because there the programmer knowingly and intentionally puts the fields together in the same storage unit. I also think that volatile bitfields are an insane concept, even if I think that the standard allows them. So I am not saying that compilers should try to magically make bitfield members not access the members around them. I also accept that some architectures are broken. Old non-byte/word-access alpha being the really canonical example. It's not the compilers fault if the architecture is broken, and the compiler cannot magically fix it. But compilers that think that "hey, vectorization is cool, and I can do load-stores and mask things dynamically" are misguided crap. It may be fancy, it may be really cool compiler technology, but it's fundamentally wrong unless the programmer told it was safe some way (be it with a "pragma" or "restrict" or a compile-time switch or whatever). Linus ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH RFC] locking: Add volatile to arch_spinlock_t structures 2014-12-04 18:02 ` Linus Torvalds @ 2014-12-04 18:02 ` Linus Torvalds 2014-12-04 18:36 ` Paul E. McKenney 1 sibling, 0 replies; 11+ messages in thread From: Linus Torvalds @ 2014-12-04 18:02 UTC (permalink / raw) To: Paul McKenney Cc: linux-arch@vger.kernel.org, Davidlohr Bueso, Dmitry Vyukov, Peter Zijlstra, Linux Kernel Mailing List, Ingo Molnar On Wed, Dec 3, 2014 at 11:02 PM, Paul E. McKenney <paulmck@linux.vnet.ibm.com> wrote: > On Wed, Dec 03, 2014 at 10:40:45PM -0800, Linus Torvalds wrote: >> On Dec 3, 2014 10:31 PM, "Linus Torvalds" <torvalds@linux-foundation.org> >> wrote: >> > >> > So no, no, no. C got this wrong. Volatile data structures are a >> fundamental mistake and a bug. >> >> BTW, I'm not at all interested in language lawyering and people who say >> "but but we can do x". A compiler that modifies adjacent fields because the >> standard leaves is open is a crap compiler, and we won't use it, or disable >> the broken optimization. It is wrong from a concurrency standpoint anyway, >> and adding broken volatiles is just making things worse. > > Understood, for example, adjacent fields protected by different locks > as one example, where adjacent-field overwriting completely breaks even > very conservatively designed code. Exactly. Compilers that "optimize" things to touch fields that aren't touched by the source code are simply inherently buggy shit. I'm not at all interested in catering to their insanity. It doesn't matter one whit if they can point to the legacy C "virtual machine" definition and say that those accesses are invisible in the virtual machine. They are not invisible in real life, and it is entirely possible that two adjacent variables or fields are protected by different locks - even in non-kernel code. Claiming that they need to be marked volatile is a symptom of a diseased compiler writer. Now, the one exception to this is generally bitfields, because there the programmer knowingly and intentionally puts the fields together in the same storage unit. I also think that volatile bitfields are an insane concept, even if I think that the standard allows them. So I am not saying that compilers should try to magically make bitfield members not access the members around them. I also accept that some architectures are broken. Old non-byte/word-access alpha being the really canonical example. It's not the compilers fault if the architecture is broken, and the compiler cannot magically fix it. But compilers that think that "hey, vectorization is cool, and I can do load-stores and mask things dynamically" are misguided crap. It may be fancy, it may be really cool compiler technology, but it's fundamentally wrong unless the programmer told it was safe some way (be it with a "pragma" or "restrict" or a compile-time switch or whatever). Linus ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH RFC] locking: Add volatile to arch_spinlock_t structures 2014-12-04 18:02 ` Linus Torvalds 2014-12-04 18:02 ` Linus Torvalds @ 2014-12-04 18:36 ` Paul E. McKenney 2014-12-04 19:18 ` Linus Torvalds 2014-12-04 21:45 ` One Thousand Gnomes 1 sibling, 2 replies; 11+ messages in thread From: Paul E. McKenney @ 2014-12-04 18:36 UTC (permalink / raw) To: Linus Torvalds Cc: linux-arch@vger.kernel.org, Davidlohr Bueso, Dmitry Vyukov, Peter Zijlstra, Linux Kernel Mailing List, Ingo Molnar On Thu, Dec 04, 2014 at 10:02:14AM -0800, Linus Torvalds wrote: > On Wed, Dec 3, 2014 at 11:02 PM, Paul E. McKenney > <paulmck@linux.vnet.ibm.com> wrote: > > On Wed, Dec 03, 2014 at 10:40:45PM -0800, Linus Torvalds wrote: > >> On Dec 3, 2014 10:31 PM, "Linus Torvalds" <torvalds@linux-foundation.org> > >> wrote: > >> > > >> > So no, no, no. C got this wrong. Volatile data structures are a > >> fundamental mistake and a bug. > >> > >> BTW, I'm not at all interested in language lawyering and people who say > >> "but but we can do x". A compiler that modifies adjacent fields because the > >> standard leaves is open is a crap compiler, and we won't use it, or disable > >> the broken optimization. It is wrong from a concurrency standpoint anyway, > >> and adding broken volatiles is just making things worse. > > > > Understood, for example, adjacent fields protected by different locks > > as one example, where adjacent-field overwriting completely breaks even > > very conservatively designed code. > > Exactly. Compilers that "optimize" things to touch fields that aren't > touched by the source code are simply inherently buggy shit. I'm not > at all interested in catering to their insanity. > > It doesn't matter one whit if they can point to the legacy C "virtual > machine" definition and say that those accesses are invisible in the > virtual machine. They are not invisible in real life, and it is > entirely possible that two adjacent variables or fields are protected > by different locks - even in non-kernel code. Claiming that they need > to be marked volatile is a symptom of a diseased compiler writer. > > Now, the one exception to this is generally bitfields, because there > the programmer knowingly and intentionally puts the fields together in > the same storage unit. I also think that volatile bitfields are an > insane concept, even if I think that the standard allows them. So I am > not saying that compilers should try to magically make bitfield > members not access the members around them. > > I also accept that some architectures are broken. Old > non-byte/word-access alpha being the really canonical example. It's > not the compilers fault if the architecture is broken, and the > compiler cannot magically fix it. I have to ask... Does this mean we can remove the current restrictions against 8-bit and 16-bit access from smp_load_acquire() and smp_store_release()? > But compilers that think that "hey, vectorization is cool, and I can > do load-stores and mask things dynamically" are misguided crap. It may > be fancy, it may be really cool compiler technology, but it's > fundamentally wrong unless the programmer told it was safe some way > (be it with a "pragma" or "restrict" or a compile-time switch or > whatever). You might be happy to hear that I just sent an email to the C++ standards committee noting that valid C11 compilers are not supposed to be able to introduce data races [1]. I further argued that any store-widening access affecting any non-private adjacent variable must be assumed to (illegally per the C11 standard) introduce a data race, even if there are no locks, atomic accesses, transactions, or any other synchronization mechanism anywhere in that translation unit. After all, any non-static function in that translation unit might be called from some other translation unit that -did- use locking or whatever. I will let you know how it goes. ;-) Thanx, Paul [1] A "data race" occurs in any C11 program where multiple threads might be accessing a non-atomic variable, and where at least one of the accesses is a write. C11 states that data races result in undefined behavior. Therefore, if the source code does not contain a data race, the object code had also better be free of data races. Otherwise, the compiler inflicted undefined behavior on a perfectly legitimate program. ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH RFC] locking: Add volatile to arch_spinlock_t structures 2014-12-04 18:36 ` Paul E. McKenney @ 2014-12-04 19:18 ` Linus Torvalds 2014-12-04 20:00 ` Paul E. McKenney 2014-12-04 21:45 ` One Thousand Gnomes 1 sibling, 1 reply; 11+ messages in thread From: Linus Torvalds @ 2014-12-04 19:18 UTC (permalink / raw) To: Paul McKenney Cc: linux-arch@vger.kernel.org, Davidlohr Bueso, Dmitry Vyukov, Peter Zijlstra, Linux Kernel Mailing List, Ingo Molnar On Thu, Dec 4, 2014 at 10:36 AM, Paul E. McKenney <paulmck@linux.vnet.ibm.com> wrote: > > I have to ask... Does this mean we can remove the current > restrictions against 8-bit and 16-bit access from smp_load_acquire() > and smp_store_release()? I'd certainly be ok with it. Alpha doesn't have acquire/release semantics anyway (really, it's the worst memory ordering model *ever*), so those will end up being just plain (access-once) loads and stores, followed/preceded by a memory barrier anyway. So it arguably is no worse than the existing situation with ACCESS_ONCE() on alpha. And quite frankly, I simply don't think that an old broken alpha architecture should be something we worry about. Remember: the byte and word ops were introduced in 21164, and released in 1996, so it's not even like "alpha has broken behavior". It's literally just "the very earliest alphas were broken", and I suspect most of those machines (at least running Linux) weren't even SMP (ie somebody may still have a Multia around for sentimental reasons, but SMP? No). Of course, I'd like there to be a real reason to do so, not just "who cares about really old alphas, nyaah, nyaah, nyaah"? But if there is a clear case where a byte load-acquire and store-release would improve on something important, then yes, I think we should do it. We dropped support for the original i386, we can drop support for old broken alphas. People running them for sentimental reasons might as well be sentimental about software too, and run old kernels ;) Linus ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH RFC] locking: Add volatile to arch_spinlock_t structures 2014-12-04 19:18 ` Linus Torvalds @ 2014-12-04 20:00 ` Paul E. McKenney 2014-12-04 20:12 ` Paul E. McKenney 0 siblings, 1 reply; 11+ messages in thread From: Paul E. McKenney @ 2014-12-04 20:00 UTC (permalink / raw) To: Linus Torvalds Cc: linux-arch@vger.kernel.org, Davidlohr Bueso, Dmitry Vyukov, Peter Zijlstra, Linux Kernel Mailing List, Ingo Molnar On Thu, Dec 04, 2014 at 11:18:17AM -0800, Linus Torvalds wrote: > On Thu, Dec 4, 2014 at 10:36 AM, Paul E. McKenney > <paulmck@linux.vnet.ibm.com> wrote: > > > > I have to ask... Does this mean we can remove the current > > restrictions against 8-bit and 16-bit access from smp_load_acquire() > > and smp_store_release()? > > I'd certainly be ok with it. Alpha doesn't have acquire/release > semantics anyway (really, it's the worst memory ordering model > *ever*), so those will end up being just plain (access-once) loads and > stores, followed/preceded by a memory barrier anyway. So it arguably > is no worse than the existing situation with ACCESS_ONCE() on alpha. > > And quite frankly, I simply don't think that an old broken alpha > architecture should be something we worry about. Remember: the byte > and word ops were introduced in 21164, and released in 1996, so it's > not even like "alpha has broken behavior". It's literally just "the > very earliest alphas were broken", and I suspect most of those > machines (at least running Linux) weren't even SMP (ie somebody may > still have a Multia around for sentimental reasons, but SMP? No). > > Of course, I'd like there to be a real reason to do so, not just "who > cares about really old alphas, nyaah, nyaah, nyaah"? But if there is a > clear case where a byte load-acquire and store-release would improve > on something important, then yes, I think we should do it. > > We dropped support for the original i386, we can drop support for old > broken alphas. People running them for sentimental reasons might as > well be sentimental about software too, and run old kernels ;) Sad to say, I cannot argue that the places where I have hit this thus far have been anything more than irritations, although they have sometimes been quite irritating. One is in rcutorture, but memory size is not a concern there. Another is in a field in a struct that also contains an rcu_head structure, so shrinking it below an int doesn't actually save any space. A third one is a single global variable used to track sysidle state, but it is just a single global variable that is not in TINY_RCU, so who cares? If I find some place where this is either increasing the size of a data structure that can have lots of instances or where it is preventing an explicit memory barrier from being pulled into an smp_load_acquire() or smp_store_release(), I will revisit. And it turns out that there really is a prohibition against clobbering other-thread-accessible adjacent fields and variables in the C11 standard, section 3.14p2: NOTE 1 Two threads of execution can update and access separate memory locations without interfering with each other. In the C++11 standard, 1.7p3 has a similar prohibition, as Ville Voutilainen pointed out in response to my email: Two or more threads of execution (1.10) can update and access separate memory locations without interfering with each other. I would have expected this to be in 1.10 rather than 1.7, but as long as it is in there somewhere. ;-) In both standards, "separate memory locations" mean "having no bytes in common". This confirms your intuition that concurrent access to bitfields cannnot always be trusted. Yes, if the two bitfields don't have any bits living in the same byte, we can in theory have two threads updating them conconcurrently, but that would be a pain to verify. So any compiler that clobbers some adjacent non-bitfield variable or field that is accessible by other threads is not just despicable, it fails to conform to the standard. Whew! ;-) Thanx, Paul ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH RFC] locking: Add volatile to arch_spinlock_t structures 2014-12-04 20:00 ` Paul E. McKenney @ 2014-12-04 20:12 ` Paul E. McKenney 0 siblings, 0 replies; 11+ messages in thread From: Paul E. McKenney @ 2014-12-04 20:12 UTC (permalink / raw) To: Linus Torvalds Cc: linux-arch@vger.kernel.org, Davidlohr Bueso, Dmitry Vyukov, Peter Zijlstra, Linux Kernel Mailing List, Ingo Molnar On Thu, Dec 04, 2014 at 12:00:52PM -0800, Paul E. McKenney wrote: [ . . . ] > So any compiler that clobbers some adjacent non-bitfield variable or > field that is accessible by other threads is not just despicable, it > fails to conform to the standard. > > Whew! ;-) And part of the reason for my confusion is that I am using an old version of gcc, 4.6.3. Apparently this aspect of gcc wasn't fixed until 4.7 or thereabouts. Thanx, Paul ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH RFC] locking: Add volatile to arch_spinlock_t structures 2014-12-04 18:36 ` Paul E. McKenney 2014-12-04 19:18 ` Linus Torvalds @ 2014-12-04 21:45 ` One Thousand Gnomes 2014-12-04 22:06 ` Paul E. McKenney 1 sibling, 1 reply; 11+ messages in thread From: One Thousand Gnomes @ 2014-12-04 21:45 UTC (permalink / raw) To: Paul E. McKenney Cc: Linus Torvalds, linux-arch@vger.kernel.org, Davidlohr Bueso, Dmitry Vyukov, Peter Zijlstra, Linux Kernel Mailing List, Ingo Molnar > anywhere in that translation unit. After all, any non-static function > in that translation unit might be called from some other translation > unit that -did- use locking or whatever. > > I will let you know how it goes. ;-) It breaks DEC10 ;-) If there is kickback over things like optimisation perhaps the gcc maintainers could at least consider something like int __attribute((threadsafe)) fred; ?? Alan ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH RFC] locking: Add volatile to arch_spinlock_t structures 2014-12-04 21:45 ` One Thousand Gnomes @ 2014-12-04 22:06 ` Paul E. McKenney 0 siblings, 0 replies; 11+ messages in thread From: Paul E. McKenney @ 2014-12-04 22:06 UTC (permalink / raw) To: One Thousand Gnomes Cc: Linus Torvalds, linux-arch@vger.kernel.org, Davidlohr Bueso, Dmitry Vyukov, Peter Zijlstra, Linux Kernel Mailing List, Ingo Molnar On Thu, Dec 04, 2014 at 09:45:46PM +0000, One Thousand Gnomes wrote: > > anywhere in that translation unit. After all, any non-static function > > in that translation unit might be called from some other translation > > unit that -did- use locking or whatever. > > > > I will let you know how it goes. ;-) > > It breaks DEC10 ;-) To say nothing of CDC 6600 systems lacking the compare-move unit. ;-) > If there is kickback over things like optimisation perhaps the gcc > maintainers could at least consider something like > > int __attribute((threadsafe)) fred; > > ?? Not needed for recent gcc versions -- both the C11 and C++11 standards require that different threads be permitted to independently access different variables, where "different" means "no bits of the two variables residing in the same byte". From what I can see, this would mean that a conforming pre-EV56 Alpha C11 compiler would need to use LDL_L and STL_C to carry out 8-bit and 16-bit stores. ;-) Thanx, Paul ^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2014-12-04 22:08 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-12-04 6:20 [PATCH RFC] locking: Add volatile to arch_spinlock_t structures Paul E. McKenney
[not found] ` <CA+55aFzn-6asfWHB8SAvz4GJWz7uEriujgVLfaqoo_VPtNBLuA@mail.gmail.com>
2014-12-04 6:57 ` Paul E. McKenney
[not found] ` <CA+55aFxGCxQNE4HzRP_Uk2FmFDn_+Hfm6GBz75S+5+SDeODVJQ@mail.gmail.com>
2014-12-04 7:02 ` Paul E. McKenney
2014-12-04 18:02 ` Linus Torvalds
2014-12-04 18:02 ` Linus Torvalds
2014-12-04 18:36 ` Paul E. McKenney
2014-12-04 19:18 ` Linus Torvalds
2014-12-04 20:00 ` Paul E. McKenney
2014-12-04 20:12 ` Paul E. McKenney
2014-12-04 21:45 ` One Thousand Gnomes
2014-12-04 22:06 ` Paul E. McKenney
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox