public inbox for linux-arch@vger.kernel.org
 help / color / mirror / Atom feed
* GCC 12 miscompilation of volatile asm (was: Re: [PATCH] arm64/io: Remind compiler that there is a memory side effect)
       [not found] ` <Ykc0xrLv391/jdJj@FVFF77S0Q05N>
@ 2022-04-05 12:51   ` Mark Rutland
  2022-04-05 13:04     ` Mark Rutland
                       ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Mark Rutland @ 2022-04-05 12:51 UTC (permalink / raw)
  To: linux-arm-kernel, linux-kernel
  Cc: linux-arch, gcc, catalin.marinas, will, marcan, maz,
	szabolcs.nagy, f.fainelli, opendmb, Andrew Pinski, Ard Biesheuvel,
	Peter Zijlstra, x86, andrew.cooper3, Jeremy Linton

Hi all,

[adding kernel folk who work on asm stuff]

As a heads-up, GCC 12 (not yet released) appears to erroneously optimize away
calls to functions with volatile asm. Szabolcs has raised an issue on the GCC
bugzilla:  

  https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105160

... which is a P1 release blocker, and is currently being investigated.

Jemery originally reported this as an issue with {readl,writel}_relaxed(), but
the underlying problem doesn't have anything to do with those specifically.

I'm dumping a bunch of info here largely for posterity / archival, and to find
out who (from the kernel side) is willing and able to test proposed compiler
fixes, once those are available.

I'm happy to do so for aarch64; Peter, I assume you'd be happy to look at the
x86 side?

This is a generic issue, and 

I wrote test cases for aarch64 and x86_64. Those are inline later in this mail,
and currently you can see them on compiler explorer:

  aarch64: https://godbolt.org/z/vMczqjYvs

  x86_64: https://godbolt.org/z/cveff9hq5



My aarch64 test case is:

| #define sysreg_read(regname)		\
| ({					\
| 	unsigned long __sr_val;		\
| 	asm volatile(			\
| 	"mrs %0, " #regname "\n"	\
| 	: "=r" (__sr_val));		\
| 					\
| 	__sr_val;			\
| })
| 
| #define sysreg_write(regname, __sw_val)	\
| do {					\
| 	asm volatile(			\
| 	"msr " #regname ", %0\n"	\
| 	:				\
| 	: "r" (__sw_val));		\
| } while (0)
| 
| #define isb()				\
| do {					\
| 	asm volatile(			\
| 	"isb"				\
| 	:				\
| 	:				\
| 	: "memory");			\
| } while (0)
| 
| static unsigned long sctlr_read(void)
| {
| 	return sysreg_read(sctlr_el1);
| }
| 
| static void sctlr_write(unsigned long val)
| {
| 	sysreg_write(sctlr_el1, val);
| }
| 
| static void sctlr_rmw(void)
| {
| 	unsigned long val;
| 
| 	val = sctlr_read();
| 	val |= 1UL << 7;
| 	sctlr_write(val);
| }
| 
| void sctlr_read_multiple(void)
| {
| 	sctlr_read();
| 	sctlr_read();
| 	sctlr_read();
| 	sctlr_read();
| }
| 
| void sctlr_write_multiple(void)
| {
| 	sctlr_write(0);
| 	sctlr_write(0);
| 	sctlr_write(0);
| 	sctlr_write(0);
| 	sctlr_write(0);
| }
| 
| void sctlr_rmw_multiple(void)
| {
| 	sctlr_rmw();
| 	sctlr_rmw();
| 	sctlr_rmw();
| 	sctlr_rmw();
| }
| 
| void function(void)
| {
| 	sctlr_read_multiple();
| 	sctlr_write_multiple();
| 	sctlr_rmw_multiple();
| 
| 	isb();
| }

Per compiler explorer (https://godbolt.org/z/vMczqjYvs) GCC trunk currently
compiles this as:

| sctlr_rmw:
|         mrs x0, sctlr_el1
|         orr     x0, x0, 128
|         msr sctlr_el1, x0
|         ret
| sctlr_read_multiple:
|         mrs x0, sctlr_el1
|         mrs x0, sctlr_el1
|         mrs x0, sctlr_el1
|         mrs x0, sctlr_el1
|         ret
| sctlr_write_multiple:
|         mov     x0, 0
|         msr sctlr_el1, x0
|         msr sctlr_el1, x0
|         msr sctlr_el1, x0
|         msr sctlr_el1, x0
|         msr sctlr_el1, x0
|         ret
| sctlr_rmw_multiple:
|         ret
| function:
|         isb
|         ret

Whereas GCC 11.2 compiles this as:

| sctlr_rmw:
|         mrs x0, sctlr_el1
|         orr     x0, x0, 128
|         msr sctlr_el1, x0
|         ret
| sctlr_read_multiple:
|         mrs x0, sctlr_el1
|         mrs x0, sctlr_el1
|         mrs x0, sctlr_el1
|         mrs x0, sctlr_el1
|         ret
| sctlr_write_multiple:
|         mov     x0, 0
|         msr sctlr_el1, x0
|         msr sctlr_el1, x0
|         msr sctlr_el1, x0
|         msr sctlr_el1, x0
|         msr sctlr_el1, x0
|         ret
| sctlr_rmw_multiple:
|         stp     x29, x30, [sp, -16]!
|         mov     x29, sp
|         bl      sctlr_rmw
|         bl      sctlr_rmw
|         bl      sctlr_rmw
|         bl      sctlr_rmw
|         ldp     x29, x30, [sp], 16
|         ret
| function:
|         stp     x29, x30, [sp, -16]!
|         mov     x29, sp
|         bl      sctlr_read_multiple
|         bl      sctlr_write_multiple
|         bl      sctlr_rmw_multiple
|         isb
|         ldp     x29, x30, [sp], 16
|         ret



My x86_64 test case is:

| unsigned long rdmsr(unsigned long reg)
| {
|     unsigned int lo, hi;
| 
|     asm volatile(
|     "rdmsr"
|     : "=d" (hi), "=a" (lo)
|     : "c" (reg)
|     );
| 
|     return ((unsigned long)hi << 32) | lo;
| }
| 
| void wrmsr(unsigned long reg, unsigned long val)
| {
|     unsigned int lo = val;
|     unsigned int hi = val >> 32;
| 
|     asm volatile(
|     "wrmsr"
|     :
|     : "d" (hi), "a" (lo), "c" (reg)
|     );
| }
| 
| void msr_rmw_set_bits(unsigned long reg, unsigned long bits)
| {
|     unsigned long val;
| 
|     val = rdmsr(reg);
|     val |= bits;
|     wrmsr(reg, val);
| }
| 
| void func_with_msr_side_effects(unsigned long reg)
| {
|     msr_rmw_set_bits(reg, 1UL << 0);
|     msr_rmw_set_bits(reg, 1UL << 1);
|     msr_rmw_set_bits(reg, 1UL << 2);
|     msr_rmw_set_bits(reg, 1UL << 3);
| }

Per compiler explorer (https://godbolt.org/z/cveff9hq5) GCC trunk currently
compiles this as:

| msr_rmw_set_bits:
|         mov     rcx, rdi
|         rdmsr
|         sal     rdx, 32
|         mov     eax, eax
|         or      rax, rsi
|         or      rax, rdx
|         mov     rdx, rax
|         shr     rdx, 32
|         wrmsr
|         ret
| func_with_msr_side_effects:
|         ret

While GCC 11.2 compiles that as:

| msr_rmw_set_bits:
|         mov     rcx, rdi
|         rdmsr
|         sal     rdx, 32
|         mov     eax, eax
|         or      rax, rsi
|         or      rax, rdx
|         mov     rdx, rax
|         shr     rdx, 32
|         wrmsr
|         ret
| func_with_msr_side_effects:
|         push    rbp
|         push    rbx
|         mov     rbx, rdi
|         mov     rbp, rsi
|         call    msr_rmw_set_bits
|         mov     rsi, rbp
|         mov     rdi, rbx
|         call    msr_rmw_set_bits
|         mov     rsi, rbp
|         mov     rdi, rbx
|         call    msr_rmw_set_bits
|         mov     rsi, rbp
|         mov     rdi, rbx
|         call    msr_rmw_set_bits
|         pop     rbx
|         pop     rbp
|         ret

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: GCC 12 miscompilation of volatile asm (was: Re: [PATCH] arm64/io: Remind compiler that there is a memory side effect)
  2022-04-05 12:51   ` GCC 12 miscompilation of volatile asm (was: Re: [PATCH] arm64/io: Remind compiler that there is a memory side effect) Mark Rutland
@ 2022-04-05 13:04     ` Mark Rutland
  2022-04-05 13:20       ` Andrew Cooper
  2022-04-05 14:05     ` Peter Zijlstra
  2022-04-11 10:31     ` Mark Rutland
  2 siblings, 1 reply; 7+ messages in thread
From: Mark Rutland @ 2022-04-05 13:04 UTC (permalink / raw)
  To: linux-arm-kernel, linux-kernel
  Cc: linux-arch, gcc, catalin.marinas, will, marcan, maz,
	szabolcs.nagy, f.fainelli, opendmb, Andrew Pinski, Ard Biesheuvel,
	Peter Zijlstra, x86, andrew.cooper3, Jeremy Linton

Sorry, I copied the wrong version of the x86_64 assembly as generated by GCC
11.2.0). Updated below.

On Tue, Apr 05, 2022 at 01:51:30PM +0100, Mark Rutland wrote:
> My x86_64 test case is:
> 
> | unsigned long rdmsr(unsigned long reg)
> | {
> |     unsigned int lo, hi;
> | 
> |     asm volatile(
> |     "rdmsr"
> |     : "=d" (hi), "=a" (lo)
> |     : "c" (reg)
> |     );
> | 
> |     return ((unsigned long)hi << 32) | lo;
> | }
> | 
> | void wrmsr(unsigned long reg, unsigned long val)
> | {
> |     unsigned int lo = val;
> |     unsigned int hi = val >> 32;
> | 
> |     asm volatile(
> |     "wrmsr"
> |     :
> |     : "d" (hi), "a" (lo), "c" (reg)
> |     );
> | }
> | 
> | void msr_rmw_set_bits(unsigned long reg, unsigned long bits)
> | {
> |     unsigned long val;
> | 
> |     val = rdmsr(reg);
> |     val |= bits;
> |     wrmsr(reg, val);
> | }
> | 
> | void func_with_msr_side_effects(unsigned long reg)
> | {
> |     msr_rmw_set_bits(reg, 1UL << 0);
> |     msr_rmw_set_bits(reg, 1UL << 1);
> |     msr_rmw_set_bits(reg, 1UL << 2);
> |     msr_rmw_set_bits(reg, 1UL << 3);
> | }
> 
> Per compiler explorer (https://godbolt.org/z/cveff9hq5) GCC trunk currently
> compiles this as:
> 
> | msr_rmw_set_bits:
> |         mov     rcx, rdi
> |         rdmsr
> |         sal     rdx, 32
> |         mov     eax, eax
> |         or      rax, rsi
> |         or      rax, rdx
> |         mov     rdx, rax
> |         shr     rdx, 32
> |         wrmsr
> |         ret
> | func_with_msr_side_effects:
> |         ret
> 

GCC 11.2 compiles that as:

| rdmsr:
|         mov     rcx, rdi
|         rdmsr
|         sal     rdx, 32
|         mov     eax, eax
|         or      rax, rdx
|         ret
| wrmsr:
|         mov     rax, rsi
|         mov     rdx, rsi
|         shr     rdx, 32
|         mov     rcx, rdi
|         wrmsr
|         ret
| msr_rmw_set_bits:
|         mov     rcx, rdi
|         rdmsr
|         sal     rdx, 32
|         mov     eax, eax
|         or      rax, rsi
|         or      rax, rdx
|         mov     rdx, rax
|         shr     rdx, 32
|         wrmsr
|         ret
| func_with_msr_side_effects:
|         push    rbx
|         mov     rbx, rdi
|         mov     esi, 1
|         call    msr_rmw_set_bits
|         mov     esi, 2
|         mov     rdi, rbx
|         call    msr_rmw_set_bits
|         mov     esi, 4
|         mov     rdi, rbx
|         call    msr_rmw_set_bits
|         mov     esi, 8
|         mov     rdi, rbx
|         call    msr_rmw_set_bits
|         pop     rbx
|         ret

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: GCC 12 miscompilation of volatile asm (was: Re: [PATCH] arm64/io: Remind compiler that there is a memory side effect)
  2022-04-05 13:04     ` Mark Rutland
@ 2022-04-05 13:20       ` Andrew Cooper
  0 siblings, 0 replies; 7+ messages in thread
From: Andrew Cooper @ 2022-04-05 13:20 UTC (permalink / raw)
  To: Mark Rutland, linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org
  Cc: linux-arch@vger.kernel.org, gcc@gcc.gnu.org,
	catalin.marinas@arm.com, will@kernel.org, marcan@marcan.st,
	maz@kernel.org, szabolcs.nagy@arm.com, f.fainelli@gmail.com,
	opendmb@gmail.com, Andrew Pinski, Ard Biesheuvel, Peter Zijlstra,
	x86@kernel.org, Jeremy Linton

On 05/04/2022 14:04, Mark Rutland wrote:
> On Tue, Apr 05, 2022 at 01:51:30PM +0100, Mark Rutland wrote:
> My x86_64 test case is:
>
> Per compiler explorer (https://godbolt.org/z/cveff9hq5) GCC trunk currently
> compiles this as:
>
> | msr_rmw_set_bits:
> |         mov     rcx, rdi
> |         rdmsr
> |         sal     rdx, 32
> |         mov     eax, eax
> |         or      rax, rsi
> |         or      rax, rdx
> |         mov     rdx, rax
> |         shr     rdx, 32
> |         wrmsr
> |         ret
> | func_with_msr_side_effects:
> |         ret
>

Yeah... that code gen is very broken for func_with_msr_side_effects().

~Andrew

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: GCC 12 miscompilation of volatile asm (was: Re: [PATCH] arm64/io: Remind compiler that there is a memory side effect)
  2022-04-05 12:51   ` GCC 12 miscompilation of volatile asm (was: Re: [PATCH] arm64/io: Remind compiler that there is a memory side effect) Mark Rutland
  2022-04-05 13:04     ` Mark Rutland
@ 2022-04-05 14:05     ` Peter Zijlstra
  2022-04-11 10:22       ` Mark Rutland
  2022-04-11 10:31     ` Mark Rutland
  2 siblings, 1 reply; 7+ messages in thread
From: Peter Zijlstra @ 2022-04-05 14:05 UTC (permalink / raw)
  To: Mark Rutland
  Cc: linux-arm-kernel, linux-kernel, linux-arch, gcc, catalin.marinas,
	will, marcan, maz, szabolcs.nagy, f.fainelli, opendmb,
	Andrew Pinski, Ard Biesheuvel, x86, andrew.cooper3, Jeremy Linton

On Tue, Apr 05, 2022 at 01:51:30PM +0100, Mark Rutland wrote:
> Hi all,
> 
> [adding kernel folk who work on asm stuff]
> 
> As a heads-up, GCC 12 (not yet released) appears to erroneously optimize away
> calls to functions with volatile asm. Szabolcs has raised an issue on the GCC
> bugzilla:  
> 
>   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105160
> 
> ... which is a P1 release blocker, and is currently being investigated.
> 
> Jemery originally reported this as an issue with {readl,writel}_relaxed(), but
> the underlying problem doesn't have anything to do with those specifically.
> 
> I'm dumping a bunch of info here largely for posterity / archival, and to find
> out who (from the kernel side) is willing and able to test proposed compiler
> fixes, once those are available.
> 
> I'm happy to do so for aarch64; Peter, I assume you'd be happy to look at the
> x86 side?

Sure..

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: GCC 12 miscompilation of volatile asm (was: Re: [PATCH] arm64/io: Remind compiler that there is a memory side effect)
  2022-04-05 14:05     ` Peter Zijlstra
@ 2022-04-11 10:22       ` Mark Rutland
  0 siblings, 0 replies; 7+ messages in thread
From: Mark Rutland @ 2022-04-11 10:22 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-arm-kernel, linux-kernel, linux-arch, gcc, catalin.marinas,
	will, marcan, maz, szabolcs.nagy, f.fainelli, opendmb,
	Andrew Pinski, Ard Biesheuvel, x86, andrew.cooper3, Jeremy Linton

On Tue, Apr 05, 2022 at 04:05:22PM +0200, Peter Zijlstra wrote:
> On Tue, Apr 05, 2022 at 01:51:30PM +0100, Mark Rutland wrote:
> > Hi all,
> > 
> > [adding kernel folk who work on asm stuff]
> > 
> > As a heads-up, GCC 12 (not yet released) appears to erroneously optimize away
> > calls to functions with volatile asm. Szabolcs has raised an issue on the GCC
> > bugzilla:  
> > 
> >   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105160
> > 
> > ... which is a P1 release blocker, and is currently being investigated.
> > 
> > Jemery originally reported this as an issue with {readl,writel}_relaxed(), but
> > the underlying problem doesn't have anything to do with those specifically.
> > 
> > I'm dumping a bunch of info here largely for posterity / archival, and to find
> > out who (from the kernel side) is willing and able to test proposed compiler
> > fixes, once those are available.
> > 
> > I'm happy to do so for aarch64; Peter, I assume you'd be happy to look at the
> > x86 side?
> 
> Sure..

FWIW, compiler explorer now have a trunk build with the fix, and my x86-64
example now gets compiled to something which looks correct:

  https://godbolt.org/z/cveff9hq5

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: GCC 12 miscompilation of volatile asm (was: Re: [PATCH] arm64/io: Remind compiler that there is a memory side effect)
  2022-04-05 12:51   ` GCC 12 miscompilation of volatile asm (was: Re: [PATCH] arm64/io: Remind compiler that there is a memory side effect) Mark Rutland
  2022-04-05 13:04     ` Mark Rutland
  2022-04-05 14:05     ` Peter Zijlstra
@ 2022-04-11 10:31     ` Mark Rutland
  2022-04-11 19:02       ` Jeremy Linton
  2 siblings, 1 reply; 7+ messages in thread
From: Mark Rutland @ 2022-04-11 10:31 UTC (permalink / raw)
  To: linux-arm-kernel, linux-kernel, Jeremy Linton
  Cc: linux-arch, gcc, catalin.marinas, will, marcan, maz,
	szabolcs.nagy, f.fainelli, opendmb, Andrew Pinski, Ard Biesheuvel,
	Peter Zijlstra, x86, andrew.cooper3

On Tue, Apr 05, 2022 at 01:51:30PM +0100, Mark Rutland wrote:
> Hi all,
> 
> [adding kernel folk who work on asm stuff]
> 
> As a heads-up, GCC 12 (not yet released) appears to erroneously optimize away
> calls to functions with volatile asm. Szabolcs has raised an issue on the GCC
> bugzilla:  
> 
>   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105160
> 
> ... which is a P1 release blocker, and is currently being investigated.

Jan Hubicka fixed this in GCC commit:

  aabb9a261ef060cf ("Propagate nondeterministic and side_effects flags in modref summary after inlining")

... and all my local tests look good with that applied.

Compiler explorer's trunk build now has that fix, so the examples from before
now look good:

  aarch64: https://godbolt.org/z/vMczqjYvs

  x86_64: https://godbolt.org/z/cveff9hq5

Jeremy, now that the real issue has been identified and fixed, I assume you'll
send a revert for commit:

  8d3ea3d402db94b6 ("net: bcmgenet: Use stronger register read/writes to assure ordering")

... ?

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: GCC 12 miscompilation of volatile asm (was: Re: [PATCH] arm64/io: Remind compiler that there is a memory side effect)
  2022-04-11 10:31     ` Mark Rutland
@ 2022-04-11 19:02       ` Jeremy Linton
  0 siblings, 0 replies; 7+ messages in thread
From: Jeremy Linton @ 2022-04-11 19:02 UTC (permalink / raw)
  To: Mark Rutland, linux-arm-kernel, linux-kernel
  Cc: linux-arch, gcc, catalin.marinas, will, marcan, maz,
	szabolcs.nagy, f.fainelli, opendmb, Andrew Pinski, Ard Biesheuvel,
	Peter Zijlstra, x86, andrew.cooper3

Hi,


On 4/11/22 05:31, Mark Rutland wrote:
> On Tue, Apr 05, 2022 at 01:51:30PM +0100, Mark Rutland wrote:
>> Hi all,
>>
>> [adding kernel folk who work on asm stuff]
>>
>> As a heads-up, GCC 12 (not yet released) appears to erroneously optimize away
>> calls to functions with volatile asm. Szabolcs has raised an issue on the GCC
>> bugzilla:
>>
>>    https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105160
>>
>> ... which is a P1 release blocker, and is currently being investigated.
> 
> Jan Hubicka fixed this in GCC commit:
> 
>    aabb9a261ef060cf ("Propagate nondeterministic and side_effects flags in modref summary after inlining")
> 
> ... and all my local tests look good with that applied.
> 
> Compiler explorer's trunk build now has that fix, so the examples from before
> now look good:
> 
>    aarch64: https://godbolt.org/z/vMczqjYvs
> 
>    x86_64: https://godbolt.org/z/cveff9hq5
> 
> Jeremy, now that the real issue has been identified and fixed, I assume you'll
> send a revert for commit:
> 
>    8d3ea3d402db94b6 ("net: bcmgenet: Use stronger register read/writes to assure ordering")
> 
> ... ?

Yes, that's the plan.

Thanks,


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2022-04-11 19:03 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <20220401164406.61583-1-jeremy.linton@arm.com>
     [not found] ` <Ykc0xrLv391/jdJj@FVFF77S0Q05N>
2022-04-05 12:51   ` GCC 12 miscompilation of volatile asm (was: Re: [PATCH] arm64/io: Remind compiler that there is a memory side effect) Mark Rutland
2022-04-05 13:04     ` Mark Rutland
2022-04-05 13:20       ` Andrew Cooper
2022-04-05 14:05     ` Peter Zijlstra
2022-04-11 10:22       ` Mark Rutland
2022-04-11 10:31     ` Mark Rutland
2022-04-11 19:02       ` Jeremy Linton

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox