All of lore.kernel.org
 help / color / mirror / Atom feed
From: Al Viro <viro@ZenIV.linux.org.uk>
To: Larry Finger <Larry.Finger@lwfinger.net>
Cc: LKML <linux-kernel@vger.kernel.org>,
	Thorsten Leemhuis <regressions@leemhuis.info>,
	linuxppc-dev@lists.ozlabs.org,
	Linus Torvalds <torvalds@linux-foundation.org>
Subject: gcc 4.6.3 miscompile on ppc32 (was Re: Regression in kernel 4.12-rc1 for Powerpc 32 - bisected to commit 3448890c32c3)
Date: Sun, 25 Jun 2017 21:53:24 +0100	[thread overview]
Message-ID: <20170625205324.GW10672@ZenIV.linux.org.uk> (raw)
In-Reply-To: <20170625111404.GV10672@ZenIV.linux.org.uk>

[-- Attachment #1: Type: text/plain, Size: 2667 bytes --]

On Sun, Jun 25, 2017 at 12:14:04PM +0100, Al Viro wrote:
> On Sun, Jun 25, 2017 at 10:53:58AM +0100, Al Viro wrote:
> > On Sat, Jun 24, 2017 at 12:29:23PM -0500, Larry Finger wrote:
> > 
> > > I made a break through. If I turn off inline copy to/from users for 32-bit
> > > ppc with the following patch, then the system boots:
> > 
> > OK...  So it's 4.6.3 miscompiling something - it is hardware-independent,
> > reproduced in qemu.  I'd like to get more self-contained example of
> > miscompile, though; should be done by tonight...
> 
> OK, it's the call in rw_copy_check_uvector(); with INLINE_COPY_FROM_USER
> it's miscompiled by 4.6.3.  I hadn't looked through the generated code
> yet; will do that after I grab some sleep.

Confirmed.  It manages to bugger the loop immediately after the (successful)
copying of iovec array in rw_copy_check_uvector(); both with and without
INLINE_COPY_FROM_USER it has (just before the call of copy_from_user()) r27
set to nr_segs * sizeof(struct iovec).  The call is made, we check that it
has succeeded and that's when it hits the fan: without INLINE_COPY_FROM_USER
we have (interleaved with unrelated insns)
        addi 27,27,-8
        srwi 27,27,3
        addi 27,27,1
        mtctr 27
Weird, but manages to pass nr_segs to mtctr.  _With_ INLINE_COPY_FROM_USER we
get this:
        lis 9,0x2000
        mtctr 9
In other words, the loop will try to go through 0x20000000 iterations (lis puts
0x2000 into the upper halfword, so that is 2^29, not 8192).  No idea where
that number has come from, but it sure as hell is wrong.  That's where those
-EINVAL, etc. are coming from - we run into something negative in iov[seg].len,
after having run out of on-stack iovec array.

	Assembler generated out of rw_copy_check_uvector() with and without
INLINE_COPY_FROM_USER is attached; it's a definite miscompile.  Neither 4.4.5
nor 6.3.0 use mtctr/bdnz for that loop.

	The bottom line is, ppc cross-toolchain on kernel.org happens to be
the version that miscompiles rw_copy_check_uvector() with INLINE_COPY_FROM_USER
and hell knows what else.  That said, I would rather have ppc32 drop the
INLINE_COPY_{TO,FROM}_USER anyway; that won't fix any other places where
the same 4.6.3 bug hits, but I seriously suspect that it will end up being
faster even on non^Wless buggy gcc versions.  Could powerpc folks check
what removing those two defines from arch/powerpc/include/asm/uaccess.h
does to performance?  If there's no slowdown, I would strongly recommend just
removing those as in the patch Larry has posted upthread.

	Fixing whatever it is in gcc 4.6.3 that triggers that behaviour is
IMO pointless - it might make sense to switch kernel.org cross-toolchain to
something more recent, but that's it.

[-- Attachment #2: rw_copy_check_uvector() with INLINE_COPY_FROM_USER --]
[-- Type: text/plain, Size: 3342 bytes --]

	# rw_copy_check_uvector() as emitted by gcc 4.6.3 for ppc32 with
	# INLINE_COPY_FROM_USER defined (the miscompiled variant).
	# Register use, per the compiler's own operand annotations:
	#   on entry r3 = type, r4 = uvector, r5 = nr_segs, r6 = fast_segs,
	#   r7 = fast_pointer, r8 = ret_pointer;
	#   kept in r30 = type, r31 = uvector, r27 = nr_segs (later n),
	#   r28 = iov, r29 = ret_pointer.
	#   r0 carries ret to the common exit at .L495.
	.globl rw_copy_check_uvector
	.type	rw_copy_check_uvector, @function
rw_copy_check_uvector:
.LFB2683:
	.loc 1 773 0
	# Prologue: 32-byte frame, save r27-r31 at 12(r1) and LR at 36(r1).
	stwu 1,-32(1)	 #,,
.LCFI142:
	mflr 0	 #,
.LCFI143:
	stmw 27,12(1)	 #,
.LCFI144:
	.loc 1 783 0
	# The record form also sets cr0 for the nr_segs == 0 test below.
	mr. 27,5	 # nr_segs, nr_segs
	.loc 1 773 0
	mr 30,3	 # type, type
	stw 0,36(1)	 #,
.LCFI145:
	.loc 1 773 0
	mr 31,4	 # uvector, uvector
	mr 29,8	 # ret_pointer, ret_pointer
	.loc 1 776 0
	mr 28,7	 # iov, fast_pointer
	.loc 1 784 0
	li 0,0	 # ret,
	.loc 1 783 0
	# nr_segs == 0: leave with ret = 0.
	beq- 0,.L495	 #
	.loc 1 792 0
	# nr_segs > 1024 (unsigned): leave with ret = -22.
	cmplwi 7,27,1024	 #, tmp160, nr_segs
	.loc 1 793 0
	li 0,-22	 # ret,
	.loc 1 792 0
	bgt- 7,.L495	 #
	.loc 1 796 0
	# nr_segs <= fast_segs: keep iov = fast_pointer and skip the allocation.
	cmplw 7,27,6	 # fast_segs, tmp161, nr_segs
	ble- 7,.L496	 #
.LBB1538:
.LBB1539:
	.file 21 "./include/linux/slab.h"
	.loc 21 495 0
	# iov = __kmalloc(nr_segs << 3, 0x14000c0); a NULL result leaves
	# with ret = -12.
	lis 4,0x140	 # tmp190,
	slwi 3,27,3	 #, nr_segs,
	ori 4,4,192	 #,, tmp190,
	bl __kmalloc	 #
.LBE1539:
.LBE1538:
	.loc 1 799 0
	li 0,-12	 # ret,
	.loc 1 798 0
	mr. 28,3	 # iov,
	beq- 0,.L495	 #
.L496:
.LBB1540:
.LBB1541:
.LBB1542:
.LBB1543:
	.loc 19 113 0
	# Inlined range check of the user pointer against the limit loaded
	# from 1128(r2) (annotated current->thread.fs.seg).
	lwz 0,1128(2)	 # current.192_185->thread.fs.seg, D.39493
.LBE1543:
.LBE1542:
.LBE1541:
.LBE1540:
	.loc 1 803 0
	# From here on r27 = n = nr_segs * 8, the byte size of the array.
	slwi 27,27,3	 # n, nr_segs,
.LBB1549:
.LBB1548:
.LBB1547:
.LBB1546:
	mr 5,27	 # n, n
	.loc 19 113 0
	# Reject if uvector > limit or (n - 1) > (limit - uvector), both
	# unsigned.  r5 still equals n on the reject path into .L497.
	cmplw 7,31,0	 # D.39493, tmp165, uvector
	bgt- 7,.L497	 #
	addi 9,27,-1	 # tmp166, n,
	subf 0,31,0	 # tmp167, uvector, D.39493
	cmplw 7,9,0	 # tmp167, tmp168, tmp166
	bgt- 7,.L497	 #
.LBB1544:
.LBB1545:
	.file 22 "./arch/powerpc/include/asm/uaccess.h"
	.loc 22 305 0
	# __copy_tofrom_user(iov, uvector, n); r5 was set to n above.
	mr 3,28	 #, iov
	mr 4,31	 #, uvector
	bl __copy_tofrom_user	 #
.LBE1545:
.LBE1544:
	.loc 19 115 0
	# r5 = return value; zero is the expected case and goes to .L498.
	mr. 5,3	 # n,
	beq+ 0,.L498	 #
.L497:
	.loc 19 116 0
	# Failure path: memset(iov + (n - r5), 0, r5), then ret = -14 via
	# .L510.  r5 is n if the range check failed, otherwise the non-zero
	# return of __copy_tofrom_user (presumably the bytes left uncopied --
	# TODO confirm against that routine).
	subf 3,5,27	 # tmp170, n, n
	li 4,0	 #,
	add 3,28,3	 #, iov, tmp170
	bl memset	 #
	b .L510	 #
.L498:
.LBE1546:
.LBE1547:
.LBE1548:
.LBE1549:
.LBB1550:
	.loc 1 833 0
	# MISCOMPILE: the trip count is a constant instead of being derived
	# from n (r27).  lis sets r9 = 0x2000 << 16 = 0x20000000, which the
	# mtctr below puts into CTR, so bdnz counts 2^29 iterations rather
	# than nr_segs.  The build without INLINE_COPY_FROM_USER computes
	# ((n - 8) >> 3) + 1 at this point; in 32-bit arithmetic that
	# expression yields 0x20000000 when n is 0.
	# NOTE(review): possibly n was folded to the zero return of the
	# inlined copy; unconfirmed.
	lis 9,0x2000	 #,
	.loc 1 828 0
	# cr6 holds the sign test of type for the whole loop.
	cmpwi 6,30,0	 #, tmp186, type
	.loc 1 833 0
	lis 6,0x7fff	 # tmp189,
	mtctr 9	 # tmp188,
	.loc 1 829 0
	mr 5,2	 # current.121, current
	# r8 = byte offset into iov, r0 = running total (ret).
	li 8,0	 # ivtmp.533,
	li 0,0	 # ret,
	.loc 1 833 0
	# r6 = 0x7ffff000, the cap applied to the running total below.
	ori 6,6,61440	 #, tmp187, tmp189,
.L501:
	.loc 1 819 0
	# Per-entry loop.  lwzux loads buf from iov + r8 and leaves the
	# entry address in r11; len is the word at offset 4 of the entry.
	mr 11,28	 # D.40168, iov
	lwzux 10,11,8	 # MEM[base: iov_4, index: ivtmp.533_176, offset: 0B], buf
	.loc 1 820 0
	lwz 9,4(11)	 # MEM[base: D.40168_211, offset: 4B], len
	.loc 1 824 0
	# Negative len: ret = -22 via .L508.  cr7 is reused by the beq below.
	cmpwi 7,9,0	 #, tmp175, len
	blt- 7,.L508	 #
	.loc 1 828 0
	# type < 0: skip the range check on buf.
	blt- 6,.L499	 #
	.loc 1 829 0
	# Same range check as for uvector, now on (buf, len); failure gives
	# ret = -14 via .L510.  A zero len skips the second comparison.
	lwz 7,1128(5)	 # current.121_33->thread.fs.seg, D.36573
	cmplw 1,10,7	 # D.36573, tmp177, buf
	bgt- 1,.L510	 #
	.loc 1 829 0 is_stmt 0 discriminator 1
	beq- 7,.L499	 #
	.loc 1 829 0 discriminator 4
	addi 4,9,-1	 # tmp179, len,
	subf 10,10,7	 # tmp180, buf, D.36573
	cmplw 7,4,10	 # tmp180, tmp181, tmp179
	bgt- 7,.L510	 #
.L499:
	.loc 1 833 0 is_stmt 1
	# Clamp: if len > 0x7ffff000 - ret (signed), write the reduced
	# length back into the entry and use it as len.
	subf 10,0,6	 # len, ret, tmp187
	cmpw 7,9,10	 # len, tmp183, len
	ble- 7,.L500	 #
	.loc 1 835 0
	stw 10,4(11)	 # MEM[base: D.40168_211, offset: 4B], len
	mr 9,10	 # len, len
.L500:
	.loc 1 837 0
	# ret += len; advance to the next 8-byte entry.
	add 0,0,9	 # ret, ret, len
	addi 8,8,8	 # ivtmp.533, ivtmp.533,
.LBE1550:
	.loc 1 818 0
	# Decrement CTR and loop while it is non-zero.
	bdnz .L501	 #
	b .L495	 #
.L508:
.LBB1551:
	.loc 1 825 0
	li 0,-22	 # ret,
	b .L495	 #
.L510:
	.loc 1 830 0
	li 0,-14	 # ret,
.L495:
.LBE1551:
	.loc 1 842 0
	# Common exit: *ret_pointer = iov, r3 = ret, then tail-jump to
	# _restgpr_27_x with r11 = r1 + 32.  That routine is not visible here;
	# presumably it restores r27-r31 and returns to the caller.
	addi 11,1,32	 #,,
	.loc 1 840 0
	stw 28,0(29)	 # *ret_pointer_53(D), iov
	.loc 1 842 0
	mr 3,0	 #, ret
	b _restgpr_27_x	 #
.LFE2683:
	.size	rw_copy_check_uvector,.-rw_copy_check_uvector

[-- Attachment #3: the same without INLINE_COPY_FROM_USER --]
[-- Type: text/plain, Size: 2872 bytes --]

	# rw_copy_check_uvector() as emitted by gcc 4.6.3 for ppc32 without
	# INLINE_COPY_FROM_USER (the variant whose loop count is correct).
	# Register use, per the compiler's own operand annotations:
	#   on entry r3 = type, r4 = uvector, r5 = nr_segs, r6 = fast_segs,
	#   r7 = fast_pointer, r8 = ret_pointer;
	#   kept in r31 = type, r30 = uvector, r27 = nr_segs (later n),
	#   r28 = iov, r29 = ret_pointer.
	#   r0 carries ret to the common exit at .L495.
	.globl rw_copy_check_uvector
	.type	rw_copy_check_uvector, @function
rw_copy_check_uvector:
.LFB2683:
	.loc 1 773 0
	# Prologue: 32-byte frame, save r27-r31 at 12(r1) and LR at 36(r1).
	stwu 1,-32(1)	 #,,
.LCFI142:
	mflr 0	 #,
.LCFI143:
	stmw 27,12(1)	 #,
.LCFI144:
	.loc 1 783 0
	# The record form also sets cr0 for the nr_segs == 0 test below.
	mr. 27,5	 # nr_segs, nr_segs
	.loc 1 773 0
	mr 31,3	 # type, type
	stw 0,36(1)	 #,
.LCFI145:
	.loc 1 773 0
	mr 30,4	 # uvector, uvector
	mr 29,8	 # ret_pointer, ret_pointer
	.loc 1 776 0
	mr 28,7	 # iov, fast_pointer
	.loc 1 784 0
	li 0,0	 # ret,
	.loc 1 783 0
	# nr_segs == 0: leave with ret = 0.
	beq- 0,.L495	 #
	.loc 1 792 0
	# nr_segs > 1024 (unsigned): leave with ret = -22.
	cmplwi 7,27,1024	 #, tmp151, nr_segs
	.loc 1 793 0
	li 0,-22	 # ret,
	.loc 1 792 0
	bgt- 7,.L495	 #
	.loc 1 796 0
	# nr_segs <= fast_segs: keep iov = fast_pointer and skip the allocation.
	cmplw 7,27,6	 # fast_segs, tmp152, nr_segs
	ble- 7,.L496	 #
.LBB1516:
.LBB1517:
	.file 21 "./include/linux/slab.h"
	.loc 21 495 0
	# iov = __kmalloc(nr_segs << 3, 0x14000c0); a NULL result leaves
	# with ret = -12.
	lis 4,0x140	 # tmp175,
	slwi 3,27,3	 #, nr_segs,
	ori 4,4,192	 #,, tmp175,
	bl __kmalloc	 #
.LBE1517:
.LBE1516:
	.loc 1 799 0
	li 0,-12	 # ret,
	.loc 1 798 0
	mr. 28,3	 # iov,
	beq- 0,.L495	 #
.L496:
	.loc 1 803 0
	# From here on r27 = n = nr_segs * 8, the byte size of the array.
	slwi 27,27,3	 # n, nr_segs,
.LBB1518:
.LBB1519:
	.loc 19 153 0
	# Out-of-line _copy_from_user(iov, uvector, n); any non-zero return
	# leaves with ret = -14.
	mr 3,28	 #, iov
	mr 4,30	 #, uvector
	mr 5,27	 #, n
	bl _copy_from_user	 #
.LBE1519:
.LBE1518:
	.loc 1 804 0
	li 0,-14	 # ret,
	.loc 1 803 0
	cmpwi 7,3,0	 #, tmp156,
	bne- 7,.L495	 #
.LBB1520:
	.loc 1 833 0
	# Trip count for the loop: CTR = ((n - 8) >> 3) + 1.  With
	# n = nr_segs * 8 and nr_segs already limited to 1..1024 by the
	# checks above, this equals nr_segs.
	addi 27,27,-8	 # tmp172, n,
	.loc 1 828 0
	# cr6 holds the sign test of type for the whole loop.
	cmpwi 6,31,0	 #, tmp168, type
	.loc 1 833 0
	srwi 27,27,3	 # tmp173, tmp172,
	lis 6,0x7fff	 # tmp174,
	addi 27,27,1	 #, tmp173,
	.loc 1 829 0
	mr 5,2	 # current.121, current
	.loc 1 833 0
	mtctr 27	 # tmp170,
	.loc 1 829 0
	# r8 = byte offset into iov, r0 = running total (ret).
	li 8,0	 # ivtmp.528,
	li 0,0	 # ret,
	.loc 1 833 0
	# r6 = 0x7ffff000, the cap applied to the running total below.
	ori 6,6,61440	 #, tmp169, tmp174,
.L499:
	.loc 1 819 0
	# Per-entry loop.  lwzux loads buf from iov + r8 and leaves the
	# entry address in r11; len is the word at offset 4 of the entry.
	mr 11,28	 # D.40034, iov
	lwzux 10,11,8	 # MEM[base: iov_4, index: ivtmp.528_176, offset: 0B], buf
	.loc 1 820 0
	lwz 9,4(11)	 # MEM[base: D.40034_183, offset: 4B], len
	.loc 1 824 0
	# Negative len: ret = -22 via .L505.  cr7 is reused by the beq below.
	cmpwi 7,9,0	 #, tmp157, len
	blt- 7,.L505	 #
	.loc 1 828 0
	# type < 0: skip the range check on buf.
	blt- 6,.L497	 #
	.loc 1 829 0
	# Range check on (buf, len) against the limit at 1128(r5): reject if
	# buf > limit or (len - 1) > (limit - buf), both unsigned; failure
	# gives ret = -14 via .L507.  A zero len skips the second comparison.
	lwz 7,1128(5)	 # current.121_33->thread.fs.seg, D.36573
	cmplw 1,10,7	 # D.36573, tmp159, buf
	bgt- 1,.L507	 #
	.loc 1 829 0 is_stmt 0 discriminator 1
	beq- 7,.L497	 #
	.loc 1 829 0 discriminator 4
	addi 4,9,-1	 # tmp161, len,
	subf 10,10,7	 # tmp162, buf, D.36573
	cmplw 7,4,10	 # tmp162, tmp163, tmp161
	bgt- 7,.L507	 #
.L497:
	.loc 1 833 0 is_stmt 1
	# Clamp: if len > 0x7ffff000 - ret (signed), write the reduced
	# length back into the entry and use it as len.
	subf 10,0,6	 # len, ret, tmp169
	cmpw 7,9,10	 # len, tmp165, len
	ble- 7,.L498	 #
	.loc 1 835 0
	stw 10,4(11)	 # MEM[base: D.40034_183, offset: 4B], len
	mr 9,10	 # len, len
.L498:
	.loc 1 837 0
	# ret += len; advance to the next 8-byte entry.
	add 0,0,9	 # ret, ret, len
	addi 8,8,8	 # ivtmp.528, ivtmp.528,
.LBE1520:
	.loc 1 818 0
	# Decrement CTR and loop while it is non-zero.
	bdnz .L499	 #
	b .L495	 #
.L505:
.LBB1521:
	.loc 1 825 0
	li 0,-22	 # ret,
	b .L495	 #
.L507:
	.loc 1 830 0
	li 0,-14	 # ret,
.L495:
.LBE1521:
	.loc 1 842 0
	# Common exit: *ret_pointer = iov, r3 = ret, then tail-jump to
	# _restgpr_27_x with r11 = r1 + 32.  That routine is not visible here;
	# presumably it restores r27-r31 and returns to the caller.
	addi 11,1,32	 #,,
	.loc 1 840 0
	stw 28,0(29)	 # *ret_pointer_53(D), iov
	.loc 1 842 0
	mr 3,0	 #, ret
	b _restgpr_27_x	 #
.LFE2683:
	.size	rw_copy_check_uvector,.-rw_copy_check_uvector

  reply	other threads:[~2017-06-25 20:53 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-06-01 16:39 Regression in kernel 4.12-rc1 for Powerpc 32 - partially bisected Larry Finger
2017-06-21 15:10 ` Regression in kernel 4.12-rc1 for Powerpc 32 - bisected to commit 3448890c32c3 Larry Finger
2017-06-21 21:22   ` Al Viro
2017-06-21 21:31     ` Larry Finger
2017-06-21 21:34       ` Al Viro
2017-06-21 21:49         ` Larry Finger
2017-06-22 14:12           ` Al Viro
2017-06-22 14:19             ` Larry Finger
2017-06-22 19:25               ` Al Viro
2017-06-22 21:41                 ` Al Viro
2017-06-23 18:49                 ` Larry Finger
2017-06-23 20:29                   ` Al Viro
2017-06-24  0:04                     ` Larry Finger
2017-06-24 17:29                     ` Larry Finger
2017-06-25  9:53                       ` Al Viro
2017-06-25 11:14                         ` Al Viro
2017-06-25 20:53                           ` Al Viro [this message]
2017-06-25 21:44                             ` gcc 4.6.3 miscompile on ppc32 (was Re: Regression in kernel 4.12-rc1 for Powerpc 32 - bisected to commit 3448890c32c3) Segher Boessenkool
2017-06-25 22:21                               ` Al Viro
2017-06-26 13:37                                 ` Michael Ellerman
2017-06-26 13:40                       ` Regression in kernel 4.12-rc1 for Powerpc 32 - bisected to commit 3448890c32c3 Michael Ellerman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170625205324.GW10672@ZenIV.linux.org.uk \
    --to=viro@zeniv.linux.org.uk \
    --cc=Larry.Finger@lwfinger.net \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=regressions@leemhuis.info \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.