From mboxrd@z Thu Jan  1 00:00:00 1970
From: Guangwen Feng <fenggw-fnst@cn.fujitsu.com>
Date: Mon, 10 Oct 2016 15:05:54 +0800
Subject: [LTP] [PATCH] syscalls/signal06: fix test for regression with
 earlier version of gcc and kernel
In-Reply-To: <20161006111548.GA9711@rei>
References: <1470380281-20334-1-git-send-email-fenggw-fnst@cn.fujitsu.com>
 <20161005134303.GA23476@rei.lan> <57F627EF.7070605@cn.fujitsu.com>
 <20161006111548.GA9711@rei>
Message-ID: <57FB3DD2.1070809@cn.fujitsu.com>
List-Id: <ltp.lists.linux.it>
MIME-Version: 1.0
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit
To: ltp@lists.linux.it

Hi!

On 10/06/2016 07:15 PM, Cyril Hrubis wrote:
> Hi!
>>> Hmm, what is the exact problem here? Does the old kernel break if we
>>> send the signal too fast?
>>
>> Yes, running signal06 reports segmentation fault and breaks the test
>> if we send the signal too fast on the old kernel.
> 
> Hmm, that sounds like a bug itself since we set signal handler for the
> SIGSEGV, it shouldn't kill the process while we try to write to the read
> only memory. Can you try to run it in a debugger and send a trace?

Yes, I guess this is a kernel bug, but sorry, I haven't looked much into
the real cause of the segfault.

[root@RHEL5U11ga_Intel64 signal]# gdb ./signal06 core.18623 
GNU gdb (GDB) Red Hat Enterprise Linux (7.0.1-45.el5)
Copyright (C) 2009 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
and "show warranty" for details.
This GDB was configured as "x86_64-redhat-linux-gnu".
For bug reporting instructions, please see:
<http://www.gnu.org/software/gdb/bugs/>...
Reading symbols from /root/ltp/testcases/kernel/syscalls/signal/signal06...done.
[New Thread 18623]
[New Thread 18625]
Reading symbols from /lib64/libpthread.so.0...(no debugging symbols found)...done.
[Thread debugging using libthread_db enabled]
Loaded symbols for /lib64/libpthread.so.0
Reading symbols from /lib64/libc.so.6...(no debugging symbols found)...done.
Loaded symbols for /lib64/libc.so.6
Reading symbols from /lib64/ld-linux-x86-64.so.2...(no debugging symbols found)...done.
Loaded symbols for /lib64/ld-linux-x86-64.so.2

warning: no loadable sections found in added symbol-file system-supplied DSO at 0x7fffe3384000
Core was generated by `./signal06'.
Program terminated with signal 11, Segmentation fault.
#0  test (d=123.456) at signal06.c:71
71		while (D == d && loop < LOOPS) {
(gdb) bt
#0  test (d=123.456) at signal06.c:71
#1  0x0000000000402bb7 in main (ac=<value optimized out>, av=<value optimized out>) at signal06.c:151

(gdb) disassemble 
Dump of assembler code for function test:
0x0000000000402910 <test+0>:	push   %rbx
0x0000000000402911 <test+1>:	sub    $0x10,%rsp
0x0000000000402915 <test+5>:	movsd  %xmm0,0x8(%rsp)
0x000000000040291b <test+11>:	callq  0x402278 <getpid@plt>
0x0000000000402920 <test+16>:	movsd  0x8(%rsp),%xmm0
0x0000000000402926 <test+22>:	mov    %eax,%edi
0x0000000000402928 <test+24>:	movsd  %xmm0,0x2136d0(%rip)        # 0x616000 <D>
0x0000000000402930 <test+32>:	movsd  0x2136c8(%rip),%xmm0        # 0x616000 <D>
0x0000000000402938 <test+40>:	ucomisd 0x8(%rsp),%xmm0
0x000000000040293e <test+46>:	jne    0x4029fc <test+236>
0x0000000000402944 <test+52>:	jp     0x4029fc <test+236>
0x000000000040294a <test+58>:	xor    %ebx,%ebx
0x000000000040294c <test+60>:	mov    $0xc8,%edx
0x0000000000402951 <test+65>:	mov    $0x1,%esi
0x0000000000402956 <test+70>:	jmp    0x402962 <test+82>
0x0000000000402958 <test+72>:	cmp    $0x2710,%ebx
0x000000000040295e <test+78>:	xchg   %ax,%ax
0x0000000000402960 <test+80>:	je     0x4029cf <test+191>
0x0000000000402962 <test+82>:	mov    %edx,%eax
0x0000000000402964 <test+84>:	syscall 
0x0000000000402966 <test+86>:	movsd  0x213692(%rip),%xmm0        # 0x616000 <D>
0x000000000040296e <test+94>:	add    $0x1,%ebx
0x0000000000402971 <test+97>:	ucomisd 0x8(%rsp),%xmm0
0x0000000000402977 <test+103>:	jp     0x40297b <test+107>
0x0000000000402979 <test+105>:	je     0x402958 <test+72>
0x000000000040297b <test+107>:	xor    %eax,%eax
0x000000000040297d <test+109>:	mov    %ebx,%r8d
0x0000000000402980 <test+112>:	mov    $0x40b2cb,%ecx
0x0000000000402985 <test+117>:	mov    $0x10,%edx
0x000000000040298a <test+122>:	mov    $0x51,%esi
0x000000000040298f <test+127>:	mov    $0x40b2c0,%edi
0x0000000000402994 <test+132>:	movl   $0x1,0x21e662(%rip)        # 0x621000 <FLAGE>
0x000000000040299e <test+142>:	callq  0x4043d0 <tst_resm_>
0x00000000004029a3 <test+147>:	cmp    $0x2710,%ebx
0x00000000004029a9 <test+153>:	jne    0x402a24 <test+276>
0x00000000004029ab <test+155>:	mov    0x20ec16(%rip),%r8        # 0x6115c8 <TCID>
0x00000000004029b2 <test+162>:	add    $0x10,%rsp
0x00000000004029b6 <test+166>:	mov    $0x40b2d5,%ecx
0x00000000004029bb <test+171>:	pop    %rbx
0x00000000004029bc <test+172>:	xor    %edx,%edx
0x00000000004029be <test+174>:	mov    $0x54,%esi
0x00000000004029c3 <test+179>:	mov    $0x40b2c0,%edi
0x00000000004029c8 <test+184>:	xor    %eax,%eax
0x00000000004029ca <test+186>:	jmpq   0x4043d0 <tst_resm_>
0x00000000004029cf <test+191>:	mov    $0x2710,%r8d
0x00000000004029d5 <test+197>:	mov    $0x40b2cb,%ecx
0x00000000004029da <test+202>:	mov    $0x10,%edx
0x00000000004029df <test+207>:	mov    $0x51,%esi
0x00000000004029e4 <test+212>:	mov    $0x40b2c0,%edi
0x00000000004029e9 <test+217>:	xor    %eax,%eax
0x00000000004029eb <test+219>:	movl   $0x1,0x21e60b(%rip)        # 0x621000 <FLAGE>
0x00000000004029f5 <test+229>:	callq  0x4043d0 <tst_resm_>
0x00000000004029fa <test+234>:	jmp    0x4029ab <test+155>
0x00000000004029fc <test+236>:	xor    %r8d,%r8d
0x00000000004029ff <test+239>:	mov    $0x40b2cb,%ecx
0x0000000000402a04 <test+244>:	mov    $0x10,%edx
0x0000000000402a09 <test+249>:	mov    $0x51,%esi
0x0000000000402a0e <test+254>:	mov    $0x40b2c0,%edi
0x0000000000402a13 <test+259>:	xor    %eax,%eax
0x0000000000402a15 <test+261>:	movl   $0x1,0x21e5e1(%rip)        # 0x621000 <FLAGE>
0x0000000000402a1f <test+271>:	callq  0x4043d0 <tst_resm_>
0x0000000000402a24 <test+276>:	mov    $0x40b2e7,%ecx
0x0000000000402a29 <test+281>:	mov    $0x1,%edx
0x0000000000402a2e <test+286>:	mov    $0x56,%esi
0x0000000000402a33 <test+291>:	mov    $0x40b2c0,%edi
0x0000000000402a38 <test+296>:	xor    %eax,%eax
0x0000000000402a3a <test+298>:	callq  0x4043d0 <tst_resm_>
0x0000000000402a3f <test+303>:	callq  0x404670 <tst_exit>
End of assembler dump.

> 
> Since what we do is disable/enable write access to the alternative while
> simultaneously hammering the process with SIGHUP in order to trigger a
> segfault inside of the SIGHUP signal handler.
> 
> I wonder why we have to do that asynchronously in the first place, if
> there is a need to hit a particular spot in the kernel while the signal
> stack is being written.
> 
> Anyway on which systems is this bug reproducible? I can try to make it
> both reliable and fast.

Thanks very much.
The segfault can be reproduced reliably on RHEL5.11GA (2.6.18-398.el5).

> 
>>> I do not like much that the test takes ten times more time to finish
>>> now.
>>
>> Yes, it will take ten times more time than before, but it only takes
>> 3~4 seconds to finish...
> 
> Let me put that in perspective. We have more than 3000 tests in LTP, if
> each of these would take just a few seconds more, the test run would
> take a few more hours to finish.
> 
> And there is a reason why we need the LTP run to be as fast as possible.
> The longer the run takes the less people would use it and it would be
> used less frequently which would make all the work put into these
> testcases less valuable.
> 
> All in all I'm mildly opposed to each change that increases the runtime
> if it's not absolutely necessary.
> 

OK, I understand. I will keep that in mind next time, thanks.

>> By current LOOPS(10000), most of the time, the buggy kernel can be
>> reproduced, but there is still a chance(about 0.5% in my environment)
>> to miss.
> 
> Hmm, what about increasing it twice or four times? What is the
> probability of missing the bug then?
> 

2 times(20000 loops):	99.9% reproducible
4 times(40000 loops):	100% reproducible

Is it acceptable to increase it four times?


Best Regards,
Guangwen Feng