public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* Processes hang in an unkillable state
@ 2011-04-12 10:32 Robert Święcki
  2011-04-12 12:44 ` Américo Wang
  0 siblings, 1 reply; 19+ messages in thread
From: Robert Święcki @ 2011-04-12 10:32 UTC (permalink / raw)
  To: linux-kernel

[-- Attachment #1: Type: text/plain, Size: 481 bytes --]

Hi, while fuzzing Linux system calls (32bit fuzzer, 64bi linux
kernel), it happens after some time (10-20mins) that some processes
enter a state which makes them un-killable. They are either in R or D
state.

# strace ps wwuax
...
...
open("/proc/450/cmdline", O_RDONLY)     = 6
read(6,  -             hangs....

# kill -9 450
# kill -9 450 (no ESRCH)

More data in the attachment - I'll keep it in the kdb session for
further examination.

-- 
Robert Święcki

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 450.txt --]
[-- Type: text/plain; charset=GB2312; name="450.txt", Size: 9154 bytes --]

root@ise-test:/proc/450# uname -a
Linux ise-test 2.6.38 #6 SMP PREEMPT Tue Apr 12 11:25:19 CEST 2011 x86_64 GNU/Linux

root@ise-test:/proc/450# cat status
Name:	iknowthis
State:	D (disk sleep)
Tgid:	450
Pid:	450
PPid:	1
TracerPid:	0
Uid:	1001	1001	1001	1001
Gid:	1001	1001	1001	1001
FDSize:	64
Groups:	1001 
VmPeak:	   12292 kB
VmSize:	   11116 kB
VmLck:	      44 kB
VmHWM:	    1904 kB
VmRSS:	    1324 kB
VmData:	     712 kB
VmStk:	    7616 kB
VmExe:	     160 kB
VmLib:	    2556 kB
VmPTE:	      28 kB
VmSwap:	       0 kB
Threads:	1
SigQ:	785/16382
SigPnd:	0000000000000100   ?? SIGKILL
ShdPnd:	0000000000000100
SigBlk:	0000000000000000
SigIgn:	0000000001001000
SigCgt:	0000000000000000
CapInh:	0000000000000000
CapPrm:	0000000000000000
CapEff:	0000000000000000
CapBnd:	ffffffffffffffff
Cpus_allowed:	ff
Cpus_allowed_list:	0-7
Mems_allowed:	00000000,00000001
Mems_allowed_list:	0
voluntary_ctxt_switches:	2303
nonvoluntary_ctxt_switches:	287

# cat wchan 
call_rwsem_down_write_failed

root@ise-test:/proc/450# ls -l fd/
total 0
lrwx------ 1 test test 64 2011-04-12 12:17 0 -> /dev/pts/1
lrwx------ 1 test test 64 2011-04-12 12:17 1 -> /dev/pts/1
lr-x------ 1 test test 64 2011-04-12 12:17 10 -> /dev/shm/??(?x??Ӑd?=R1Y ??C??\:?i}?)B????d!0?E?g??? (deleted)
lr-x------ 1 test test 64 2011-04-12 12:17 11 -> /home
l-wx------ 1 test test 64 2011-04-12 12:17 12 -> /var/lock/?=ݾ+????7???????@N??eѵj?P5´?~?w?PomYK??ny_???ulv?
l-wx------ 1 test test 64 2011-04-12 12:17 13 -> /var/lock/) ?????????`v?-ҩuQ?????O?갈????Nnhc ??N?I?G???Ȩ??????!?{V??ssifs>???N????q?I?ل??!T>??3??z?tI4?B?X"T??ӓ (deleted)
lr-x------ 1 test test 64 2011-04-12 12:17 14 -> /dev/shm/????]?1???B??c?T?eu????X????????????^???~jF???-?e?m?u
l-wx------ 1 test test 64 2011-04-12 12:17 15 -> /var/lock/?笉˩F?q?jAHu??o ??Ǘu???????O?;B??QmSX???p?߃?S??w{??,?)???D??V???q?ø#??
lrwx------ 1 test test 64 2011-04-12 12:17 2 -> /dev/pts/1
lr-x------ 1 test test 64 2011-04-12 12:17 3 -> /dev/urandom
lrwx------ 1 test test 64 2011-04-12 12:17 4 -> anon_inode:[timerfd]
lr-x------ 1 test test 64 2011-04-12 12:17 5 -> /sys/kernel/debug/asoc/codecs
lr-x------ 1 test test 64 2011-04-12 12:17 6 -> /proc/i2o
l-wx------ 1 test test 64 2011-04-12 12:17 7 -> /dev/shm/?????z?u????????Z???n"?d?i3Ǘ??????E??^?????O?}7+?[??#!???&A?R~&??????(։?w???\w?c"E?&?"?嵿?)ʝ?+?-?M_??R?Y??R???6?FAhGu?R?6?k??????l?B?w??~?Ց?^??i?????[.? (deleted)
lr-x------ 1 test test 64 2011-04-12 12:17 8 -> /var/lock/?5?ݕ?C???\#]???b-??????q=???,?v?]???????a?i???ΐ??\??HTb??c?@\0?M???????????|???x?X?[{&+?$?X???????9?
l-wx------ 1 test test 64 2011-04-12 12:17 9 -> /var/lock/BF}???P(??`?ϰ??.???

root@ise-test:/proc/450# for x in fdinfo/*; do echo == $x ==; cat "$x"; done
== fdinfo/0 ==
pos:	0
flags:	0100002
== fdinfo/1 ==
pos:	0
flags:	0100002
== fdinfo/10 ==
pos:	1351256300595494888
flags:	024240004004
== fdinfo/11 ==
pos:	1084786014
flags:	04040620010
== fdinfo/12 ==
pos:	1811044362
flags:	0100001
== fdinfo/13 ==
pos:	3226073394
flags:	0100001
== fdinfo/14 ==
pos:	803230536
flags:	020026710024
== fdinfo/15 ==
pos:	1103604314
flags:	0100001
== fdinfo/2 ==
pos:	0
flags:	0100002
== fdinfo/3 ==
pos:	0
flags:	00
== fdinfo/4 ==
pos:	0
flags:	02
== fdinfo/5 ==
pos:	7081918111360778253
flags:	020200004044
== fdinfo/6 ==
pos:	2075525754
flags:	0200200000
== fdinfo/7 ==
pos:	2145128454
flags:	0100001
== fdinfo/8 ==
pos:	1343491212
flags:	020471010020
== fdinfo/9 ==
pos:	1073743904
flags:	01100001

(gdb) attach 450
Attaching to process 450
[hangs]

root@ise-test:~# cd /proc/450/
root@ise-test:/proc/450# ls
[hangs, but it worked]

root@ise-test:~# cat /proc/mounts
rootfs / rootfs rw 0 0
/dev/root / ext4 rw,relatime,errors=remount-ro,barrier=1,data=ordered 0 0
devtmpfs /dev devtmpfs rw,relatime,size=1981824k,nr_inodes=495456,mode=755 0 0
none /proc proc rw,nosuid,nodev,noexec,relatime 0 0
none /sys sysfs rw,nosuid,nodev,noexec,relatime 0 0
none /proc/sys/fs/binfmt_misc binfmt_misc rw,nosuid,nodev,noexec,relatime 0 0
none /sys/fs/fuse/connections fusectl rw,relatime 0 0
none /sys/kernel/debug debugfs rw,relatime 0 0
none /sys/kernel/security securityfs rw,relatime 0 0
none /dev/pts devpts rw,nosuid,noexec,relatime,gid=5,mode=620,ptmxmode=000 0 0
none /dev/shm tmpfs rw,nosuid,nodev,relatime 0 0
none /var/run tmpfs rw,nosuid,relatime,mode=755 0 0
none /var/lock tmpfs rw,nosuid,nodev,noexec,relatime 0 0

root@ise-test:~# ls -l /proc/450/root
lrwxrwxrwx 1 test test 0 2011-04-12 12:17 /proc/450/root -> /

root@ise-test:~# cat /proc/450/smaps
[hangs]

root@ise-test:~# cat /proc/450/syscall 
7 0xac4 0xff806d88 0x40000000 0x96 0xac5 0xff806da8 0xff806d18 0xffffe430
[0]kdb> bt
Stack traceback for pid 450
0xffff880118905cc0      450        1  0    0   D  0xffff880118906140  iknowthis
<c> ffff88011da8fb58<c> 0000000000000086<c> ffff88012bffbe0a<c> 0000000000000001<c>
<c> ffff880132373331<c> ffff88011da8e000<c> ffff88011da8e000<c> ffff880118905cc0<c>
<c> ffff88011da8e010<c> ffff880118906078<c> ffff88011da8ffd8<c> ffff88011da8ffd8<c>
Call Trace:
 [<ffffffff8224f665>] rwsem_down_failed_common+0xc5/0x160
 [<ffffffff8224f713>] rwsem_down_write_failed+0x13/0x20
 [<ffffffff815956c3>] call_rwsem_down_write_failed+0x13/0x20
 [<ffffffff8224e985>] ? down_write+0x25/0x30
 [<ffffffff811b0819>] do_coredump+0x139/0xbb0
 [<ffffffff810a4e09>] ? sub_preempt_count+0xa9/0xe0
 [<ffffffff810cb424>] ? __dequeue_signal+0xf4/0x190
 [<ffffffff810cbb4b>] get_signal_to_deliver+0x29b/0x4a0
 [<ffffffff8104e365>] do_signal+0x75/0x7b0
 [<ffffffff810f8687>] ? compat_sys_wait4+0x97/0xe0
 [<ffffffff810b5322>] ? do_fork+0xe2/0x370
 [<ffffffff8104eb05>] do_notify_resume+0x65/0x90
 [<ffffffff8224f859>] ? trace_hardirqs_on_thunk+0x3a/0x3c
 [<ffffffff8104f378>] int_signal+0x12/0x17

[0]kdb> btc 0
Stack traceback for pid 23592
0xffff880112c04590    23592        1  1    0   R  0xffff880112c04a10 *iknowthis
<c> ffff8800cfc03dc8<c> 0000000000000018<c>
Call Trace:
 <#DB>  <<EOE>>  <IRQ>  [<ffffffff816cc6b9>] ? __handle_sysrq+0x129/0x190
 [<ffffffff816cc926>] ? handle_sysrq+0x26/0x30
 [<ffffffff816e4b8d>] ? serial8250_handle_port+0x21d/0x320
 [<ffffffff816e4d2b>] ? serial8250_interrupt+0x9b/0x140
 [<ffffffff8111a8b8>] ? handle_IRQ_event+0x68/0x1b0
 [<ffffffff8111e7b5>] ? move_native_irq+0x15/0x70
 [<ffffffff8111ceae>] ? handle_edge_irq+0xbe/0x170
 [<ffffffff81052319>] ? handle_irq+0x49/0xa0
 [<ffffffff82250dfd>] ? do_IRQ+0x5d/0xe0
 [<ffffffff82250693>] ? ret_from_intr+0x0/0x1a
 <EOI>  [<ffffffff8117a7b8>] ? do_mlock_pages+0x78/0x150
 [<ffffffff8117a7b8>] ? do_mlock_pages+0x78/0x150
 [<ffffffff8117b21c>] ? sys_mlock+0x11c/0x130
 [<ffffffff8109a653>] ? ia32_sysret+0x0/0x5
[0]kdb> btc 1
Stack traceback for pid 15223
0xffff88011ebb4590    15223     5594  1    1   R  0xffff88011ebb4a10  iknowthis
<c> ffff88011e529c48<c> 0000000000000018<c>
Call Trace:


[0]kdb> summary
sysname    Linux
release    2.6.38
version    #6 SMP PREEMPT Tue Apr 12 11:25:19 CEST 2011
machine    x86_64
nodename   ise-test
domainname (none)
ccversion  CCVERSION
date       2011-04-12 10:24:42 tz_minuteswest -120
uptime     00:42
load avg   38.13 35.98 28.81

MemTotal:         992186 kB
MemFree:          899924 kB
Buffers:            5411 kB


after go in kdb, and another kdb session:

[0]kdb> btc 0
Stack traceback for pid 15802
0xffff88011e818000    15802        1  1    0   R  0xffff88011e818480 *iknowthis
<c> ffff8800cfc03dc8<c> 0000000000000018<c>
Call Trace:
 <#DB>  <<EOE>>  <IRQ>  [<ffffffff816cc6b9>] ? __handle_sysrq+0x129/0x190
 [<ffffffff816cc926>] ? handle_sysrq+0x26/0x30
 [<ffffffff816e4b8d>] ? serial8250_handle_port+0x21d/0x320
 [<ffffffff816e4d2b>] ? serial8250_interrupt+0x9b/0x140
 [<ffffffff8111a8b8>] ? handle_IRQ_event+0x68/0x1b0
 [<ffffffff8111e7b5>] ? move_native_irq+0x15/0x70
 [<ffffffff8111ceae>] ? handle_edge_irq+0xbe/0x170
 [<ffffffff81052319>] ? handle_irq+0x49/0xa0
 [<ffffffff82250dfd>] ? do_IRQ+0x5d/0xe0
 [<ffffffff82250693>] ? ret_from_intr+0x0/0x1a
 <EOI>  [<ffffffff82250705>] ? restore_args+0x0/0x30
 [<ffffffff8117a71d>] ? __mlock_vma_pages_range+0x9d/0xc0
 [<ffffffff8117a703>] ? __mlock_vma_pages_range+0x83/0xc0
 [<ffffffff8117a7b8>] ? do_mlock_pages+0x78/0x150
 [<ffffffff8117b21c>] ? sys_mlock+0x11c/0x130
 [<ffffffff8109a653>] ? ia32_sysret+0x0/0x5


and another go/kdb:

[0]kdb> btc 0
Stack traceback for pid 15802
0xffff88011e818000    15802        1  1    0   R  0xffff88011e818480 *iknowthis
<c> ffff8800cfc03dc8<c> 0000000000000018<c>
Call Trace:
 <#DB>  <<EOE>>  <IRQ>  [<ffffffff816cc6b9>] ? __handle_sysrq+0x129/0x190
 [<ffffffff816cc926>] ? handle_sysrq+0x26/0x30
 [<ffffffff816e4b8d>] ? serial8250_handle_port+0x21d/0x320
 [<ffffffff816e4d2b>] ? serial8250_interrupt+0x9b/0x140
 [<ffffffff8111a8b8>] ? handle_IRQ_event+0x68/0x1b0
 [<ffffffff8111e7b5>] ? move_native_irq+0x15/0x70
 [<ffffffff8111ceae>] ? handle_edge_irq+0xbe/0x170
 [<ffffffff81052319>] ? handle_irq+0x49/0xa0
 [<ffffffff82250dfd>] ? do_IRQ+0x5d/0xe0
 [<ffffffff82250693>] ? ret_from_intr+0x0/0x1a
 <EOI>  [<ffffffff8117a7d2>] ? do_mlock_pages+0x92/0x150
 [<ffffffff8117a7b8>] ? do_mlock_pages+0x78/0x150
 [<ffffffff8117b21c>] ? sys_mlock+0x11c/0x130
 [<ffffffff8109a653>] ? ia32_sysret+0x0/0x5

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2011-04-13 12:19 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-04-12 10:32 Processes hang in an unkillable state Robert Święcki
2011-04-12 12:44 ` Américo Wang
2011-04-12 13:03   ` Robert Święcki
2011-04-12 13:08     ` Robert Święcki
2011-04-12 18:28   ` Oleg Nesterov
2011-04-12 18:34     ` Robert Święcki
2011-04-12 19:20       ` Oleg Nesterov
     [not found]         ` <BANLkTimJgx_x=0bpBDWaUkJgZPYs3_iwsg@mail.gmail.com>
     [not found]           ` <BANLkTimBegMUq2fX67o2YnoATWG-NDbKgA@mail.gmail.com>
2011-04-12 20:22             ` Linus Torvalds
     [not found]               ` <BANLkTi=HyK+8DhJS3db0j2fefdOjwVMN3w@mail.gmail.com>
2011-04-12 21:46                 ` Linus Torvalds
2011-04-12 21:59                   ` Robert Święcki
2011-04-12 22:12                     ` Linus Torvalds
2011-04-12 22:16                       ` Robert Święcki
2011-04-12 22:18                         ` Linus Torvalds
2011-04-12 22:30                           ` Robert Święcki
2011-04-12 22:43                             ` Linus Torvalds
2011-04-13 12:19                               ` Robert Święcki
2011-04-12 19:07     ` [PATCH 0/1] Was: " Oleg Nesterov
2011-04-12 19:08       ` [PATCH 1/1] __mlock_vma_pages_range: stack_guard_page() case returns the wrong value Oleg Nesterov
2011-04-12 19:18         ` Robert Święcki

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox