Linux NFS development
 help / color / mirror / Atom feed
* linux 2.4.18-5 nfs/rpc problem
@ 2003-10-08 22:42 Guzovsky, Eduard
  2003-10-10 17:12 ` David Jeffery
  0 siblings, 1 reply; 7+ messages in thread
From: Guzovsky, Eduard @ 2003-10-08 22:42 UTC (permalink / raw)
  To: 'nfs@lists.sourceforge.net'; +Cc: Pariseau, Luc, A.K. Srikanth

[-- Attachment #1: Type: text/plain, Size: 9567 bytes --]

We ran into a deadlock problem running Red Hat linux 2.4.18-5
on a diskless machine. Root file system is mounted over 
udp nfs v3. The problem is created by an application reading,
writing, renaming, deleting files. This machine has 256MB
of memory.

Bellow is some relevant info from the deadlocked machine.

It looks like "rpciod" (see the stack of thread ID 8 in the 
gdb listing) hangs in __wait_on_page(page=0xc1241d7c) trying 
to do nfs_commit_done() on a stale file handle. At the same 
time "kupdated" (see the stack of thread ID 6) is stuck in 
nfs_wait_on_request() while trying to do nfs_writepage()
on the same page rpciod hangs. The problem goes away if the 
file system is mounted with "sync" option. 

Is this a known problem ? Is a fix available ?

Thanks,

-Ed



rpc_show_tasks() output:
------------------------

-pid- proc flgs runs status -client- -prog- --rqstp- -timeout -rpcwait
-action- --exit--
33180 0004 0000 0002 000120 cff38aa0 100003 cff32160 00000000  <NULL>
c02090a0        0
32943 0021 0001 0002 -00116 cff38aa0 100003 cff32b58 00000000  <NULL>
0 c0176d10
32942 0021 0001 0002 000128 cff38aa0 100003 cff32078 00000000   schedq
c02090a0 c0176d10
32941 0021 0001 0002 000128 cff38aa0 100003 cff32418 00000000   schedq
c02090a0 c0176d10
00156 0000 0081 0002 -00110 cff38b20 100003        0 00000000   schedq
c016f890 c016f990
00006 0000 0081 0002 -00110 cff38aa0 100003        0 00000000   schedq
c016f890 c016f990

kernel gdb session:
-------------------

(gdb) info threads
  26 Thread 1064  schedule_timeout (timeout=0x7fffffff) at timer.c:851
  25 Thread 1055  sys_wait4 (pid=0xffffffff, stat_addr=0x0, options=0x0, 
    ru=0x0) at exit.c:649
  24 Thread 855  schedule_timeout (timeout=0xcc1d5f00) at timer.c:879
  23 Thread 825  schedule_timeout (timeout=0x7fffffff) at timer.c:851
  22 Thread 782  schedule_timeout (timeout=0xcc2cbf00) at timer.c:879
  21 Thread 781  schedule_timeout (timeout=0x7fffffff) at timer.c:851
  20 Thread 763  sys_msgrcv (msqid=0x0, msgp=0xbffffc40, msgsz=0x100, 
    msgtyp=0x1, msgflg=0x0) at msg.c:816
  19 Thread 745  schedule_timeout (timeout=0xcc3b3f74) at timer.c:879
  18 Thread 711  schedule_timeout (timeout=0xcc3d1f74) at timer.c:879
  17 Thread 691  schedule_timeout (timeout=0xcc3dff00) at timer.c:879
  16 Thread 673  schedule_timeout (timeout=0x7fffffff) at timer.c:851
  15 Thread 655  schedule_timeout (timeout=0x7fffffff) at timer.c:851
  14 Thread 616  schedule_timeout (timeout=0x7fffffff) at timer.c:851
  13 Thread 582  schedule_timeout (timeout=0x7fffffff) at timer.c:851
  12 Thread 498  schedule_timeout (timeout=0x7fffffff) at timer.c:851
  11 Thread 463  schedule_timeout (timeout=0x7fffffff) at timer.c:851
  10 Thread 445  do_syslog (type=0x2, buf=0x804df20 "u\b^[-bèyS",
len=0xfff)^[-A
    at printk.c:191
  9 Thread 440  schedule_timeout (timeout=0x7fffffff) at timer.c:851
  8 Thread 8  ___wait_on_page (page=0xc1241d7c)
    at /usr/src/linux_2.4.18.5/include/asm/bitops.h:243
  7 Thread 7  interruptible_sleep_on (q=0xc1393784) at sched.c:926
  6 Thread 6  nfs_wait_on_request (req=0xcff27ae0) at pagelist.c:223
  5 Thread 5  interruptible_sleep_on (q=0xc02c6d88) at sched.c:926
  4 Thread 4  schedule_timeout (timeout=0xc13a5f80) at timer.c:879
  3 Thread 3  ksoftirqd (__bind_cpu=0x0) at softirq.c:386
  2 Thread 2  context_thread (startup=0xc02f008c) at context.c:101
  1 Thread 1  schedule_timeout (timeout=0xc1357f00) at timer.c:879

[kupdated] info
---------------
(gdb) thread 6
[Switching to thread 6 (Thread 6)]#0  nfs_wait_on_request (req=0xcff27ae0)
    at pagelist.c:223
(gdb) bt
#0  nfs_wait_on_request (req=0xcff27ae0) at pagelist.c:223
#1  0xc0175636 in nfs_wait_on_requests (inode=0xcb931080, file=0x0, 
    idx_start=0x0, npages=0x1) at write.c:479
#2  0xc0177268 in nfs_sync_file (inode=0xcb931080, file=0x0, idx_start=0x0, 
    npages=0x1, how=0x6) at write.c:1307
#3  0xc0175436 in nfs_writepage (page=0xc1241d7c)
    at /usr/src/linux_2.4.18.5/include/linux/nfs_fs.h:257
#4  0xc0127d14 in filemap_fdatasync (mapping=0xcb931134) at filemap.c:594
#5  0xc0149be1 in sync_unlocked_inodes () at inode.c:224
#6  0xc013b778 in sync_old_buffers () at buffer.c:2801
#7  0xc013ba72 in kupdate (startup=0xc02f0100) at buffer.c:2996
#8  0xc0107306 in kernel_thread (fn=0x202a2, arg=0xc1356000, flags=0x0)
    at process.c:499

(gdb) p *(struct nfs_page *)0xcff27ae0
$3 = {wb_hash = {next = 0xcff27ae0, prev = 0xcff27ae0}, wb_lru = {
    next = 0xcff27848, prev = 0xcff27788}, wb_list = {next = 0xcff27af0, 
    prev = 0xcff27af0}, wb_list_head = 0x0, wb_file = 0x0, 
  wb_inode = 0xcb931080, wb_cred = 0xcff38ba0, wb_page = 0xc1241d7c, 
  wb_wait = {lock = {gcc_is_buggy = 0x0}, task_list = {next = 0xc13a1eb0, 
      prev = 0xc13a1eb0}}, wb_timeout = 0xa785, wb_offset = 0x0, 
  wb_bytes = 0x400, wb_count = 0x3, wb_flags = 0x1, wb_verf = {
    committed = NFS_UNSTABLE, verifier = {0xbf766b3f, 0x87af0400}}}

deadlocking page info
---------------------
(gdb) p *(struct page *)(0xc1241d7c)
$6 = {list = {next = 0xcb931144, prev = 0xcb931144}, mapping = 0xcb931134, 
  index = 0x0, next_hash = 0x0, count = {counter = 0x4}, flags = 0x100004d, 
  lru = {next = 0xc125a468, prev = 0xc123b9dc}, age = 0x5, pte_chain = 0x0, 
  pprev_hash = 0xcffdee60, buffers = 0x0}
(gdb) p *((struct page *)(0xc1241d7c))->mapping
$7 = {clean_pages = {next = 0xcb931134, prev = 0xcb931134}, dirty_pages = {
    next = 0xcb93113c, prev = 0xcb93113c}, locked_pages = {next =
0xc1241d7c, 
    prev = 0xc1241d7c}, nrpages = 0x1, a_ops = 0xc02c8820, host =
0xcb931080, 
  i_mmap = 0x0, i_mmap_shared = 0x0, i_shared_lock = {gcc_is_buggy = 0x0}, 
  gfp_mask = 0x1d2}
(gdb) p *((struct page *)(0xc1241d7c))->mapping->a_ops
$8 = {writepage = 0xc0175400 <nfs_writepage>, 
  readpage = 0xc0174a50 <nfs_readpage>, 
  sync_page = 0xc016f400 <nfs_sync_page>, 
  prepare_write = 0xc016f3e0 <nfs_prepare_write>, 
  commit_write = 0xc016f3f0 <nfs_commit_write>, bmap = 0, flushpage = 0, 
  releasepage = 0, direct_IO = 0, removepage = 0}

[rpciod] info
-------------
(gdb) thread 8
[Switching to thread 8 (Thread 8)]#0  ___wait_on_page (page=0xc1241d7c)
    at /usr/src/linux_2.4.18.5/include/asm/bitops.h:243
(gdb) bt
#0  ___wait_on_page (page=0xc1241d7c)
    at /usr/src/linux_2.4.18.5/include/asm/bitops.h:243
#1  0xc01278d5 in truncate_list_pages (head=0xcb931144, start=0x0, 
    partial=0xcff2df1c)
    at /usr/src/linux_2.4.18.5/include/linux/pagemap.h:97
#2  0xc012796e in truncate_inode_pages (mapping=0xcb931134, lstart=0x0)
    at filemap.c:349
#3  0xc014acb2 in iput (inode=0xcb931080) at inode.c:1086
#4  0xc0176f45 in nfs_commit_done (task=0xcfe284c0) at write.c:353
#5  0xc020c71a in __rpc_execute (task=0xcfe284c0) at sched.c:648
#6  0xc020c909 in __rpc_schedule () at sched.c:733
#7  0xc020d168 in rpciod (ptr=0xc02d954c) at sched.c:1076
#8  0xc0107306 in kernel_thread (fn=0xc0316660 <tcp_hashinfo>,
arg=0xc139fc00, 
    flags=0x0) at process.c:499
#9  0x00000008 in ?? ()

rpc_task (tk_pid 32943) info (note: tk_status = -116 ESTALE)
------------------------------------------------------------
(gdb) p *(struct rpc_task *)0xcfe284c0
$9 = {tk_list = {next = 0xc02d9514, prev = 0xc02d9514}, tk_magic = 0xf00baa,

  tk_task = {next = 0xcfe28c4c, prev = 0xcfe2804c}, tk_client = 0xcff38aa0, 
  tk_rqstp = 0xcff32b58, tk_status = 0xffffff8c, tk_rpcwait = 0x0, tk_msg =
{
    rpc_proc = 0x15, rpc_argp = 0xcfe28550, rpc_resp = 0xcfe285a8, 
    rpc_cred = 0xcff38ba0}, tk_buffer = 0xcff78000, tk_garb_retry = 0x2, 
  tk_cred_retry = 0x2, tk_suid_retry = 0x1, tk_timeout_fn = 0, 
  tk_callback = 0, tk_action = 0, tk_exit = 0xc0176d10 <nfs_commit_done>, 
  tk_release = 0xc0175070 <nfs_writedata_release>, tk_calldata = 0xcfe284c0,

  tk_timer = {list = {next = 0x0, prev = 0x0}, expires = 0xa7c8, 
    data = 0xcfe284c0, function = 0xc020bab0 <rpc_run_timer>}, tk_wait = {
    lock = {gcc_is_buggy = 0x0}, task_list = {next = 0xcfe2852c, 
      prev = 0xcfe2852c}}, tk_timeout = 0x0, tk_flags = 0x1, tk_lock = 0x0, 
  tk_active = 0x1, tk_wakeup = 0x0, tk_runstate = 0x2, tk_pid = 0x80af}
(gdb) p/d 0x80af
$10 = 32943

(gdb) p *((struct rpc_task *)0xcfe284c0)->tk_rqstp
$11 = {rq_xprt = 0xcff32000, rq_timeout = {to_current = 0x46, 
    to_initval = 0x46, to_maxval = 0x1770, to_increment = 0x0, 
    to_resrvval = 0x2ee0, to_retries = 0x3, to_exponential = 0x1}, 
  rq_snd_buf = {io_vec = {{iov_base = 0xcff78000, iov_len = 0x64}, {
        iov_base = 0x0, iov_len = 0x0}, {iov_base = 0x0, iov_len = 0x0}, {
        iov_base = 0x0, iov_len = 0x0}, {iov_base = 0x0, iov_len = 0x0}, {
        iov_base = 0x0, iov_len = 0x0}, {iov_base = 0x0, iov_len = 0x0}, {
        iov_base = 0x0, iov_len = 0x0}, {iov_base = 0x0, iov_len = 0x0}, {
        iov_base = 0x0, iov_len = 0x0}}, io_nr = 0x1, io_len = 0x64}, 
  rq_rcv_buf = {io_vec = {{iov_base = 0xcff78280, iov_len = 0x280}, {
        iov_base = 0x0, iov_len = 0x0}, {iov_base = 0x0, iov_len = 0x0}, {
        iov_base = 0x0, iov_len = 0x0}, {iov_base = 0x0, iov_len = 0x0}, {
        iov_base = 0x0, iov_len = 0x0}, {iov_base = 0x0, iov_len = 0x0}, {
        iov_base = 0x0, iov_len = 0x0}, {iov_base = 0x0, iov_len = 0x0}, {
        iov_base = 0x0, iov_len = 0x0}}, io_nr = 0x1, io_len = 0x280}, 
  rq_task = 0xcfe284c0, rq_xid = 0x424070ad, rq_next = 0x0, rq_cong = 0x0, 
  rq_received = 0x1, rq_creddata = {0x0, 0x0}, rq_bytes_sent = 0x0}

[-- Attachment #2: Type: text/html, Size: 15454 bytes --]

^ permalink raw reply	[flat|nested] 7+ messages in thread
* RE: linux 2.4.18-5 nfs/rpc problem
@ 2003-10-11  0:18 Guzovsky, Eduard
  0 siblings, 0 replies; 7+ messages in thread
From: Guzovsky, Eduard @ 2003-10-11  0:18 UTC (permalink / raw)
  To: 'Trond Myklebust', David Jeffery
  Cc: nfs, Pariseau, Luc, A.K. Srikanth

[-- Attachment #1: Type: text/plain, Size: 3271 bytes --]

David, we were able to consistently reproduce the problem. 
After I made modifications Trond suggested we do not
see the problem any more. So I would say it is fixed.
Trond, will this fix make it into 2.2.24 ? 

Thank you very much for your help,

-Ed
 

 > -----Original Message-----
 > From: Trond Myklebust [mailto:trond.myklebust@fys.uio.no]
 > Sent: Friday, October 10, 2003 2:58 PM
 > To: David Jeffery
 > Cc: Guzovsky, Eduard; nfs@lists.sourceforge.net
 > Subject: Re: [NFS] linux 2.4.18-5 nfs/rpc problem
 > 
 > 
 > >>>>> " " == David Jeffery <david_jeffery@adaptec.com> writes:
 > 
 >      > Eduard, I have also seen this problem with both Red Hat and
 >      > stock kernels.  I've got some patches That I'm testing to try
 >      > and fix the problem for me, but it takes a while for my setup
 >      > to reproduce the problem.  How easy is it for you to trigger?
 > 
 >      > I've attached two patches for 2.4.22 that I'm currently
 >      > testing.  The first (nfsiput.patch) is to fix the deadlocked
 >      > processes you see by moving the iput() call to happen 
 > after the
 >      > waited on request is unlocked.
 > 
 > I take it you're assuming that the problem is a deadlock due to
 > clear_inode() hanging on our page lock? That does indeed appear to be
 > a possible source of contention.
 > 
 > Instead of your 2 patches, though, how about just ensuring that
 > anybody calling nfs_wait_on_requests() already has a reference to the
 > inode?
 > 
 > There is indeed one case where this is not being done: in
 > page_launder() you only have a lock on the page. This means that you
 > may end up calling nfs_wait_on_request() in order to flush out a page
 > only to discover that it called iput() from beneath you and is now
 > waiting in clear_inode() on your page lock.
 > 
 > Cheers,
 >   Trond
 > 
 > --- linux-2.4.23-pre5/fs/nfs/write.c.orig	2003-07-09 
 > 14:11:04.000000000 -0400
 > +++ linux-2.4.23-pre5/fs/nfs/write.c	2003-10-10 
 > 14:54:16.000000000 -0400
 > @@ -225,8 +225,19 @@
 >  	struct inode *inode = page->mapping->host;
 >  	unsigned long end_index;
 >  	unsigned offset = PAGE_CACHE_SIZE;
 > +	int inode_referenced = 0;
 >  	int err;
 >  
 > +	/*
 > +	 * Note: We need to ensure that we have a reference to the inode
 > +	 *       if we are to do asynchronous writes. If not, waiting
 > +	 *       in nfs_wait_on_request() may deadlock with 
 > clear_inode().
 > +	 *
 > +	 *       If igrab() fails here, then it is in any case safe to
 > +	 *       call nfs_wb_page(), since there will be no 
 > pending writes.
 > +	 */
 > +	if (igrab(inode) != 0)
 > +		inode_referenced = 1;
 >  	end_index = inode->i_size >> PAGE_CACHE_SHIFT;
 >  
 >  	/* Ensure we've flushed out any previous writes */
 > @@ -244,7 +255,8 @@
 >  		goto out;
 >  do_it:
 >  	lock_kernel();
 > -	if (NFS_SERVER(inode)->wsize >= PAGE_CACHE_SIZE && 
 > !IS_SYNC(inode)) {
 > +	if (NFS_SERVER(inode)->wsize >= PAGE_CACHE_SIZE && 
 > !IS_SYNC(inode) &&
 > +			inode_referenced) {
 >  		err = nfs_writepage_async(NULL, inode, page, 0, offset);
 >  		if (err >= 0)
 >  			err = 0;
 > @@ -256,7 +268,9 @@
 >  	unlock_kernel();
 >  out:
 >  	UnlockPage(page);
 > -	return err; 
 > +	if (inode_referenced)
 > +		iput(inode);
 > +	return err;
 >  }
 >  
 >  /*
 > 

[-- Attachment #2: Type: text/html, Size: 8278 bytes --]

^ permalink raw reply	[flat|nested] 7+ messages in thread
* RE: linux 2.4.18-5 nfs/rpc problem
@ 2003-10-11  0:20 Guzovsky, Eduard
  0 siblings, 0 replies; 7+ messages in thread
From: Guzovsky, Eduard @ 2003-10-11  0:20 UTC (permalink / raw)
  To: Guzovsky, Eduard, 'Trond Myklebust',
	'David Jeffery'
  Cc: 'nfs@lists.sourceforge.net', Pariseau, Luc, A.K. Srikanth

[-- Attachment #1: Type: text/plain, Size: 3894 bytes --]

Sorry, I meant 2.4.24.

-Ed


 > -----Original Message-----
 > From: Guzovsky, Eduard 
 > Sent: Friday, October 10, 2003 8:18 PM
 > To: 'Trond Myklebust'; David Jeffery
 > Cc: nfs@lists.sourceforge.net; Pariseau, Luc; A.K. Srikanth
 > Subject: RE: [NFS] linux 2.4.18-5 nfs/rpc problem
 > 
 > 
 > David, we were able to consistently reproduce the problem. 
 > After I made modifications Trond suggested we do not
 > see the problem any more. So I would say it is fixed.
 > Trond, will this fix make it into 2.2.24 ? 
 > 
 > Thank you very much for your help,
 > 
 > -Ed
 >  
 > 
 >  > -----Original Message-----
 >  > From: Trond Myklebust [mailto:trond.myklebust@fys.uio.no]
 >  > Sent: Friday, October 10, 2003 2:58 PM
 >  > To: David Jeffery
 >  > Cc: Guzovsky, Eduard; nfs@lists.sourceforge.net
 >  > Subject: Re: [NFS] linux 2.4.18-5 nfs/rpc problem
 >  > 
 >  > 
 >  > >>>>> " " == David Jeffery <david_jeffery@adaptec.com> writes:
 >  > 
 >  >      > Eduard, I have also seen this problem with both Red Hat and
 >  >      > stock kernels.  I've got some patches That I'm 
 > testing to try
 >  >      > and fix the problem for me, but it takes a while 
 > for my setup
 >  >      > to reproduce the problem.  How easy is it for you 
 > to trigger?
 >  > 
 >  >      > I've attached two patches for 2.4.22 that I'm currently
 >  >      > testing.  The first (nfsiput.patch) is to fix the 
 > deadlocked
 >  >      > processes you see by moving the iput() call to happen 
 >  > after the
 >  >      > waited on request is unlocked.
 >  > 
 >  > I take it you're assuming that the problem is a deadlock due to
 >  > clear_inode() hanging on our page lock? That does indeed 
 > appear to be
 >  > a possible source of contention.
 >  > 
 >  > Instead of your 2 patches, though, how about just ensuring that
 >  > anybody calling nfs_wait_on_requests() already has a 
 > reference to the
 >  > inode?
 >  > 
 >  > There is indeed one case where this is not being done: in
 >  > page_launder() you only have a lock on the page. This 
 > means that you
 >  > may end up calling nfs_wait_on_request() in order to 
 > flush out a page
 >  > only to discover that it called iput() from beneath you and is now
 >  > waiting in clear_inode() on your page lock.
 >  > 
 >  > Cheers,
 >  >   Trond
 >  > 
 >  > --- linux-2.4.23-pre5/fs/nfs/write.c.orig	2003-07-09 
 >  > 14:11:04.000000000 -0400
 >  > +++ linux-2.4.23-pre5/fs/nfs/write.c	2003-10-10 
 >  > 14:54:16.000000000 -0400
 >  > @@ -225,8 +225,19 @@
 >  >  	struct inode *inode = page->mapping->host;
 >  >  	unsigned long end_index;
 >  >  	unsigned offset = PAGE_CACHE_SIZE;
 >  > +	int inode_referenced = 0;
 >  >  	int err;
 >  >  
 >  > +	/*
 >  > +	 * Note: We need to ensure that we have a reference to the inode
 >  > +	 *       if we are to do asynchronous writes. If not, waiting
 >  > +	 *       in nfs_wait_on_request() may deadlock with 
 >  > clear_inode().
 >  > +	 *
 >  > +	 *       If igrab() fails here, then it is in any case safe to
 >  > +	 *       call nfs_wb_page(), since there will be no 
 >  > pending writes.
 >  > +	 */
 >  > +	if (igrab(inode) != 0)
 >  > +		inode_referenced = 1;
 >  >  	end_index = inode->i_size >> PAGE_CACHE_SHIFT;
 >  >  
 >  >  	/* Ensure we've flushed out any previous writes */
 >  > @@ -244,7 +255,8 @@
 >  >  		goto out;
 >  >  do_it:
 >  >  	lock_kernel();
 >  > -	if (NFS_SERVER(inode)->wsize >= PAGE_CACHE_SIZE && 
 >  > !IS_SYNC(inode)) {
 >  > +	if (NFS_SERVER(inode)->wsize >= PAGE_CACHE_SIZE && 
 >  > !IS_SYNC(inode) &&
 >  > +			inode_referenced) {
 >  >  		err = nfs_writepage_async(NULL, inode, page, 0, offset);
 >  >  		if (err >= 0)
 >  >  			err = 0;
 >  > @@ -256,7 +268,9 @@
 >  >  	unlock_kernel();
 >  >  out:
 >  >  	UnlockPage(page);
 >  > -	return err; 
 >  > +	if (inode_referenced)
 >  > +		iput(inode);
 >  > +	return err;
 >  >  }
 >  >  
 >  >  /*
 >  > 
 > 

[-- Attachment #2: Type: text/html, Size: 9889 bytes --]

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2003-10-13 14:15 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2003-10-08 22:42 linux 2.4.18-5 nfs/rpc problem Guzovsky, Eduard
2003-10-10 17:12 ` David Jeffery
2003-10-10 18:58   ` Trond Myklebust
2003-10-10 19:44     ` Trond Myklebust
2003-10-13 14:17       ` David Jeffery
  -- strict thread matches above, loose matches on Subject: below --
2003-10-11  0:18 Guzovsky, Eduard
2003-10-11  0:20 Guzovsky, Eduard

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox