2.4.23: Killed process on NFS client can result in lost lock on server

Linux NFS development
 help / color / mirror / Atom feed

* 2.4.23: Killed process on NFS client can result in lost lock on server
@ 2003-12-02 19:56 Philippe Troin
  0 siblings, 0 replies; only message in thread
From: Philippe Troin @ 2003-12-02 19:56 UTC (permalink / raw)
  To: nfs

[-- Attachment #1: Type: text/plain, Size: 187 bytes --]

The problem described in the enclosed mail still occurs in 2.4.23. If
anybody cares.

Applying the enclosed patch from Trond makes the problem less
frequent, but it still occurs.

Phil.


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: linux-2.4.23-nfs-locks.patch --]
[-- Type: text/x-patch, Size: 480 bytes --]

diff -ruN linux-2.4.23.orig/fs/nfs/file.c linux-2.4.23/fs/nfs/file.c
--- linux-2.4.23.orig/fs/nfs/file.c	Mon Aug 25 04:44:43 2003
+++ linux-2.4.23/fs/nfs/file.c	Mon Dec  1 11:35:22 2003
@@ -293,7 +293,8 @@
 	status2 = filemap_fdatawait(inode->i_mapping);
 	if (status2 && !status)
 		status = status2;
-	if (status < 0)
+	/* Note: Ignore status if we're cleaning up locks on process exit */
+	if (status < 0 && !(current->flags & PF_EXITING))
 		return status;
 
 	lock_kernel();

[-- Attachment #3: Type: message/rfc822, Size: 7112 bytes --]

[-- Attachment #3.1.1: Type: text/plain, Size: 1697 bytes --]

I've noticed this first with bogofilter, and was able to reproduce the
problem with the enclosed test program.

Setup: kernel 2.4.22 and nfs-utils 1.0.5

A (nfs) client mounts a file system from the (nfs) server with these
options (from /proc/mounts):

server:/fs /fs nfs rw,nodev,v3,rsize=8192,wsize=8192,hard,intr,udp,lock,addr=server

If a process running on the (nfs) client is killed by a signal while
holding a lock on a (nfs) file, the server might not relinquish the
lock even though the locker is dead.

Try compiling and running the enclosed C program on a nfs client to
demonstrate the problem:

   phil@client:~% gcc -Wall -W -o kill-locks kill-locks.c
   phil@client:~% ./kill-locks
   [child] fcntl(F_SETLK): Resource temporarily unavailable
   unexpected status from child 00000100
   successful locking attempts: 2
   zsh: 10479 exit 1     ./kill-locks
   phil@client:~% ./kill-locks
   [child] fcntl(F_SETLK): Resource temporarily unavailable
   unexpected status from child 00000100
   successful locking attempts: 0
   zsh: 10483 exit 1     ./kill-locks
   phil@client:~% ls -i kill-locks.tmp
    371922 kill-locks.tmp
   phil@client:~% grep 371922 /proc/locks
   zsh: 10492 exit 1     grep 371922 /proc/locks
   phil@client:~%

On the server:

   phil@server:~% grep 371922 /proc/locks
   2: POSIX  ADVISORY  WRITE 10480 3a:04:371922 0 EOF c8138840 c8138484 cda9d324 00000000 c813884c
   phil@server:~%

The lock is still held.

While writing this test program, I noticed that the problem only
occurs while I/O is being done on the locked file. Note the write() in
a while loop in the test program. I could not get the bad behavior to
show up when no I/O is going on.

Phil.

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #3.1.2: kill-locks.c --]
[-- Type: text/x-csrc, Size: 2274 bytes --]

#define _GNU_SOURCE
#define _LARGEFILE_SOURCE
#define _FILE_OFFSET_BITS 64

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <signal.h>
#include <sys/wait.h>
#include <errno.h>

#define FNAME		"kill-locks.tmp"
#define BUFSIZE		16384
#define DEATHSIG	SIGINT

/*
 * Do-nothing signal handler.
 *
 * main() installs it for SIGUSR1 and SIGCHLD so that those signals
 * interrupt sigsuspend() instead of being ignored or taking their
 * default action.  The signal number itself is not needed.
 */
void sighandler(int signum)
{
  (void) signum;	/* intentionally unused */
}

/*
 * Reproduction driver: repeatedly fork a child that takes a write lock
 * on FNAME and writes to it in a tight loop, then kill that child with
 * DEATHSIG after a short random delay and check how it died.
 *
 * The loop never terminates normally.  The program exits with status 1
 * on the first system-call failure in the parent, or as soon as a child
 * terminates in any way other than being killed by DEATHSIG (for
 * example because the child itself called exit(1) after a failed
 * fcntl(F_SETLK)).  In the latter case the raw wait status and the
 * number of iterations completed so far are printed to stderr.
 */
int
main()
{
  int			successcount = 0;
  struct sigaction	sa;
  sigset_t		blockset, origset, waitset;
  /**/

  /* Install the no-op handler for both wake-up signals: SIGUSR1 (sent
     by the child once it holds the lock) and SIGCHLD. */
  sa.sa_handler = &sighandler;
  sa.sa_flags	= 0;
  sigemptyset(&sa.sa_mask);
  if (sigaction(SIGUSR1, &sa, NULL) == -1)
    perror("sigaction(SIGUSR1)"), exit(1);
  if (sigaction(SIGCHLD, &sa, NULL) == -1)
    perror("sigaction(SIGCHLD)"), exit(1);

  /* Keep SIGUSR1 and SIGCHLD blocked in the parent except while inside
     sigsuspend(); origset remembers the mask we started with. */
  sigemptyset(&blockset);
  sigaddset(&blockset, SIGUSR1);
  sigaddset(&blockset, SIGCHLD);
  if (sigprocmask(SIG_BLOCK, &blockset, &origset) == -1)
    perror("sigprocmask"), exit(1);
  /* Mask in effect during sigsuspend(): the original mask with SIGUSR1
     and SIGCHLD unblocked and DEATHSIG blocked. */
  waitset = origset;
  sigdelset(&waitset, SIGUSR1);
  sigdelset(&waitset, SIGCHLD);
  sigaddset(&waitset, DEATHSIG);

  while (1)
    {
      pid_t	childpid = fork();
      int	status;
      /**/

      if (childpid == (pid_t) -1)
	perror("fork()"), exit(1);
      if (childpid == 0)
	{
	  /* Child */
	  int		fd;
	  struct flock	lck;
	  char		buf[BUFSIZE];
	  /**/

	  /* Undo the parent's signal blocking so the child runs with the
	     original mask. */
	  if (sigprocmask(SIG_SETMASK, &origset, NULL) == -1)
	    perror("[child] sigprocmask"), exit(1);

	  fd = open(FNAME, O_RDWR|O_CREAT, 0666);
	  if (fd == -1)
	    perror("[child] open()"), exit(1);

	  /* Non-blocking write lock starting at offset 0 with l_len == 0
	     (i.e. extending to end of file per fcntl() semantics). */
	  lck.l_type   = F_WRLCK;
	  lck.l_whence = SEEK_SET;
	  lck.l_start  = (off_t)0;
	  lck.l_len    = (off_t)0;
	  if (fcntl(fd, F_SETLK, &lck) == -1)
	    perror("[child] fcntl(F_SETLK)"), exit(1);
	  memset(buf, 0, sizeof(buf));
	  /* Tell the parent the lock is held, then keep I/O going on the
	     locked file until killed.  The result of write() is not
	     checked; the child never leaves this loop on its own. */
	  kill(getppid(), SIGUSR1);
	  while(1)
	    write(fd, buf, sizeof(buf));
	}

      /* Parent: sleep until a handled signal arrives.  sigsuspend() is
	 expected to return -1 with errno == EINTR; anything else is
	 treated as a fatal error.
	 NOTE(review): SIGCHLD stays blocked outside sigsuspend(), so a
	 SIGCHLD left pending by the previous iteration's child could
	 presumably end this wait before the new child has taken the
	 lock -- confirm against the platform's waitpid() behaviour. */
      if ( ! (sigsuspend(&waitset) == -1 && errno == EINTR))
	perror("sigsuspend"), exit(1);
      /* Random delay of 0..999 microseconds before killing the child. */
      usleep(rand()%1000);
      kill(childpid, DEATHSIG);
      if (waitpid(childpid, &status, 0) != childpid)
	perror("waitpid"), exit(1);
      /* The only acceptable outcome is "terminated by DEATHSIG". */
      if ( ! (WIFSIGNALED(status) && WTERMSIG(status) == DEATHSIG))
	{
	  fprintf(stderr,
		  "unexpected status from child %08X\n"
		  "successful locking attempts: %d\n",
		  status, successcount);
	  exit(1);
	}
      ++successcount;
    }
}

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2003-12-02 19:57 UTC | newest]

Thread overview: (only message) (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2003-12-02 19:56 2.4.23: Killed process on NFS client can result in lost lock on server Philippe Troin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox