public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Jim Nance <jlnance@us54.synopsys.com>
To: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: jlnance@unity.ncsu.edu, linux-kernel@vger.kernel.org,
	gary.nifong@synopsys.COM, James.Nance@synopsys.COM,
	david.thomas@synopsys.COM
Subject: Re: NFS problems with Linux-2.4
Date: Thu, 15 May 2003 11:22:31 -0400	[thread overview]
Message-ID: <20030515112231.A28148@synopsys.com> (raw)
In-Reply-To: <16065.3323.449992.207039@charged.uio.no>; from trond.myklebust@fys.uio.no on Tue, May 13, 2003 at 05:19:23PM +0200

[-- Attachment #1: Type: text/plain, Size: 1035 bytes --]

On Tue, May 13, 2003 at 05:19:23PM +0200, Trond Myklebust wrote:
> 
> Could you please try with a newer kernel. The close-to-open cache
> consistency fixes are a relatively recent addition to the Linux NFS
> client. I dunno if RedHat's 2.4.18 kernel has them.
> 
>   2.4.7 certainly does not.

I tried again with the 2.4.20 based kernel that Red Hat released
yesterday (2.4.20-13.7bigmem).  The problem that I was seeing occurs
less frequently there, but it still happens.

I have attached a program which can reproduce this.  If you run it
under 2.4.7 it fails instantly.  If you use 2.4.20 it may take a
minute or so but it will also fail.

Thanks,

Jim

PS: Do you know if there is any way to work around this problem from
    within my program?

-- 
----------------------------------------------------------------------------
Jim Nance                                                           Synopsys
(919) 425-7219  Do you have sweet iced tea?        jlnance at synopsys.com
                No, but there's sugar on the table.

[-- Attachment #2: p1.c --]
[-- Type: text/plain, Size: 4507 bytes --]

/* This program demonstrates a problem with the close/open consistency
 * of NFS file systems under Linux.  It fails very rapidly with Red Hat's
 * 2.4.7-10smp kernel.  This kernel was known to have bugs.  It also fails
 * with Red Hat's 2.4.20-13.7bigmem kernel, which was thought to have this
 * bug fixed.  For my testcase both Linux machines were talking to a
 * Network Appliance file server and mounted like this:
 *
 * na1-rtp:/vol/vol0/home/jlnance /home/jlnance nfs rw,v3,rsize=4096,\
 * wsize=4096,hard,intr,udp,lock,addr=na1-rtp 0 0
 *
 * This program needs to be run on 2 machines, assume hostnames A & B.
 * A and B need to share an NFS mounted file system.
 *
 * On machine A:
 *   cd /some/nfs/path/common/to/both
 *   ./p1 s
 *
 * On machine B:
 *   cd /some/nfs/path/common/to/both
 *   ./p1 c A
 *
 * After a while you may see output similar to:
 *   cayman> ./p1 s
 *   Failed to find #0 which client wrote
 *   Failed on file number 483
 */


#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <sys/types.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netdb.h>
#include <fcntl.h>
#include <unistd.h>

#define PORT 12387
#define FLEN 16

/* Print the message for the current errno value on stderr and terminate
 * the process with exit status -1.  This function does not return. */
void die()
{
  const int failure_status = -1;

  perror("");   /* empty prefix: only the errno description is printed */
  exit(failure_status);
}

/* Write exactly len bytes from buff to fd, looping over short writes.
 *
 *   fd   - descriptor to write to
 *   buff - data to send
 *   len  - number of bytes to send; 0 is a no-op
 *
 * A write() interrupted by a signal (EINTR) is retried.  Any other error
 * is reported through die(), which terminates the process.  If write()
 * reports that it transferred 0 bytes while data is still pending, the
 * process exits with status 0 (no progress can be made).
 */
void Write(int fd, char *buff, size_t len)
{
  /* Testing len up front keeps a zero-length request from being mistaken
   * for the "0 bytes transferred" condition handled below. */
  while(len > 0) {
    /* write() returns ssize_t; an int could truncate the result */
    ssize_t nsent = write(fd, buff, len);

    if(nsent == -1) {
      if(errno != EINTR)
        die();
      continue;   /* interrupted before any data moved: retry */
    }
    if(nsent == 0)
      exit(0);

    buff += nsent;
    len  -= (size_t)nsent;
  }
}

/* Read exactly len bytes from fd into buff, looping over short reads.
 *
 *   fd   - descriptor to read from
 *   buff - destination buffer, at least len bytes long
 *   len  - number of bytes to receive; 0 is a no-op
 *
 * A read() interrupted by a signal (EINTR) is retried.  Any other error
 * is reported through die(), which terminates the process.  End of file
 * (read() returning 0 while data is still expected) ends the process
 * with exit status 0.
 */
void Read(int fd, char *buff, size_t len)
{
  /* Testing len up front keeps a zero-length request from being mistaken
   * for end of file. */
  while(len > 0) {
    /* read() returns ssize_t; an int could truncate the result */
    ssize_t nread = read(fd, buff, len);

    if(nread == -1) {
      if(errno != EINTR)
        die();
      continue;   /* interrupted before any data arrived: retry */
    }
    if(nread == 0)
      exit(0);    /* peer closed the stream */

    buff += nread;
    len  -= (size_t)nread;
  }
}

/* Server side of the test.
 *
 * Listens on TCP port PORT, accepts a single client connection and then
 * repeats up to 100000 times:
 *   1. send a FLEN-byte, NUL-padded file name to the client,
 *   2. wait for a one-byte acknowledgement from the client,
 *   3. stat() that name in the current directory,
 *   4. unlink() it.
 * If stat() fails, two diagnostic lines are printed on stderr and the
 * process exits with status -2.
 *
 * Socket setup failures (socket/bind/listen/accept) terminate the process
 * through die().  Returns 0 once all iterations have completed.
 */
int server()
{
  struct sockaddr_in name = {0};   /* zeroed so padding bytes are defined */
  int                on   = 1;
  int                sock;
  int                tsock;
  int                cnt;

  sock = socket(AF_INET, SOCK_STREAM, 0);
  if(sock==-1) die();

  name.sin_family       = AF_INET;
  name.sin_addr.s_addr  = htonl(INADDR_ANY);
  name.sin_port         = htons(PORT);

  /* Best effort: a failure here is reported but is not fatal */
  if(setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &on, sizeof on)==-1)
    perror("setsockopt(SO_REUSEADDR)");

  if(bind(sock, (struct sockaddr*)&name, sizeof(name))==-1) die();
  if(listen(sock, 1)==-1) die();

  tsock = accept(sock, 0, 0);
  if(tsock==-1) die();   /* previously fell through and returned success */

  for(cnt=0; cnt<100000; cnt++) {
    char dummy;
    char number[FLEN] = {0};   /* all FLEN bytes are sent, so pad with NULs */
    struct stat sbuf;

    /* The file name is "#0" on every iteration; cnt only counts rounds
     * and is reported in the failure message. */
    snprintf(number, sizeof number, "#%d", 0);

    Write(tsock, number, sizeof(number));
    Read(tsock, &dummy, 1);   /* client acknowledges after its rename() */

    if(stat(number, &sbuf)) {
      fprintf(stderr, "Failed to find %s which client wrote\n", number);
      fprintf(stderr, "Failed on file number %d\n", cnt);
      exit(-2);
    }

    /* A file left behind would let the next stat() succeed regardless of
     * what the client did, so make a failed unlink visible. */
    if(unlink(number)==-1)
      perror("unlink");
  }

  close(tsock);
  close(sock);

  return 0;
}

/* Client side of the test.
 *
 *   server - host name of the machine running the server side
 *
 * Connects to TCP port PORT on the server and then loops forever:
 *   1. read a FLEN-byte file name from the server,
 *   2. create "<name>.tmp", write FLEN bytes into it and close it,
 *   3. rename "<name>.tmp" to "<name>",
 *   4. send one byte back so the server knows the file is in place.
 *
 * The loop ends when Read() sees the server close the connection (the
 * process then exits inside Read()).  System-call failures terminate the
 * process through die(); name-resolution failures print a message and
 * exit with status -1.
 */
int client(char *server)
{
  struct sockaddr_in name = {0};   /* zeroed so padding bytes are defined */
  struct hostent    *info;
  int                rsocket;

  info = gethostbyname(server);
  if(!info) {
    /* gethostbyname() reports errors through h_errno, not errno, so the
     * perror()-based die() would print an unrelated message here. */
    fprintf(stderr, "Cannot resolve host name %s\n", server);
    exit(-1);
  }
  if(info->h_addrtype!=AF_INET ||
     info->h_length!=(int)sizeof(struct in_addr)) {
    /* the memcpy() below is only valid for an IPv4 address */
    fprintf(stderr, "Host %s has no IPv4 address\n", server);
    exit(-1);
  }

  rsocket = socket(AF_INET, SOCK_STREAM, 0);
  if(rsocket==-1) die();

  name.sin_family = AF_INET;
  name.sin_port   = htons(PORT);
  memcpy(&name.sin_addr, info->h_addr_list[0], sizeof(struct in_addr));

  if(connect(rsocket, (struct sockaddr*)&name, sizeof(name))==-1) die();

  for(;;) {
    int  fd;
    char fname[FLEN];
    char tname[FLEN+8];

    Read(rsocket, fname, sizeof(fname));
    /* The bytes come from the network; make sure they form a string
     * before they are used to build path names. */
    fname[sizeof(fname)-1] = '\0';
    snprintf(tname, sizeof tname, "%s.tmp", fname);

    fd = open(tname, O_WRONLY|O_CREAT, 0600);
    if(fd==-1) die();

    Write(fd, fname, sizeof(fname)); /* Junk data */

    /* close() can report write errors that were deferred until now, so
     * its result must not be ignored. */
    if(close(fd)==-1) die();

    /* Acknowledging a rename that did not happen would make the server
     * report a false failure. */
    if(rename(tname, fname)==-1) die();

    Write(rsocket, fname, 1); /* Tells the server we are done */
  }

  return 0;   /* not reached: the loop only ends via exit() */
}

/* Print command-line help on stderr and exit with status -1.
 *
 *   prog - program name shown in the help text (normally argv[0])
 */
void usage(char *prog)
{
  FILE *out = stderr;

  fputs("Usage:\n", out);
  fprintf(out, " %s s\n", prog);
  fprintf(out, " %s c servername\n", prog);
  fputs(" Run 1 of each in the same NFS directory on 2 different machines\n"
        " Two processes total\n", out);

  exit(-1);
}

/* Entry point: dispatch on the first argument.
 *
 *   p1 s             run the server side
 *   p1 c servername  run the client side against servername
 *
 * Only the first character of the mode argument is examined.  Any other
 * invocation prints the usage text (usage() exits the process).
 */
int main(int ac, char **av)
{
  if(ac<2) {
    usage(av[0]);
  } else if(av[1][0]=='s') {
    /* "else" added: av[1] must only be examined when it exists, instead
     * of relying on usage() never returning. */
    return server();
  } else if(ac<3) {
    usage(av[0]);
  } else if(av[1][0]=='c') {
    return client(av[2]);
  } else {
    usage(av[0]);
  }

  return -1;
}

  reply	other threads:[~2003-05-15 15:11 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2003-05-13 14:50 NFS problems with Linux-2.4 jlnance
2003-05-13 15:19 ` Trond Myklebust
2003-05-15 15:22   ` Jim Nance [this message]
2003-05-18 15:00     ` Trond Myklebust
2003-05-19  0:53       ` jlnance
2003-05-19 11:27         ` Trond Myklebust
2003-05-19 20:02       ` Jim Nance
2003-05-13 19:07 ` jjs
2003-05-13 19:24   ` Roland Dreier
2003-05-13 21:55     ` jjs
2003-05-13 23:11   ` Alan Cox
     [not found] <482A3FA0050D21419C269D13989C6113127532@lavender-fe.eng.netapp.com>
2003-05-27 17:29 ` jlnance

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20030515112231.A28148@synopsys.com \
    --to=jlnance@us54.synopsys.com \
    --cc=James.Nance@synopsys.COM \
    --cc=david.thomas@synopsys.COM \
    --cc=gary.nifong@synopsys.COM \
    --cc=jlnance@unity.ncsu.edu \
    --cc=linux-kernel@vger.kernel.org \
    --cc=trond.myklebust@fys.uio.no \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox