All of lore.kernel.org
 help / color / mirror / Atom feed
* RE: possible client stale filehandle bug?
@ 2005-02-16 21:42 Lever, Charles
  2005-02-16 21:47 ` Neil Horman
  0 siblings, 1 reply; 14+ messages in thread
From: Lever, Charles @ 2005-02-16 21:42 UTC (permalink / raw)
  To: Neil Horman; +Cc: Garrick Staples, nfs

[-- Attachment #1: Type: text/plain, Size: 1555 bytes --]

> I agree, it probably doesn't re-drive on any operation that 
> doesn't walk 
> a path, which is in line with what RHEL is doing currently.  I didn't 
> mean to imply that solaris retired ESTALE in all occurances of the 
> event.  Anywho, Can you point me to your patches?  I'd be 
> interested to 
> know how you managed to implement retry on ESTALE without 
> leaking into 
> the VFS, which I think you will recall was the big sticking 
> point that 
> we were debating here.

the patches do touch fs/namei.c (it was al viro's suggestion) with a
pretty simple change.  and i think they are KABI friendly enough to be
included in RHEL 3, once we are satisfied that the solution is
effective.

the cto-lookup-revalidate patch adds just enough of the 2.6
lookup-intent logic to the 2.4 VFS layer to allow us to support NFS
close-to-open in nfs_lookup_revalidate instead of in nfs_open.  this
resolves one of the most common ESTALE failure modes, where just the
object at the end of the pathname has been replaced.

the second patch applies on top of this.  it adds logic to redrive
pathname resolution if an ESTALE is encountered anywhere during a
pathname lookup.  it redrives it once from the top, asserting a flag
that causes the VFS layer to abandon the dcache and use only real
lookups for this resolution request.  if the redriven resolution fails,
we give up.  this resolves the other typical ESTALE failure mode, where
some or all of the path has been replaced, while avoiding retrying an
unbounded number of times.

[-- Attachment #2: linux-2.4.21-nfs-cto-lookup-revalidate.patch --]
[-- Type: application/octet-stream, Size: 4728 bytes --]

diff -X /home/cel/src/linux/dont-diff -Naurp linux-2.4.21/fs/exec.c new/fs/exec.c
--- linux-2.4.21/fs/exec.c	2004-12-23 12:28:49.582148000 -0500
+++ new/fs/exec.c	2004-12-23 12:29:53.852334000 -0500
@@ -402,7 +402,7 @@ struct file *open_exec(const char *name)
 	struct file *file;
 	int err = 0;
 
-	err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
+	err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE|LOOKUP_OPEN, &nd);
 	file = ERR_PTR(err);
 	if (!err) {
 		inode = nd.dentry->d_inode;
diff -X /home/cel/src/linux/dont-diff -Naurp linux-2.4.21/fs/namei.c new/fs/namei.c
--- linux-2.4.21/fs/namei.c	2004-12-23 12:28:38.884159000 -0500
+++ new/fs/namei.c	2004-12-23 12:29:53.857334000 -0500
@@ -645,7 +645,7 @@ return_reval:
 		dentry = nd->dentry;
 		if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
 			err = -ESTALE;
-			if (!dentry->d_op->d_revalidate(dentry, 0)) {
+			if (!dentry->d_op->d_revalidate(dentry, nd->flags & LOOKUP_OPEN)) {
 				d_invalidate(dentry);
 				break;
 			}
@@ -951,7 +951,7 @@ static inline int may_create(struct inod
  */
 static inline int lookup_flags(unsigned int f)
 {
-	unsigned long retval = LOOKUP_FOLLOW;
+	unsigned long retval = LOOKUP_FOLLOW | LOOKUP_OPEN;
 
 	if (f & O_NOFOLLOW)
 		retval &= ~LOOKUP_FOLLOW;
@@ -1032,7 +1032,7 @@ int open_namei(const char * pathname, in
 	/*
 	 * Create - we need to know the parent.
 	 */
-	error = path_lookup(pathname, LOOKUP_PARENT, nd);
+	error = path_lookup(pathname, LOOKUP_PARENT | LOOKUP_OPEN, nd);
 	if (error)
 		return error;
 
diff -X /home/cel/src/linux/dont-diff -Naurp linux-2.4.21/fs/nfs/dir.c new/fs/nfs/dir.c
--- linux-2.4.21/fs/nfs/dir.c	2004-12-23 12:28:39.850868000 -0500
+++ new/fs/nfs/dir.c	2004-12-23 12:29:53.861335000 -0500
@@ -505,10 +505,20 @@ static inline void nfs_renew_times(struc
 	dentry->d_time = jiffies;
 }
 
-static inline
-int nfs_lookup_verify_inode(struct inode *inode)
+/*
+ * Refresh the inode if the attribute cache has expired.
+ *
+ * During an open(2), force an inode update in order to maintain close-
+ * to-open cache coherency, unless the "nocto" mount option is set.
+ */
+static inline int nfs_lookup_verify_inode(struct inode *inode, int flags)
 {
-	return nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	if (!(flags & LOOKUP_OPEN) ||
+	    (NFS_SERVER(inode)->flags & NFS_MOUNT_NOCTO))
+		return nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	dfprintk(VFS, "NFS: %s: inode (%x/%Ld) revalidate on open\n",
+		__FUNCTION__, inode->i_dev, (long long)NFS_FILEID(inode));
+	return __nfs_revalidate_inode(NFS_SERVER(inode),inode);
 }
 
 /*
@@ -562,7 +572,7 @@ static int nfs_lookup_revalidate(struct 
 
 	/* Force a full look up iff the parent directory has changed */
 	if (nfs_check_verifier(dir, dentry)) {
-		if (nfs_lookup_verify_inode(inode))
+		if (nfs_lookup_verify_inode(inode, flags))
 			goto out_bad;
 		goto out_valid;
 	}
@@ -571,7 +581,7 @@ static int nfs_lookup_revalidate(struct 
 	if (!error) {
 		if (memcmp(NFS_FH(inode), &fhandle, sizeof(struct nfs_fh))!= 0)
 			goto out_bad;
-		if (nfs_lookup_verify_inode(inode))
+		if (nfs_lookup_verify_inode(inode, flags))
 			goto out_bad;
 		goto out_valid_renew;
 	}
diff -X /home/cel/src/linux/dont-diff -Naurp linux-2.4.21/fs/nfs/inode.c new/fs/nfs/inode.c
--- linux-2.4.21/fs/nfs/inode.c	2004-12-23 12:28:50.419958000 -0500
+++ new/fs/nfs/inode.c	2004-12-23 12:31:15.610732000 -0500
@@ -1005,23 +1005,18 @@ int nfs_open(struct inode *inode, struct
 {
 	struct rpc_auth *auth;
 	struct rpc_cred *cred;
-	int err = 0;
+
+	dfprintk(VFS, "NFS: nfs_open (%x/%Ld)\n",
+		inode->i_dev, (long long)NFS_FILEID(inode));
 
 	lock_kernel();
-	/* Ensure that we revalidate the data cache */
-	if (! (NFS_SERVER(inode)->flags & NFS_MOUNT_NOCTO)) {
-		err = __nfs_revalidate_inode(NFS_SERVER(inode),inode);
-		if (err)
-			goto out;
-	}
 	auth = NFS_CLIENT(inode)->cl_auth;
 	cred = rpcauth_lookupcred(auth, 0);
 	filp->private_data = cred;
 	if (filp->f_mode & FMODE_WRITE)
 		nfs_set_mmcred(inode, cred);
-out:
 	unlock_kernel();
-	return err;
+	return 0;
 }
 
 int nfs_release(struct inode *inode, struct file *filp)
diff -X /home/cel/src/linux/dont-diff -Naurp linux-2.4.21/include/linux/fs.h new/include/linux/fs.h
--- linux-2.4.21/include/linux/fs.h	2004-12-23 12:28:50.382958000 -0500
+++ new/include/linux/fs.h	2004-12-23 12:29:53.871334000 -0500
@@ -1407,6 +1407,7 @@ static inline long IS_ERR(const void *pt
 #define LOOKUP_PARENT		(16)
 #define LOOKUP_NOALT		(32)
 #define LOOKUP_ATOMIC		(64)
+#define LOOKUP_OPEN		(128)
 /*
  * Type of the last component on LOOKUP_PARENT
  */

[-- Attachment #3: linux-2.4.21-pathname-retry.patch --]
[-- Type: application/octet-stream, Size: 5935 bytes --]

diff -X /home/cel/src/linux/dont-diff -Naurp linux-2.4.21/fs/namei.c new/fs/namei.c
--- linux-2.4.21/fs/namei.c	2005-02-09 09:13:30.000000000 -0500
+++ new/fs/namei.c	2005-02-14 15:27:27.000000000 -0500
@@ -447,7 +447,7 @@ static inline void follow_dotdot(struct 
  *
  * We expect 'base' to be positive and a directory.
  */
-int link_path_walk(const char * name, struct nameidata *nd)
+static int __link_path_walk(const char *name, struct nameidata *nd)
 {
 	struct dentry *dentry;
 	struct inode *inode;
@@ -645,7 +645,7 @@ return_reval:
 		dentry = nd->dentry;
 		if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
 			err = -ESTALE;
-			if (!dentry->d_op->d_revalidate(dentry, nd->flags & LOOKUP_OPEN)) {
+			if (!dentry->d_op->d_revalidate(dentry, nd->flags)) {
 				d_invalidate(dentry);
 				break;
 			}
@@ -661,6 +661,37 @@ return_err:
 	return err;
 }
 
+/*
+ * Wrapper to retry pathname resolution whenever the underlying
+ * file system returns an ESTALE.
+ *
+ * Retry the whole path once, forcing real lookup requests
+ * instead of relying on the dcache.
+ */
+int link_path_walk(const char *name, struct nameidata *nd)
+{
+	struct nameidata save = *nd;
+	int result;
+
+	/* make sure the stuff we saved doesn't go away */
+	dget(save.dentry);
+	mntget(save.mnt);
+
+	result = __link_path_walk(name, nd);
+	if (result == -ESTALE) {
+		*nd = save;
+		dget(nd->dentry);
+		mntget(nd->mnt);
+		nd->flags |= LOOKUP_REVAL;
+		result = __link_path_walk(name, nd);
+	}
+
+	dput(save.dentry);
+	mntput(save.mnt);
+
+	return result;
+}
+
 int path_walk(const char * name, struct nameidata *nd)
 {
 	current->total_link_count = 0;
diff -X /home/cel/src/linux/dont-diff -Naurp linux-2.4.21/fs/nfs/dir.c new/fs/nfs/dir.c
--- linux-2.4.21/fs/nfs/dir.c	2005-02-09 09:13:30.000000000 -0500
+++ new/fs/nfs/dir.c	2005-02-14 15:36:37.000000000 -0500
@@ -513,11 +513,22 @@ static inline void nfs_renew_times(struc
  */
 static inline int nfs_lookup_verify_inode(struct inode *inode, int flags)
 {
-	if (!(flags & LOOKUP_OPEN) ||
-	    (NFS_SERVER(inode)->flags & NFS_MOUNT_NOCTO))
-		return nfs_revalidate_inode(NFS_SERVER(inode), inode);
-	dfprintk(VFS, "NFS: %s: inode (%x/%Ld) revalidate on open\n",
-		__FUNCTION__, inode->i_dev, (long long)NFS_FILEID(inode));
+	if (flags & LOOKUP_REVAL) {
+		dfprintk(VFS, "NFS: %s: inode (%x/%Ld) revalidate on estale\n",
+			__FUNCTION__, inode->i_dev, (long long)NFS_FILEID(inode));
+		goto force_revalidate;
+	}
+
+	if ((flags & LOOKUP_OPEN) &&
+	    !(NFS_SERVER(inode)->flags & NFS_MOUNT_NOCTO)) {
+		dfprintk(VFS, "NFS: %s: inode (%x/%Ld) revalidate on open\n",
+			__FUNCTION__, inode->i_dev, (long long)NFS_FILEID(inode));
+		goto force_revalidate;
+	}
+
+	return nfs_revalidate_inode(NFS_SERVER(inode), inode);
+
+force_revalidate:
 	return __nfs_revalidate_inode(NFS_SERVER(inode),inode);
 }
 
diff -X /home/cel/src/linux/dont-diff -Naurp linux-2.4.21/fs/nfs/nfs3proc.c new/fs/nfs/nfs3proc.c
--- linux-2.4.21/fs/nfs/nfs3proc.c	2005-02-09 09:13:16.000000000 -0500
+++ new/fs/nfs/nfs3proc.c	2005-02-09 09:23:09.000000000 -0500
@@ -61,7 +61,7 @@ nfs3_proc_get_root(struct nfs_server *se
 	dprintk("NFS call  getroot\n");
 	fattr->valid = 0;
 	status = rpc_call(server->client, NFS3PROC_GETATTR, fhandle, fattr, 0);
-	dprintk("NFS reply getroot\n");
+	dprintk("NFS reply getroot: %d\n", status);
 	return status;
 }
 
@@ -77,7 +77,7 @@ nfs3_proc_getattr(struct inode *inode, s
 	fattr->valid = 0;
 	status = rpc_call(NFS_CLIENT(inode), NFS3PROC_GETATTR,
 			  NFS_FH(inode), fattr, 0);
-	dprintk("NFS reply getattr\n");
+	dprintk("NFS reply getattr: %d\n", status);
 	return status;
 }
 
@@ -91,7 +91,7 @@ nfs3_proc_setattr(struct inode *inode, s
 	dprintk("NFS call  setattr\n");
 	fattr->valid = 0;
 	status = rpc_call(NFS_CLIENT(inode), NFS3PROC_SETATTR, &arg, fattr, 0);
-	dprintk("NFS reply setattr\n");
+	dprintk("NFS reply setattr: %d\n", status);
 	return status;
 }
 
@@ -162,7 +162,7 @@ nfs3_proc_access(struct inode *inode, st
 
 	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
 	nfs_refresh_inode(inode, &fattr);
-	dprintk("NFS reply access\n");
+	dprintk("NFS reply access: %d\n", status);
 
 	if (status == 0 && (access & res.access) != access)
 		status = -EACCES;
diff -X /home/cel/src/linux/dont-diff -Naurp linux-2.4.21/fs/nfs/proc.c new/fs/nfs/proc.c
--- linux-2.4.21/fs/nfs/proc.c	2005-02-09 09:12:47.000000000 -0500
+++ new/fs/nfs/proc.c	2005-02-09 09:23:09.000000000 -0500
@@ -56,7 +56,7 @@ nfs_proc_get_root(struct nfs_server *ser
 	dprintk("NFS call  getroot\n");
 	fattr->valid = 0;
 	status = rpc_call(server->client, NFSPROC_GETATTR, fhandle, fattr, 0);
-	dprintk("NFS reply getroot\n");
+	dprintk("NFS reply getroot: %d\n", status);
 	return status;
 }
 
@@ -72,7 +72,7 @@ nfs_proc_getattr(struct inode *inode, st
 	fattr->valid = 0;
 	status = rpc_call(NFS_CLIENT(inode), NFSPROC_GETATTR,
 				NFS_FH(inode), fattr, 0);
-	dprintk("NFS reply getattr\n");
+	dprintk("NFS reply getattr: %d\n", status);
 	return status;
 }
 
@@ -86,7 +86,7 @@ nfs_proc_setattr(struct inode *inode, st
 	dprintk("NFS call  setattr\n");
 	fattr->valid = 0;
 	status = rpc_call(NFS_CLIENT(inode), NFSPROC_SETATTR, &arg, fattr, 0);
-	dprintk("NFS reply setattr\n");
+	dprintk("NFS reply setattr: %d\n", status);
 	return status;
 }
 
diff -X /home/cel/src/linux/dont-diff -Naurp linux-2.4.21/include/linux/fs.h new/include/linux/fs.h
--- linux-2.4.21/include/linux/fs.h	2005-02-09 09:13:30.000000000 -0500
+++ new/include/linux/fs.h	2005-02-14 15:29:26.000000000 -0500
@@ -1408,6 +1408,7 @@ static inline long IS_ERR(const void *pt
 #define LOOKUP_NOALT		(32)
 #define LOOKUP_ATOMIC		(64)
 #define LOOKUP_OPEN		(128)
+#define LOOKUP_REVAL		(256)
 /*
  * Type of the last component on LOOKUP_PARENT
  */

^ permalink raw reply	[flat|nested] 14+ messages in thread
* RE: possible client stale filehandle bug?
@ 2005-02-16 21:18 Lever, Charles
  2005-02-16 21:23 ` Neil Horman
  0 siblings, 1 reply; 14+ messages in thread
From: Lever, Charles @ 2005-02-16 21:18 UTC (permalink / raw)
  To: Neil Horman; +Cc: Garrick Staples, nfs

hi neil-

> >>It seems that the Solaris clients never report any such=20
> errors, only the Linux
> >>clients.  However, watching 'snoop' on the Solaris NFS=20
> server, I see that it IS
> >>returning stale file handles to both OSes, but Solaris=20
> clients seem to retry
> >>the request several times; and the Linux clients=20
> immediately pass the error up
> >>to the application.
> >>
> >>Is there some condition that the 2.4 kernel is handling incorrectly?
> >=20
> >=20
> > I do not believe that Solaris redrives ESTALE on read, but=20
> they may do
> > it on open(). Linux does not redrive either case. See the many
> > discussions in the NFS list archives for why.
> >=20
>=20
> Solaris does in fact retry on operations on ESTALE errors,=20
> definately on=20
> open, and I think on read/readdir/stat/etc. as well.  We had some=20
> discussion about tht here recently.

as far as i know Solaris doesn't redrive on read or write, but only
during pathname resolution.  redriving a read or write will only work in
the case where the server has taken the export offline temporarily; if
the file handle really is bad, then redriving a read or write is
probably safe, but won't accomplish anything.

i have a patch that adds support for pathname resolution retry to 2.6
(now in Trond's NFS_ALL for 2.6.11-rc4) and a pair of patches that
implement this for RHEL 3.0 that i've sent to steve and al viro for
review.


-------------------------------------------------------
SF email is sponsored by - The IT Product Guide
Read honest & candid reviews on hundreds of IT Products from real users.
Discover which products truly live up to the hype. Start reading now.
http://ads.osdn.com/?ad_id=6595&alloc_id=14396&op=click
_______________________________________________
NFS maillist  -  NFS@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nfs

^ permalink raw reply	[flat|nested] 14+ messages in thread
* possible client stale filehandle bug?
@ 2005-01-25 17:39 Garrick Staples
  2005-01-26  6:06 ` Trond Myklebust
  0 siblings, 1 reply; 14+ messages in thread
From: Garrick Staples @ 2005-01-25 17:39 UTC (permalink / raw)
  To: nfs

[-- Attachment #1: Type: text/plain, Size: 3095 bytes --]

Hi all,
   I have lots of storage in a large Solaris samfs environment that is NFS
shared to a large number of Solaris and RHEL3 clients.  Under some conditions,
linux apps have been getting stale filehandles during the normal course of
their activity.  Various file handling syscalls like read() or open() might
error.  Lots of renames and setattrs calls seem to trigger the problem.  
'ci' and 'cvs commit' are particularly good at this.

It seems that the Solaris clients never report any such errors, only the Linux
clients.  However, watching 'snoop' on the Solaris NFS server, I see that it IS
returning stale file handles to both OSes, but Solaris clients seem to retry
the request several times; and the Linux clients immediately pass the error up
to the application.

Is there some condition that the 2.4 kernel is handling incorrectly?


Sample snippet from the 'snoop' on the Solaris server with a Solaris client
waiting...

rcf102.usc.edu -> almaak.usc.edu NFS C LOOKUP3 FH=B41B Entries.Log
almaak.usc.edu -> rcf102.usc.edu NFS R LOOKUP3 OK FH=7BFE
rcf102.usc.edu -> almaak.usc.edu TCP D=2049 S=610     Ack=3071279992 Seq=337022612 Len=0 Win=64240
rcf102.usc.edu -> almaak.usc.edu NFS C ACCESS3 FH=7BFE (read,modify,extend,execute)
almaak.usc.edu -> rcf102.usc.edu TCP D=610 S=2049     Ack=337022752 Seq=3071279992 Len=0 Win=64240
almaak.usc.edu -> rcf102.usc.edu NFS R ACCESS3 Stale NFS file handle
rcf102.usc.edu -> almaak.usc.edu NFS C LOOKUP3 FH=B41B Entries.Log
almaak.usc.edu -> rcf102.usc.edu NFS R LOOKUP3 OK FH=7BFE
rcf102.usc.edu -> almaak.usc.edu NFS C LOOKUP3 FH=B41B Entries.Log
almaak.usc.edu -> rcf102.usc.edu NFS R LOOKUP3 OK FH=7BFE
rcf102.usc.edu -> almaak.usc.edu TCP D=2049 S=610     Ack=3071280516 Seq=337023056 Len=0 Win=64240
rcf102.usc.edu -> almaak.usc.edu NFS C ACCESS3 FH=7BFE (read,modify,extend,execute)
almaak.usc.edu -> rcf102.usc.edu TCP D=610 S=2049     Ack=337023196 Seq=3071280516 Len=0 Win=64240
almaak.usc.edu -> rcf102.usc.edu NFS R ACCESS3 Stale NFS file handle
rcf102.usc.edu -> almaak.usc.edu NFS C LOOKUP3 FH=B41B Entries.Log
almaak.usc.edu -> rcf102.usc.edu NFS R LOOKUP3 OK FH=7BFE
rcf102.usc.edu -> almaak.usc.edu TCP D=2049 S=610     Ack=3071280796 Seq=337023348 Len=0 Win=64240
rcf102.usc.edu -> almaak.usc.edu NFS C LOOKUP3 FH=B41B Entries.Log
almaak.usc.edu -> rcf102.usc.edu NFS R LOOKUP3 OK FH=7BFE
rcf102.usc.edu -> almaak.usc.edu TCP D=2049 S=610     Ack=3071281040 Seq=337023500 Len=0 Win=64240
rcf102.usc.edu -> almaak.usc.edu NFS C ACCESS3 FH=7BFE (read,modify,extend,execute)
almaak.usc.edu -> rcf102.usc.edu TCP D=610 S=2049     Ack=337023640 Seq=3071281040 Len=0 Win=64240
almaak.usc.edu -> rcf102.usc.edu NFS R ACCESS3 Stale NFS file handle
rcf102.usc.edu -> almaak.usc.edu NFS C LOOKUP3 FH=B41B Entries.Log
almaak.usc.edu -> rcf102.usc.edu NFS R LOOKUP3 OK FH=7BFE
rcf102.usc.edu -> almaak.usc.edu NFS C LOOKUP3 FH=B41B Entries.Log
almaak.usc.edu -> rcf102.usc.edu NFS R LOOKUP3 OK FH=7BFE

-- 
Garrick Staples, Linux/HPCC Administrator
University of Southern California

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2005-02-24 20:44 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-02-16 21:42 possible client stale filehandle bug? Lever, Charles
2005-02-16 21:47 ` Neil Horman
  -- strict thread matches above, loose matches on Subject: below --
2005-02-16 21:18 Lever, Charles
2005-02-16 21:23 ` Neil Horman
2005-02-24 19:33   ` Trond Myklebust
2005-02-24 20:43     ` nhorman
2005-01-25 17:39 Garrick Staples
2005-01-26  6:06 ` Trond Myklebust
2005-01-26  6:35   ` Garrick Staples
2005-01-26 13:11     ` Neil Horman
2005-01-26 14:31     ` raven
2005-01-26 17:49       ` Garrick Staples
2005-01-28  0:49         ` Ian Kent
2005-01-26 13:07   ` Neil Horman

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.