All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC] vfs intent lookup patch
@ 2002-10-14 22:53 Peter Braam
  2002-10-15 15:58 ` William A.(Andy) Adamson
  0 siblings, 1 reply; 5+ messages in thread
From: Peter Braam @ 2002-10-14 22:53 UTC (permalink / raw)
  To: viro, linux-fsdevel

Hi Al, 

Attached is a 2.4 patch for lookup intents, which we use with Lustre.
This patch adds a lookup2, revalidate2 and intent_release method.
None of those are required if the kernel changes the parameters
globally, but I wanted to keep things simple.

The purpose of the patch is to give the lookup call sufficient
information about the "intent" of the lookup.  Intents can be tags
like "a create/open is coming".  With the intent patch, distributed
O_EXCL problems, from which NFS suffers too, are easily addressed.
For Lustre, which runs on 1,000+ node clusters, reducing file system
transactions to a single RPC from ->lookup is the core benefit.  For
InterMezzo the intent opens the opportunity to refresh the cache
before the ->create/mkdir etc methods lock anything, which is very
beneficial.  This is much along the lines of what we discussed over
the last year and a bit.

We would like to submit something like this for 2.5, for use with
Lustre and InterMezzo. 

The 2.5 patch is similar, but in 2.5 the struct intent is conveniently
embedded in the struct nameidata making things quite a bit simpler.
Intent_release is done as part of releasing the nameidata and still
requires a new dentry method.  We will send you this patch in a few
days time.

However, I suspect that after you review this you might come up with
an equivalent but better way of doing this.  Particularly interesting
to us is how the d_it field we added to the dentry can be protected
between the return from lookup and the use of that field in
->create/mkdir etc.  We do that with a semaphore (which we placed
under d_fsdata); alternatives are passing the nameidata or intent to
every inode operation that requires the intent.

Best wishes,

- Peter -

--- kernel-2.4.18-pristine/include/linux/dcache.h	2002-10-14 13:51:28.000000000 -0600
+++ kernel-2.4.18/include/linux/dcache.h	2002-10-14 14:08:23.000000000 -0600
@@ -6,6 +6,34 @@
 #include <asm/atomic.h>
 #include <linux/mount.h>
 
+#define IT_OPEN  (1)
+#define IT_CREAT  (1<<1)
+#define IT_MKDIR  (1<<2)
+#define IT_LINK  (1<<3)
+#define IT_LINK2  (1<<4)
+#define IT_SYMLINK  (1<<5)
+#define IT_UNLINK  (1<<6)
+#define IT_RMDIR  (1<<7)
+#define IT_RENAME  (1<<8)
+#define IT_RENAME2  (1<<9)
+#define IT_READDIR  (1<<10)
+#define IT_GETATTR  (1<<11)
+#define IT_SETATTR  (1<<12)
+#define IT_READLINK  (1<<13)
+#define IT_MKNOD  (1<<14)
+#define IT_LOOKUP  (1<<15)
+
+struct lookup_intent {
+	int it_op;
+	int it_mode;
+	int it_disposition;
+	int it_status;
+	struct iattr *it_iattr;
+	__u64 it_lock_handle[2];
+	int it_lock_mode;
+	void *it_data;
+};
+
 /*
  * linux/include/linux/dcache.h
  *
@@ -79,6 +107,7 @@
 	unsigned long d_time;		/* used by d_revalidate */
 	struct dentry_operations  *d_op;
 	struct super_block * d_sb;	/* The root of the dentry tree */
+	struct lookup_intent *d_it;
 	unsigned long d_vfs_flags;
 	void * d_fsdata;		/* fs-specific data */
 	void * d_extra_attributes;	/* TUX-specific data */
@@ -92,6 +121,8 @@
 	int (*d_delete)(struct dentry *);
 	void (*d_release)(struct dentry *);
 	void (*d_iput)(struct dentry *, struct inode *);
+	int (*d_revalidate2)(struct dentry *, int, struct lookup_intent *);
+	void (*d_intent_release)(struct dentry *, struct lookup_intent *);
 };
 
 /* the dentry parameter passed to d_hash and d_compare is the parent
--- kernel-2.4.18-pristine/include/linux/fs.h	2002-10-14 13:47:27.000000000 -0600
+++ kernel-2.4.18/include/linux/fs.h	2002-10-14 14:08:23.000000000 -0600
@@ -572,6 +572,7 @@
 
 	/* needed for tty driver, and maybe others */
 	void			*private_data;
+	struct lookup_intent    *f_intent;
 
 	/* preallocated helper kiobuf to speedup O_DIRECT */
 	struct kiobuf		*f_iobuf;
@@ -829,7 +830,9 @@
 extern int vfs_link(struct dentry *, struct inode *, struct dentry *);
 extern int vfs_rmdir(struct inode *, struct dentry *);
 extern int vfs_unlink(struct inode *, struct dentry *);
-extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
+int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+		struct inode *new_dir, struct dentry *new_dentry,
+		struct lookup_intent *it);
 
 /*
  * File types
@@ -890,6 +893,7 @@
 struct inode_operations {
 	int (*create) (struct inode *,struct dentry *,int);
 	struct dentry * (*lookup) (struct inode *,struct dentry *);
+	struct dentry * (*lookup2) (struct inode *,struct dentry *, struct lookup_intent *);
 	int (*link) (struct dentry *,struct inode *,struct dentry *);
 	int (*unlink) (struct inode *,struct dentry *);
 	int (*symlink) (struct inode *,struct dentry *,const char *);
@@ -1036,6 +1040,7 @@
 extern struct vfsmount *kern_mount(struct file_system_type *);
 extern int may_umount(struct vfsmount *);
 extern long do_mount(char *, char *, char *, unsigned long, void *);
+struct vfsmount *do_kern_mount(const char *type, int flags, char *name, void *data);
 extern void umount_tree(struct vfsmount *);
 
 #define kern_umount mntput
@@ -1370,6 +1375,7 @@
 extern loff_t default_llseek(struct file *file, loff_t offset, int origin);
 
 extern int FASTCALL(__user_walk(const char *, unsigned, struct nameidata *));
+extern int FASTCALL(__user_walk_it(const char *, unsigned, struct nameidata *, struct lookup_intent *it));
 extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *));
 extern int FASTCALL(path_walk(const char *, struct nameidata *));
 extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *));
@@ -1381,6 +1387,8 @@
 extern struct dentry * lookup_hash(struct qstr *, struct dentry *);
 #define user_path_walk(name,nd)	 __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd)
 #define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd)
+#define user_path_walk_it(name,nd,it)  __user_walk_it(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd, it)
+#define user_path_walk_link_it(name,nd,it) __user_walk_it(name, LOOKUP_POSITIVE, nd, it)
 
 extern void iput(struct inode *);
 extern void force_delete(struct inode *);
--- kernel-2.4.18-pristine/fs/dcache.c	2002-10-14 13:47:27.000000000 -0600
+++ kernel-2.4.18/fs/dcache.c	2002-10-14 14:08:23.000000000 -0600
@@ -645,6 +645,7 @@
 	dentry->d_fsdata = NULL;
 	dentry->d_extra_attributes = NULL;
 	dentry->d_mounted = 0;
+	dentry->d_it = NULL;
 	INIT_LIST_HEAD(&dentry->d_hash);
 	INIT_LIST_HEAD(&dentry->d_lru);
 	INIT_LIST_HEAD(&dentry->d_subdirs);
--- kernel-2.4.18-pristine/fs/namei.c	2002-10-14 13:56:44.000000000 -0600
+++ kernel-2.4.18/fs/namei.c	2002-10-14 14:08:23.000000000 -0600
@@ -94,6 +94,14 @@
  * XEmacs seems to be relying on it...
  */
 
+void intent_release(struct dentry *de, struct lookup_intent *it)
+{
+	if (de->d_op && de->d_op->d_intent_release)
+		de->d_op->d_intent_release(de, it);
+	de->d_it = NULL;
+}
+
+
 /* In order to reduce some races, while at the same time doing additional
  * checking and hopefully speeding things up, we copy filenames to the
  * kernel data space before using them..
@@ -260,10 +268,19 @@
  * Internal lookup() using the new generic dcache.
  * SMP-safe
  */
-static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags)
+static struct dentry *cached_lookup(struct dentry *parent, struct qstr *name,
+				    int flags, struct lookup_intent *it)
 {
 	struct dentry * dentry = d_lookup(parent, name);
 
+	if (dentry && dentry->d_op && dentry->d_op->d_revalidate2) {
+		if (!dentry->d_op->d_revalidate2(dentry, flags, it) &&
+		    !d_invalidate(dentry)) {
+			dput(dentry);
+			dentry = NULL;
+		}
+		return dentry;
+	} else
 	if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
 		if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
 			dput(dentry);
@@ -281,7 +298,8 @@
  * make sure that nobody added the entry to the dcache in the meantime..
  * SMP-safe
  */
-static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags)
+static struct dentry *real_lookup(struct dentry *parent, struct qstr *name,
+				  int flags, struct lookup_intent *it)
 {
 	struct dentry * result;
 	struct inode *dir = parent->d_inode;
@@ -300,6 +318,9 @@
 		result = ERR_PTR(-ENOMEM);
 		if (dentry) {
 			lock_kernel();
+			if (dir->i_op->lookup2)
+				result = dir->i_op->lookup2(dir, dentry, it);
+			else
 			result = dir->i_op->lookup(dir, dentry);
 			unlock_kernel();
 			if (result)
@@ -321,6 +342,12 @@
 			dput(result);
 			result = ERR_PTR(-ENOENT);
 		}
+	} else if (result->d_op && result->d_op->d_revalidate2) {
+		if (!result->d_op->d_revalidate2(result, flags, it) &&
+		    !d_invalidate(result)) {
+			dput(result);
+			result = ERR_PTR(-ENOENT);
+		}
 	}
 	return result;
 }
@@ -447,7 +474,8 @@
  *
  * We expect 'base' to be positive and a directory.
  */
-int link_path_walk(const char * name, struct nameidata *nd)
+int link_path_walk_it(const char *name, struct nameidata *nd,
+		      struct lookup_intent *it)
 {
 	struct dentry *dentry;
 	struct inode *inode;
@@ -524,12 +552,12 @@
 				break;
 		}
 		/* This does the actual lookups.. */
-		dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
+		dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL);
 		if (!dentry) {
 			err = -EWOULDBLOCKIO;
 			if (atomic)
 				break;
-			dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
+			dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL);
 			err = PTR_ERR(dentry);
 			if (IS_ERR(dentry))
 				break;
@@ -563,7 +591,7 @@
 			nd->dentry = dentry;
 		}
 		err = -ENOTDIR; 
-		if (!inode->i_op->lookup)
+		if (!inode->i_op->lookup && !inode->i_op->lookup2)
 			break;
 		continue;
 		/* here ends the main loop */
@@ -590,12 +618,12 @@
 			if (err < 0)
 				break;
 		}
-		dentry = cached_lookup(nd->dentry, &this, 0);
+		dentry = cached_lookup(nd->dentry, &this, 0, it);
 		if (!dentry) {
 			err = -EWOULDBLOCKIO;
 			if (atomic)
 				break;
-			dentry = real_lookup(nd->dentry, &this, 0);
+			dentry = real_lookup(nd->dentry, &this, 0, it);
 			err = PTR_ERR(dentry);
 			if (IS_ERR(dentry))
 				break;
@@ -619,7 +647,8 @@
 			goto no_inode;
 		if (lookup_flags & LOOKUP_DIRECTORY) {
 			err = -ENOTDIR; 
-			if (!inode->i_op || !inode->i_op->lookup)
+			if (!inode->i_op || (!inode->i_op->lookup &&
+					     !inode->i_op->lookup2))
 				break;
 		}
 		goto return_base;
@@ -661,10 +690,21 @@
 	return err;
 }
 
+int link_path_walk(const char * name, struct nameidata *nd)
+{
+	return link_path_walk_it(name, nd, NULL);
+}
+
+int path_walk_it(const char * name, struct nameidata *nd, struct lookup_intent *it)
+{
+	current->total_link_count = 0;
+	return link_path_walk_it(name, nd, it);
+}
+
 int path_walk(const char * name, struct nameidata *nd)
 {
 	current->total_link_count = 0;
-	return link_path_walk(name, nd);
+	return link_path_walk_it(name, nd, NULL);
 }
 
 /* SMP-safe */
@@ -749,6 +789,17 @@
 }
 
 /* SMP-safe */
+int path_lookup_it(const char *path, unsigned flags, struct nameidata *nd,
+		   struct lookup_intent *it)
+{
+	int error = 0;
+	if (path_init(path, flags, nd))
+		error = path_walk_it(path, nd, it);
+	return error;
+}
+
+
+/* SMP-safe */
 int path_lookup(const char *path, unsigned flags, struct nameidata *nd)
 {
 	int error = 0;
@@ -777,7 +828,8 @@
  * needs parent already locked. Doesn't follow mounts.
  * SMP-safe.
  */
-struct dentry * lookup_hash(struct qstr *name, struct dentry * base)
+struct dentry * lookup_hash_it(struct qstr *name, struct dentry * base,
+			       struct lookup_intent *it)
 {
 	struct dentry * dentry;
 	struct inode *inode;
@@ -800,13 +852,16 @@
 			goto out;
 	}
 
-	dentry = cached_lookup(base, name, 0);
+	dentry = cached_lookup(base, name, 0, it);
 	if (!dentry) {
 		struct dentry *new = d_alloc(base, name);
 		dentry = ERR_PTR(-ENOMEM);
 		if (!new)
 			goto out;
 		lock_kernel();
+		if (inode->i_op->lookup2)
+			dentry = inode->i_op->lookup2(inode, new, it);
+		else
 		dentry = inode->i_op->lookup(inode, new);
 		unlock_kernel();
 		if (!dentry)
@@ -818,6 +873,12 @@
 	return dentry;
 }
 
+struct dentry * lookup_hash(struct qstr *name, struct dentry * base)
+{
+	return lookup_hash_it(name, base, NULL);
+}
+
+
 /* SMP-safe */
 struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
 {
@@ -839,7 +900,7 @@
 	}
 	this.hash = end_name_hash(hash);
 
-	return lookup_hash(&this, base);
+	return lookup_hash_it(&this, base, NULL);
 access:
 	return ERR_PTR(-EACCES);
 }
@@ -870,6 +931,23 @@
 	return err;
 }
 
+int __user_walk_it(const char *name, unsigned flags, struct nameidata *nd,
+		   struct lookup_intent *it)
+{
+	char *tmp;
+	int err;
+
+	tmp = getname(name);
+	err = PTR_ERR(tmp);
+	if (!IS_ERR(tmp)) {
+		err = 0;
+		if (path_init(tmp, flags, nd))
+			err = path_walk_it(tmp, nd, it);
+		putname(tmp);
+	}
+	return err;
+}
+
 /*
  * It's inline, so penalty for filesystems that don't use sticky bit is
  * minimal.
@@ -1008,7 +1086,8 @@
  * for symlinks (where the permissions are checked later).
  * SMP-safe
  */
-int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
+int open_namei_it(const char *pathname, int flag, int mode,
+		  struct nameidata *nd, struct lookup_intent *it)
 {
 	int acc_mode, error = 0;
 	struct inode *inode;
@@ -1022,7 +1101,7 @@
 	 * The simplest case - just a plain lookup.
 	 */
 	if (!(flag & O_CREAT)) {
-		error = path_lookup(pathname, lookup_flags(flag), nd);
+		error = path_lookup_it(pathname, lookup_flags(flag), nd, it);
 		if (error)
 			return error;
 		dentry = nd->dentry;
@@ -1032,6 +1111,10 @@
 	/*
 	 * Create - we need to know the parent.
 	 */
+	if (it) {
+		it->it_mode = mode;
+		it->it_op |= IT_CREAT;
+	}
 	error = path_lookup(pathname, LOOKUP_PARENT, nd);
 	if (error)
 		return error;
@@ -1047,7 +1130,7 @@
 
 	dir = nd->dentry;
 	down(&dir->d_inode->i_sem);
-	dentry = lookup_hash(&nd->last, nd->dentry);
+	dentry = lookup_hash_it(&nd->last, nd->dentry, it);
 
 do_last:
 	error = PTR_ERR(dentry);
@@ -1056,6 +1139,7 @@
 		goto exit;
 	}
 
+	it->it_mode = mode;
 	/* Negative dentry, just create the file */
 	if (!dentry->d_inode) {
 		error = vfs_create(dir->d_inode, dentry,
@@ -1175,8 +1259,10 @@
 	return 0;
 
 exit_dput:
+	intent_release(dentry, it);
 	dput(dentry);
 exit:
+	intent_release(nd->dentry, it);
 	path_release(nd);
 	return error;
 
@@ -1196,6 +1282,8 @@
 	 */
 	UPDATE_ATIME(dentry->d_inode);
 	error = dentry->d_inode->i_op->follow_link(dentry, nd);
+	if (error)
+		intent_release(dentry, it);
 	dput(dentry);
 	if (error)
 		return error;
@@ -1217,13 +1305,20 @@
 	}
 	dir = nd->dentry;
 	down(&dir->d_inode->i_sem);
-	dentry = lookup_hash(&nd->last, nd->dentry);
+	dentry = lookup_hash_it(&nd->last, nd->dentry, NULL);
 	putname(nd->last.name);
 	goto do_last;
 }
 
+int open_namei(const char *pathname, int flag, int mode, struct nameidata *nd)
+{
+	return open_namei_it(pathname, flag, mode, nd, NULL);
+}
+
+
 /* SMP-safe */
-static struct dentry *lookup_create(struct nameidata *nd, int is_dir)
+static struct dentry *lookup_create(struct nameidata *nd, int is_dir,
+				    struct lookup_intent *it)
 {
 	struct dentry *dentry;
 
@@ -1231,7 +1326,7 @@
 	dentry = ERR_PTR(-EEXIST);
 	if (nd->last_type != LAST_NORM)
 		goto fail;
-	dentry = lookup_hash(&nd->last, nd->dentry);
+	dentry = lookup_hash_it(&nd->last, nd->dentry, it);
 	if (IS_ERR(dentry))
 		goto fail;
 	if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
@@ -1277,6 +1372,7 @@
 	char * tmp;
 	struct dentry * dentry;
 	struct nameidata nd;
+	struct lookup_intent it = { .it_op = IT_MKNOD, .it_mode = mode };
 
 	if (S_ISDIR(mode))
 		return -EPERM;
@@ -1287,7 +1383,7 @@
 	error = path_lookup(tmp, LOOKUP_PARENT, &nd);
 	if (error)
 		goto out;
-	dentry = lookup_create(&nd, 0);
+	dentry = lookup_create(&nd, 0, &it);
 	error = PTR_ERR(dentry);
 
 	mode &= ~current->fs->umask;
@@ -1305,6 +1401,7 @@
 		default:
 			error = -EINVAL;
 		}
+		intent_release(dentry, &it);
 		dput(dentry);
 	}
 	up(&nd.dentry->d_inode->i_sem);
@@ -1345,6 +1442,7 @@
 {
 	int error = 0;
 	char * tmp;
+	struct lookup_intent it = { .it_op = IT_MKDIR, .it_mode = mode };
 
 	tmp = getname(pathname);
 	error = PTR_ERR(tmp);
@@ -1355,11 +1453,12 @@
 		error = path_lookup(tmp, LOOKUP_PARENT, &nd);
 		if (error)
 			goto out;
-		dentry = lookup_create(&nd, 1);
+		dentry = lookup_create(&nd, 1, &it);
 		error = PTR_ERR(dentry);
 		if (!IS_ERR(dentry)) {
 			error = vfs_mkdir(nd.dentry->d_inode, dentry,
 					  mode & ~current->fs->umask);
+			intent_release(dentry, &it);
 			dput(dentry);
 		}
 		up(&nd.dentry->d_inode->i_sem);
@@ -1439,6 +1538,7 @@
 	char * name;
 	struct dentry *dentry;
 	struct nameidata nd;
+	struct lookup_intent it = { .it_op = IT_RMDIR };
 
 	name = getname(pathname);
 	if(IS_ERR(name))
@@ -1460,10 +1560,11 @@
 			goto exit1;
 	}
 	down(&nd.dentry->d_inode->i_sem);
-	dentry = lookup_hash(&nd.last, nd.dentry);
+	dentry = lookup_hash_it(&nd.last, nd.dentry, &it);
 	error = PTR_ERR(dentry);
 	if (!IS_ERR(dentry)) {
 		error = vfs_rmdir(nd.dentry->d_inode, dentry);
+		intent_release(dentry, &it);
 		dput(dentry);
 	}
 	up(&nd.dentry->d_inode->i_sem);
@@ -1507,6 +1608,7 @@
 	char * name;
 	struct dentry *dentry;
 	struct nameidata nd;
+	struct lookup_intent it = { .it_op = IT_UNLINK };
 
 	name = getname(pathname);
 	if(IS_ERR(name))
@@ -1519,7 +1621,7 @@
 	if (nd.last_type != LAST_NORM)
 		goto exit1;
 	down(&nd.dentry->d_inode->i_sem);
-	dentry = lookup_hash(&nd.last, nd.dentry);
+	dentry = lookup_hash_it(&nd.last, nd.dentry, &it);
 	error = PTR_ERR(dentry);
 	if (!IS_ERR(dentry)) {
 		/* Why not before? Because we want correct error value */
@@ -1527,6 +1629,7 @@
 			goto slashes;
 		error = vfs_unlink(nd.dentry->d_inode, dentry);
 	exit2:
+		intent_release(dentry, &it);
 		dput(dentry);
 	}
 	up(&nd.dentry->d_inode->i_sem);
@@ -1573,6 +1676,7 @@
 	int error = 0;
 	char * from;
 	char * to;
+	struct lookup_intent it = { .it_op = IT_SYMLINK };
 
 	from = getname(oldname);
 	if(IS_ERR(from))
@@ -1586,10 +1690,12 @@
 		error = path_lookup(to, LOOKUP_PARENT, &nd);
 		if (error)
 			goto out;
-		dentry = lookup_create(&nd, 0);
+		it.it_data = from;
+		dentry = lookup_create(&nd, 0, &it);
 		error = PTR_ERR(dentry);
 		if (!IS_ERR(dentry)) {
 			error = vfs_symlink(nd.dentry->d_inode, dentry, from);
+			intent_release(dentry, &it);
 			dput(dentry);
 		}
 		up(&nd.dentry->d_inode->i_sem);
@@ -1654,6 +1760,7 @@
 {
 	int error;
 	char * to;
+	struct lookup_intent it = { .it_op = IT_LINK };
 
 	to = getname(newname);
 	error = PTR_ERR(to);
@@ -1661,7 +1768,7 @@
 		struct dentry *new_dentry;
 		struct nameidata nd, old_nd;
 
-		error = __user_walk(oldname, LOOKUP_POSITIVE, &old_nd);
+		error = __user_walk_it(oldname, LOOKUP_POSITIVE, &old_nd, &it);
 		if (error)
 			goto exit;
 		error = path_lookup(to, LOOKUP_PARENT, &nd);
@@ -1715,7 +1822,8 @@
  *	   locking].
  */
 int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
-	       struct inode *new_dir, struct dentry *new_dentry)
+		   struct inode *new_dir, struct dentry *new_dentry,
+		   struct lookup_intent *it)
 {
 	int error;
 	struct inode *target;
@@ -1773,6 +1881,7 @@
 		error = -EBUSY;
 	else 
 		error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
+	intent_release(new_dentry, it);
 	if (target) {
 		if (!error)
 			target->i_flags |= S_DEAD;
@@ -1794,7 +1903,8 @@
 }
 
 int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
-	       struct inode *new_dir, struct dentry *new_dentry)
+		     struct inode *new_dir, struct dentry *new_dentry,
+		     struct lookup_intent *it)
 {
 	int error;
 
@@ -1825,6 +1935,7 @@
 		error = -EBUSY;
 	else
 		error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
+	intent_release(new_dentry, it);
 	double_up(&old_dir->i_zombie, &new_dir->i_zombie);
 	if (error)
 		return error;
@@ -1836,13 +1947,14 @@
 }
 
 int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
-	       struct inode *new_dir, struct dentry *new_dentry)
+	       struct inode *new_dir, struct dentry *new_dentry,
+	       struct lookup_intent *it)
 {
 	int error;
 	if (S_ISDIR(old_dentry->d_inode->i_mode))
-		error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
+		error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry,it);
 	else
-		error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
+		error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry,it);
 	if (!error) {
 		if (old_dir == new_dir)
 			inode_dir_notify(old_dir, DN_RENAME);
@@ -1859,6 +1971,7 @@
 	int error = 0;
 	struct dentry * old_dir, * new_dir;
 	struct dentry * old_dentry, *new_dentry;
+	struct lookup_intent it = { .it_op = IT_RENAME };
 	struct nameidata oldnd, newnd;
 
 	error = path_lookup(oldname, LOOKUP_PARENT, &oldnd);
@@ -1884,7 +1997,7 @@
 
 	double_lock(new_dir, old_dir);
 
-	old_dentry = lookup_hash(&oldnd.last, old_dir);
+	old_dentry = lookup_hash_it(&oldnd.last, old_dir, &it);
 	error = PTR_ERR(old_dentry);
 	if (IS_ERR(old_dentry))
 		goto exit3;
@@ -1900,18 +2013,21 @@
 		if (newnd.last.name[newnd.last.len])
 			goto exit4;
 	}
-	new_dentry = lookup_hash(&newnd.last, new_dir);
+	it.it_op = IT_RENAME2;
+	new_dentry = lookup_hash_it(&newnd.last, new_dir, &it);
 	error = PTR_ERR(new_dentry);
 	if (IS_ERR(new_dentry))
 		goto exit4;
 
 	lock_kernel();
 	error = vfs_rename(old_dir->d_inode, old_dentry,
-				   new_dir->d_inode, new_dentry);
+				   new_dir->d_inode, new_dentry, &it);
 	unlock_kernel();
 
+	intent_release(new_dentry, &it);
 	dput(new_dentry);
 exit4:
+	intent_release(old_dentry, &it); // FIXME: release same intent twice!!!
 	dput(old_dentry);
 exit3:
 	double_up(&new_dir->d_inode->i_sem, &old_dir->d_inode->i_sem);
--- kernel-2.4.18-pristine/fs/open.c	2002-10-14 13:47:27.000000000 -0600
+++ kernel-2.4.18/fs/open.c	2002-10-14 14:08:23.000000000 -0600
@@ -19,6 +19,9 @@
 #include <asm/uaccess.h>
 
 #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))
+extern int path_walk_it(const char *name, struct nameidata *nd,
+			struct lookup_intent *it);
+extern void intent_release(struct dentry *de, struct lookup_intent *it);
 
 int vfs_statfs(struct super_block *sb, struct statfs *buf)
 {
@@ -118,12 +121,13 @@
 	struct nameidata nd;
 	struct inode * inode;
 	int error;
+	struct lookup_intent it = { .it_op = IT_SETATTR };
 
 	error = -EINVAL;
 	if (length < 0)	/* sorry, but loff_t says... */
 		goto out;
 
-	error = user_path_walk(path, &nd);
+	error = user_path_walk_it(path, &nd, &it);
 	if (error)
 		goto out;
 	inode = nd.dentry->d_inode;
@@ -168,6 +172,7 @@
 	put_write_access(inode);
 
 dput_and_out:
+	intent_release(nd.dentry, &it);
 	path_release(&nd);
 out:
 	return error;
@@ -259,8 +264,9 @@
 	struct nameidata nd;
 	struct inode * inode;
 	struct iattr newattrs;
+	struct lookup_intent it = { .it_op = IT_SETATTR };
 
-	error = user_path_walk(filename, &nd);
+	error = user_path_walk_it(filename, &nd, &it);
 	if (error)
 		goto out;
 	inode = nd.dentry->d_inode;
@@ -286,6 +292,7 @@
 	}
 	error = notify_change(nd.dentry, &newattrs);
 dput_and_out:
+	intent_release(nd.dentry, &it);
 	path_release(&nd);
 out:
 	return error;
@@ -303,8 +310,9 @@
 	struct nameidata nd;
 	struct inode * inode;
 	struct iattr newattrs;
+	struct lookup_intent it = { .it_op = IT_SETATTR };
 
-	error = user_path_walk(filename, &nd);
+	error = user_path_walk_it(filename, &nd, &it);
 
 	if (error)
 		goto out;
@@ -330,6 +338,7 @@
 	}
 	error = notify_change(nd.dentry, &newattrs);
 dput_and_out:
+	intent_release(nd.dentry, &it);
 	path_release(&nd);
 out:
 	return error;
@@ -346,6 +355,7 @@
 	int old_fsuid, old_fsgid;
 	kernel_cap_t old_cap;
 	int res;
+	struct lookup_intent it = { .it_op = IT_GETATTR };
 
 	if (mode & ~S_IRWXO)	/* where's F_OK, X_OK, W_OK, R_OK? */
 		return -EINVAL;
@@ -363,13 +373,14 @@
 	else
 		current->cap_effective = current->cap_permitted;
 
-	res = user_path_walk(filename, &nd);
+	res = user_path_walk_it(filename, &nd, &it);
 	if (!res) {
 		res = permission(nd.dentry->d_inode, mode);
 		/* SuS v2 requires we report a read only fs too */
 		if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode)
 		   && !special_file(nd.dentry->d_inode->i_mode))
 			res = -EROFS;
+		intent_release(nd.dentry, &it);
 		path_release(&nd);
 	}
 
@@ -384,8 +395,11 @@
 {
 	int error;
 	struct nameidata nd;
+	struct lookup_intent it = { .it_op = IT_GETATTR };
 
-	error = __user_walk(filename,LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,&nd);
+	error = __user_walk_it(filename,
+			       LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,
+			       &nd, &it);
 	if (error)
 		goto out;
 
@@ -396,6 +410,7 @@
 	set_fs_pwd(current->fs, nd.mnt, nd.dentry);
 
 dput_and_out:
+	intent_release(nd.dentry, &it);
 	path_release(&nd);
 out:
 	return error;
@@ -435,9 +450,10 @@
 {
 	int error;
 	struct nameidata nd;
+	struct lookup_intent it = { .it_op = IT_GETATTR };
 
-	error = __user_walk(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW |
-		      LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd);
+	error = __user_walk_it(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW |
+			       LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd, &it);
 	if (error)
 		goto out;
 
@@ -453,6 +469,7 @@
 	set_fs_altroot();
 	error = 0;
 dput_and_out:
+	intent_release(nd.dentry, &it);
 	path_release(&nd);
 out:
 	return error;
@@ -497,8 +514,9 @@
 	struct inode * inode;
 	int error;
 	struct iattr newattrs;
+	struct lookup_intent it = { .it_op = IT_SETATTR };
 
-	error = user_path_walk(filename, &nd);
+	error = user_path_walk_it(filename, &nd, &it);
 	if (error)
 		goto out;
 	inode = nd.dentry->d_inode;
@@ -518,6 +536,7 @@
 	error = notify_change(nd.dentry, &newattrs);
 
 dput_and_out:
+	intent_release(nd.dentry, &it);
 	path_release(&nd);
 out:
 	return error;
@@ -587,10 +606,12 @@
 {
 	struct nameidata nd;
 	int error;
+	struct lookup_intent it = { .it_op = IT_SETATTR };
 
-	error = user_path_walk(filename, &nd);
+	error = user_path_walk_it(filename, &nd, &it);
 	if (!error) {
 		error = chown_common(nd.dentry, user, group);
+		intent_release(nd.dentry, &it);
 		path_release(&nd);
 	}
 	return error;
@@ -600,10 +621,12 @@
 {
 	struct nameidata nd;
 	int error;
+	struct lookup_intent it = { .it_op = IT_SETATTR };
 
-	error = user_path_walk_link(filename, &nd);
+	error = user_path_walk_link_it(filename, &nd, &it);
 	if (!error) {
 		error = chown_common(nd.dentry, user, group);
+		intent_release(nd.dentry, &it);
 		path_release(&nd);
 	}
 	return error;
@@ -637,10 +660,16 @@
  * for the internal routines (ie open_namei()/follow_link() etc). 00 is
  * used by symlinks.
  */
+extern int open_namei_it(const char *filename, int namei_flags, int mode,
+			 struct nameidata *nd, struct lookup_intent *it);
+struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt,
+			    int flags, struct lookup_intent *it);
+
 struct file *filp_open(const char * filename, int flags, int mode)
 {
 	int namei_flags, error;
 	struct nameidata nd;
+	struct lookup_intent it = { .it_op = IT_OPEN };
 
 	namei_flags = flags;
 	if ((namei_flags+1) & O_ACCMODE)
@@ -648,14 +677,15 @@
 	if (namei_flags & O_TRUNC)
 		namei_flags |= 2;
 
-	error = open_namei(filename, namei_flags, mode, &nd);
-	if (!error)
-		return dentry_open(nd.dentry, nd.mnt, flags);
-
-	return ERR_PTR(error);
+	error = open_namei_it(filename, namei_flags, mode, &nd, &it);
+ 	if (error)
+ 		return ERR_PTR(error);
+ 
+ 	return dentry_open_it(nd.dentry, nd.mnt, flags, &it);
 }
 
-struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
+struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt,
+ 			    int flags, struct lookup_intent *it)
 {
 	struct file * f;
 	struct inode *inode;
@@ -698,6 +728,7 @@
 	}
 	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
 
+	intent_release(dentry, it);
 	return f;
 
 cleanup_all:
@@ -712,11 +743,17 @@
 cleanup_file:
 	put_filp(f);
 cleanup_dentry:
+	intent_release(dentry, it);
 	dput(dentry);
 	mntput(mnt);
 	return ERR_PTR(error);
 }
 
+struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
+{
+	return dentry_open_it(dentry, mnt, flags, NULL);
+}
+
 /*
  * Find an empty file descriptor entry, and mark it busy.
  */
--- kernel-2.4.18-pristine/fs/stat.c	2002-10-14 13:47:27.000000000 -0600
+++ kernel-2.4.18/fs/stat.c	2002-10-14 14:08:23.000000000 -0600
@@ -13,6 +13,7 @@
 
 #include <asm/uaccess.h>
 
+extern void intent_release(struct dentry *de, struct lookup_intent *it);
 /*
  * Revalidate the inode. This is required for proper NFS attribute caching.
  */
@@ -104,10 +105,12 @@
 {
 	struct nameidata nd;
 	int error;
+	struct lookup_intent it = { .it_op = IT_GETATTR };
 
-	error = user_path_walk(name, &nd);
+	error = user_path_walk_it(name, &nd, &it);
 	if (!error) {
 		error = do_getattr(nd.mnt, nd.dentry, stat);
+		intent_release(nd.dentry, &it);
 		path_release(&nd);
 	}
 	return error;
@@ -117,10 +120,12 @@
 {
 	struct nameidata nd;
 	int error;
+	struct lookup_intent it = { .it_op = IT_GETATTR };
 
-	error = user_path_walk_link(name, &nd);
+	error = user_path_walk_link_it(name, &nd, &it);
 	if (!error) {
 		error = do_getattr(nd.mnt, nd.dentry, stat);
+		intent_release(nd.dentry, &it);
 		path_release(&nd);
 	}
 	return error;

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [RFC] vfs intent lookup patch
  2002-10-14 22:53 [RFC] vfs intent lookup patch Peter Braam
@ 2002-10-15 15:58 ` William A.(Andy) Adamson
  2002-10-16 21:30   ` Peter Braam
  0 siblings, 1 reply; 5+ messages in thread
From: William A.(Andy) Adamson @ 2002-10-15 15:58 UTC (permalink / raw)
  To: Peter Braam; +Cc: viro, linux-fsdevel, nfsv4-wg

hi peter

i've looked over your patch WRT NFSv4 open semantics. for regular files, the 
NFSv4 open combines lookup, open, and create. this means that we need both the 
mode and the open flags to satisfy the OPEN operation, which we would 
implement in the nfsv4 version of lookup2. as i follow the code path in your 
patch:

filp_open -> open_namei_it which receives both the mode and the open flags. 
but then the open flags are lost in the call to path_lookup_it, which 
translates the open flags into lookup_flags. so, if i'm reading the code 
correctly, NFSv4 would require another field in the intent struct to hold the 
open flags.

-->Andy


> Hi Al,
> 
> Attached is a 2.4 patch for lookup intents, which we use with Lustre.
> This patch adds a lookup2, revalidate2 and intent_release method.
> None of those are required if the kernel changes the parameters
> globally, but I wanted to keep things simple.
> 
> The purpose of the patch is to give the lookup call sufficient
> information about the "intent" of the lookup.  Intents can be tags
> like "a create/open is coming".  With the intent patch, distributed
> O_EXCL problems, from which NFS suffers too, are easily addressed.
> For Lustre, which runs on 1,000+ node clusters, reducing file system
> transactions to a single RPC from ->lookup is the core benefit.  For
> InterMezzo the intent opens the opportunity to refresh the cache
> before the ->create/mkdir etc methods lock anything, which is very
> beneficial.  This is much along the lines of what we discussed over
> the last year and a bit.
> 
> We would like to submit something like this for 2.5, for use with
> Lustre and InterMezzo. 
> 
> The 2.5 patch is similar, but in 2.5 the struct intent is conveniently
> embedded in the struct nameidata making things quite a bit simpler.
> Intent_release is done as part of releasing the nameidata and still
> requires a new dentry method.  We will send you this patch in a few
> days time.
> 
> However, I suspect that after you review this you might come up with
> an equivalent but better way of doing this.  Particularly interesting
> to us is how the d_it field we added to the dentry can be protected
> between the return from lookup and the use of that field in
> ->create/mkdir etc.  We do that with a semaphore (which we placed
> under d_fsdata); alternatives are passing the nameidata or intent to
> every inode operation that requires the intent.
> 
> Best wishes,
> 
> - Peter -
> 



^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [RFC] vfs intent lookup patch
@ 2002-10-16  0:49 Bryan Henderson
  0 siblings, 0 replies; 5+ messages in thread
From: Bryan Henderson @ 2002-10-16  0:49 UTC (permalink / raw)
  To: Peter Braam; +Cc: linux-fsdevel, linux-fsdevel-owner, viro


>Attached is a 2.4 patch for lookup intents, which we use with Lustre.
>This patch adds a lookup2, revalidate2 and intent_release method.
>None of those are required if the kernel changes the parameters
>globally, but I wanted to keep things simple.
>
>The purpose of the patch is to give the lookup call sufficient
>information about the "intent" of the lookup.  Intents can be tags
>like "a create/open is coming".  With the intent patch, distributed
>O_EXCL problems, from which NFS suffers too, are easily addressed.
>For Lustre, which runs on 1,000+ node clusters, reducing file system
>transactions to a single RPC from ->lookup is the core benefit.

This lookup intent scheme appears to be a locking scheme -- to allow one to
do a lookup plus another operation atomically.  From what you say about the
one RPC, I think it might also be a scheme actually to combine two
operations into one.

This makes me wonder if it wouldn't be more straightforward just to make
combined versions of all those operations -- in effect, a higher level VFS.
For example, create-and-open is a common VFS operation (its absence from
Linux is why O_EXCL can't work in shared filesystems).  Some of the other
combinations aren't as obviously useful to me, but a "lookup-and-unlink"
looks better to me than a "lookup-for-unlink" followed by "unlink" or
"never-mind".

Otherwise, it seems to me you're building into the filesystem driver
unnatural restrictions on the sequence in which FS makes the VFS calls.




^ permalink raw reply	[flat|nested] 5+ messages in thread

* [RFC] vfs intent lookup patch
@ 2002-10-16  4:24 Steven French
  0 siblings, 0 replies; 5+ messages in thread
From: Steven French @ 2002-10-16  4:24 UTC (permalink / raw)
  To: linux-fsdevel


> patch WRT NFSv4 open semantics. for regular files,
> the NFSv4 open combines lookup, open, and create.

The cifs semantics are similar.  Although we can handle a distinct lookup
before
open or create it is not really necessary.   A loosely related problem is
that create
has the side effect of opening the file in the cifs protocol (open and
create only
differ in the flags passed in the NTCreateX  frame and they both return
network
file handles).  The VFS semantics seem to require that a cifs filesystem
close
the file that we just created even though it is frequently just the first
part of what the
application level probably considers an atomic open operation with the
create flag
specified and this makes setting the mode tricky too since files are
sometimes opened
after a create with open flags which would conflict with the mode set
earlier on the
create call.   Andy's suggestion about adding an optional call at the vfs
layer is
interesting.

Steve French
Senior Software Engineer
Linux Technology Center - IBM Austin
phone: 512-838-2294
email: sfrench@us.ibm.com



^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [RFC] vfs intent lookup patch
  2002-10-15 15:58 ` William A.(Andy) Adamson
@ 2002-10-16 21:30   ` Peter Braam
  0 siblings, 0 replies; 5+ messages in thread
From: Peter Braam @ 2002-10-16 21:30 UTC (permalink / raw)
  To: William A.(Andy) Adamson; +Cc: viro, linux-fsdevel, nfsv4-wg

Ah yes, I thought that might be useful.  Similarly if you wanted to do
setattr with an intent you'd need a pointer to iattr's to do so.

- Peter -

On Tue, Oct 15, 2002 at 11:58:00AM -0400, William A.(Andy) Adamson wrote:
> hi peter
> 
> i've looked over your patch WRT NFSv4 open semantics. for regular files, the 
> NFSv4 open combines lookup, open, and create. this means that we need both the 
> mode and the open flags to satisfy the OPEN operation, which we would 
> implement in the nfsv4 version of lookup2. as i follow the code path in your 
> patch:
> 
> filp_open -> open_namei_it which receives both the mode and the open flags. 
> but then the open flags are lost in the call to path_lookup_it, which 
> translates the open flags into lookup_flags. so, if i'm reading the code 
> correctly, NFSv4 would require another field in the intent struct to hold the 
> open flags.
> 
> -->Andy
> 
> 
> > Hi Al,
> > 
> > Attached is a 2.4 patch for lookup intents, which we use with Lustre.
> > This patch adds a lookup2, revalidate2 and intent_release method.
> > None of those are required if the kernel changes the parameters
> > globally, but I wanted to keep things simple.
> > 
> > The purpose of the patch is to give the lookup call sufficient
> > information about the "intent" of the lookup.  Intents can be tags
> > like "a create/open is coming".  With the intent patch, distributed
> > O_EXCL problems, from which NFS suffers too, are easily addressed.
> > For Lustre, which runs on 1,000+ node clusters, reducing file system
> > transactions to a single RPC from ->lookup is the core benefit.  For
> > InterMezzo the intent opens the opportunity to refresh the cache
> > before the ->create/mkdir etc methods lock anything, which is very
> > beneficial.  This is much along the lines of what we discussed over
> > the last year and a bit.
> > 
> > We would like to submit something like this for 2.5, for use with
> > Lustre and InterMezzo. 
> > 
> > The 2.5 patch is similar, but in 2.5 the struct intent is conveniently
> > embedded in the struct nameidata making things quite a bit simpler.
> > Intent_release is done as part of releasing the nameidata and still
> > requires a new dentry method.  We will send you this patch in a few
> > days time.
> > 
> > However, I suspect that after you review this you might come up with
> > an equivalent but better way of doing this.  Particularly interesting
> > to us is how the d_it field we added to the dentry can be protected
> > between the return from lookup and the use of that field in
> > ->create/mkdir etc.  We do that with a semaphore (which we placed
> > under d_fsdata); alternatives are passing the nameidata or intent to
> > every inode operation that requires the intent.
> > 
> > Best wishes,
> > 
> > - Peter -
> > 
- Peter -

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2002-10-16 21:30 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2002-10-14 22:53 [RFC] vfs intent lookup patch Peter Braam
2002-10-15 15:58 ` William A.(Andy) Adamson
2002-10-16 21:30   ` Peter Braam
  -- strict thread matches above, loose matches on Subject: below --
2002-10-16  0:49 Bryan Henderson
2002-10-16  4:24 Steven French

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.