Linux NFS development
 help / color / mirror / Atom feed
From: NeilBrown <neilb@suse.de>
To: Al Viro <viro@zeniv.linux.org.uk>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Daire Byrne <daire@dneg.com>,
	Trond Myklebust <trond.myklebust@hammerspace.com>,
	Chuck Lever <chuck.lever@oracle.com>
Cc: Linux NFS Mailing List <linux-nfs@vger.kernel.org>,
	linux-fsdevel@vger.kernel.org,
	LKML <linux-kernel@vger.kernel.org>
Subject: [PATCH 10/10] NFS: support parallel updates in the one directory.
Date: Fri, 26 Aug 2022 12:10:43 +1000	[thread overview]
Message-ID: <166147984378.25420.4023980607067991846.stgit@noble.brown> (raw)
In-Reply-To: <166147828344.25420.13834885828450967910.stgit@noble.brown>

NFS can easily support parallel updates as the locking is done on the
server, so this patch enables parallel updates for NFS.

Handling of silly-rename - both for unlink and for the rename target -
requires some care as we need to get an exclusive lock on the chosen
silly name and for rename we need to keep the original target name
locked after it has been renamed to the silly name.

So nfs_sillyrename() now uses d_exchange() to swap the target and the
silly name after the silly-rename has happened on the server, and the
silly dentry - which now has the name of the target - is returned.

For unlink(), this is immediately unlocked and discarded with a call to
nfs_sillyrename_finish().  For rename it is kept locked until the
originally requested rename completes.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/dcache.c         |    5 ++++-
 fs/nfs/dir.c        |   28 ++++++++++++++++------------
 fs/nfs/fs_context.c |    6 ++++--
 fs/nfs/internal.h   |    3 ++-
 fs/nfs/unlink.c     |   51 ++++++++++++++++++++++++++++++++++++++-------------
 5 files changed, 64 insertions(+), 29 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index 9bf346a9de52..a5eaab16d39f 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -3056,7 +3056,9 @@ void d_exchange(struct dentry *dentry1, struct dentry *dentry2)
 	write_seqlock(&rename_lock);
 
 	WARN_ON(!dentry1->d_inode);
-	WARN_ON(!dentry2->d_inode);
+	/* Allow dentry2 to be negative, so we can do a rename
+	 * but keep both names locked (DCACHE_PAR_UPDATE)
+	 */
 	WARN_ON(IS_ROOT(dentry1));
 	WARN_ON(IS_ROOT(dentry2));
 
@@ -3064,6 +3066,7 @@ void d_exchange(struct dentry *dentry1, struct dentry *dentry2)
 
 	write_sequnlock(&rename_lock);
 }
+EXPORT_SYMBOL(d_exchange);
 
 /**
  * d_ancestor - search for an ancestor
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 5d6c2ddc7ea6..fbb608fbe6bf 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1935,8 +1935,12 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in
 	/*
 	 * If we're doing an exclusive create, optimize away the lookup
 	 * but don't hash the dentry.
+	 * A silly_rename is marked exclusive, but we need to do an
+	 * explicit lookup.
 	 */
-	if (nfs_is_exclusive_create(dir, flags) || flags & LOOKUP_RENAME_TARGET)
+	if ((nfs_is_exclusive_create(dir, flags) ||
+	     flags & LOOKUP_RENAME_TARGET) &&
+	    !(flags & LOOKUP_SILLY_RENAME))
 		return NULL;
 
 	res = ERR_PTR(-ENOMEM);
@@ -2472,10 +2476,14 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry)
 	spin_lock(&dentry->d_lock);
 	if (d_count(dentry) > 1 && !test_bit(NFS_INO_PRESERVE_UNLINKED,
 					     &NFS_I(d_inode(dentry))->flags)) {
+		struct dentry *silly;
+
 		spin_unlock(&dentry->d_lock);
 		/* Start asynchronous writeout of the inode */
 		write_inode_now(d_inode(dentry), 0);
-		error = nfs_sillyrename(dir, dentry);
+		silly = nfs_sillyrename(dir, dentry);
+		error = PTR_ERR_OR_ZERO(silly);
+		nfs_sillyrename_finish(dir, silly);
 		goto out;
 	}
 	/* We must prevent any concurrent open until the unlink
@@ -2685,16 +2693,12 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
 
 			spin_unlock(&new_dentry->d_lock);
 
-			/* copy the target dentry's name */
-			dentry = d_alloc(new_dentry->d_parent,
-					 &new_dentry->d_name);
-			if (!dentry)
-				goto out;
-
 			/* silly-rename the existing target ... */
-			err = nfs_sillyrename(new_dir, new_dentry);
-			if (err)
+			dentry = nfs_sillyrename(new_dir, new_dentry);
+			if (IS_ERR(dentry)) {
+				err = PTR_ERR(dentry);
 				goto out;
+			}
 
 			new_dentry = dentry;
 			new_inode = NULL;
@@ -2750,9 +2754,9 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
 	} else if (error == -ENOENT)
 		nfs_dentry_handle_enoent(old_dentry);
 
-	/* new dentry created? */
 	if (dentry)
-		dput(dentry);
+		nfs_sillyrename_finish(new_dir, dentry);
+
 	return error;
 }
 EXPORT_SYMBOL_GPL(nfs_rename);
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
index 4da701fd1424..7133ca9433d2 100644
--- a/fs/nfs/fs_context.c
+++ b/fs/nfs/fs_context.c
@@ -1577,7 +1577,8 @@ struct file_system_type nfs_fs_type = {
 	.init_fs_context	= nfs_init_fs_context,
 	.parameters		= nfs_fs_parameters,
 	.kill_sb		= nfs_kill_super,
-	.fs_flags		= FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
+	.fs_flags		= FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA|
+				  FS_PAR_DIR_UPDATE,
 };
 MODULE_ALIAS_FS("nfs");
 EXPORT_SYMBOL_GPL(nfs_fs_type);
@@ -1589,7 +1590,8 @@ struct file_system_type nfs4_fs_type = {
 	.init_fs_context	= nfs_init_fs_context,
 	.parameters		= nfs_fs_parameters,
 	.kill_sb		= nfs_kill_super,
-	.fs_flags		= FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
+	.fs_flags		= FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA|
+				  FS_PAR_DIR_UPDATE,
 };
 MODULE_ALIAS_FS("nfs4");
 MODULE_ALIAS("nfs4");
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 27c720d71b4e..3a7fd30a8e29 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -611,7 +611,8 @@ extern struct rpc_task *
 nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
 		 struct dentry *old_dentry, struct dentry *new_dentry,
 		 void (*complete)(struct rpc_task *, struct nfs_renamedata *));
-extern int nfs_sillyrename(struct inode *dir, struct dentry *dentry);
+extern struct dentry *nfs_sillyrename(struct inode *dir, struct dentry *dentry);
+extern void nfs_sillyrename_finish(struct inode *dir, struct dentry *dentry);
 
 /* direct.c */
 void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 9697cd5d2561..c8a718f09fe6 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -428,6 +428,10 @@ nfs_complete_sillyrename(struct rpc_task *task, struct nfs_renamedata *data)
  *
  * The final cleanup is done during dentry_iput.
  *
+ * We exchange the original with the new (silly) dentries, and return
+ * the new dentry which will have the original name.  This ensures that
+ * the target name remains locked until the rename completes.
+ *
  * (Note: NFSv4 is stateful, and has opens, so in theory an NFSv4 server
  * could take responsibility for keeping open files referenced.  The server
  * would also need to ensure that opened-but-deleted files were kept over
@@ -436,7 +440,7 @@ nfs_complete_sillyrename(struct rpc_task *task, struct nfs_renamedata *data)
  * use to advertise that it does this; some day we may take advantage of
  * it.))
  */
-int
+struct dentry *
 nfs_sillyrename(struct inode *dir, struct dentry *dentry)
 {
 	static unsigned int sillycounter;
@@ -445,6 +449,8 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
 	struct dentry *sdentry;
 	struct inode *inode = d_inode(dentry);
 	struct rpc_task *task;
+	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+	struct path path = {};
 	int            error = -EBUSY;
 
 	dfprintk(VFS, "NFS: silly-rename(%pd2, ct=%d)\n",
@@ -459,10 +465,11 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
 
 	fileid = NFS_FILEID(d_inode(dentry));
 
+	path.dentry = d_find_alias(dir);
 	sdentry = NULL;
 	do {
 		int slen;
-		dput(sdentry);
+
 		sillycounter++;
 		slen = scnprintf(silly, sizeof(silly),
 				SILLYNAME_PREFIX "%0*llx%0*x",
@@ -472,14 +479,19 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
 		dfprintk(VFS, "NFS: trying to rename %pd to %s\n",
 				dentry, silly);
 
-		sdentry = lookup_one_len(silly, dentry->d_parent, slen);
-		/*
-		 * N.B. Better to return EBUSY here ... it could be
-		 * dangerous to delete the file while it's in use.
-		 */
-		if (IS_ERR(sdentry))
-			goto out;
-	} while (d_inode(sdentry) != NULL); /* need negative lookup */
+		sdentry = filename_create_one_len(silly, slen,
+						  &path,
+						  LOOKUP_CREATE | LOOKUP_EXCL |
+						  LOOKUP_SILLY_RENAME,
+						  &wq);
+	} while (PTR_ERR_OR_ZERO(sdentry) == -EEXIST);
+	dput(path.dentry);
+	/*
+	 * N.B. Better to return EBUSY here ... it could be
+	 * dangerous to delete the file while it's in use.
+	 */
+	if (IS_ERR(sdentry))
+		goto out;
 
 	ihold(inode);
 
@@ -513,7 +525,7 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
 						     NFS_INO_INVALID_CTIME |
 						     NFS_INO_REVAL_FORCED);
 		spin_unlock(&inode->i_lock);
-		d_move(dentry, sdentry);
+		d_exchange(dentry, sdentry);
 		break;
 	case -ERESTARTSYS:
 		/* The result of the rename is unknown. Play it safe by
@@ -524,7 +536,20 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
 	rpc_put_task(task);
 out_dput:
 	iput(inode);
-	dput(sdentry);
+	if (!error)
+		return sdentry;
+
+	d_lookup_done(sdentry);
+	__done_path_update(&path, sdentry, true, LOOKUP_SILLY_RENAME);
 out:
-	return error;
+	return ERR_PTR(error);
+}
+
+void nfs_sillyrename_finish(struct inode *dir, struct dentry *dentry)
+{
+	struct path path = { .dentry = d_find_alias(dir) };
+
+	if (!IS_ERR(dentry))
+		__done_path_update(&path, dentry, true, LOOKUP_SILLY_RENAME);
+	dput(path.dentry);
 }



  reply	other threads:[~2022-08-26  2:19 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-08-26  2:10 [PATCH/RFC 00/10 v5] Improve scalability of directory operations NeilBrown
2022-08-26  2:10 ` NeilBrown [this message]
2022-08-26 15:31   ` [PATCH 10/10] NFS: support parallel updates in the one directory John Stoffel
2022-08-26 23:13     ` NeilBrown
2022-08-26  2:10 ` [PATCH 09/10] VFS: add LOOKUP_SILLY_RENAME NeilBrown
2022-08-27  1:21   ` Al Viro
2022-08-29  3:15     ` NeilBrown
2022-08-26  2:10 ` [PATCH 06/10] VFS: support concurrent renames NeilBrown
2022-08-27  4:12   ` Al Viro
2022-08-29  3:08     ` NeilBrown
2022-08-26  2:10 ` [PATCH 08/10] NFSD: allow parallel creates from nfsd NeilBrown
2022-08-27  4:37   ` Al Viro
2022-08-29  3:12     ` NeilBrown
2022-08-26  2:10 ` [PATCH 05/10] VFS: export done_path_update() NeilBrown
2022-08-26  2:10 ` [PATCH 04/10] VFS: move dput() and mnt_drop_write() into done_path_update() NeilBrown
2022-08-26  2:10 ` [PATCH 03/10] VFS: move want_write checks into lookup_hash_update() NeilBrown
2022-08-27  3:48   ` Al Viro
2022-08-26  2:10 ` [PATCH 07/10] VFS: hold DCACHE_PAR_UPDATE lock across d_revalidate() NeilBrown
2022-08-26  2:10 ` [PATCH 01/10] VFS: support parallel updates in the one directory NeilBrown
2022-08-26 19:06   ` Linus Torvalds
2022-08-26 23:06     ` NeilBrown
2022-08-27  0:13       ` Linus Torvalds
2022-08-27  0:23         ` Al Viro
2022-08-27 21:14         ` Al Viro
2022-08-27  0:17     ` Al Viro
2022-09-01  0:31       ` NeilBrown
2022-09-01  3:44         ` Al Viro
2022-08-27  3:43   ` Al Viro
2022-08-29  1:59     ` NeilBrown
2022-09-03  0:06       ` Al Viro
2022-09-03  1:40         ` NeilBrown
2022-09-03  2:12           ` Al Viro
2022-09-03 17:52             ` Al Viro
2022-09-04 23:33               ` NeilBrown
2022-08-26  2:10 ` [PATCH 02/10] VFS: move EEXIST and ENOENT tests into lookup_hash_update() NeilBrown
2022-08-26 14:42 ` [PATCH/RFC 00/10 v5] Improve scalability of directory operations John Stoffel
2022-08-26 23:30   ` NeilBrown

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=166147984378.25420.4023980607067991846.stgit@noble.brown \
    --to=neilb@suse.de \
    --cc=chuck.lever@oracle.com \
    --cc=daire@dneg.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-nfs@vger.kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=trond.myklebust@hammerspace.com \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox