Linux NFS development
 help / color / mirror / Atom feed
* [PATCH v2] NFS: Avoid flushing data while holding directory locks in nfs_rename()
@ 2025-04-28 16:49 trondmy
  2025-04-28 21:35 ` Jeff Layton
  0 siblings, 1 reply; 2+ messages in thread
From: trondmy @ 2025-04-28 16:49 UTC (permalink / raw)
  To: Anna Schumaker; +Cc: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

The Linux client assumes that all filehandles are non-volatile for
renames within the same directory (otherwise sillyrename cannot work).
However, the existence of the Linux 'subtree_check' export option has
meant that nfs_rename() has always assumed it needs to flush writes
before attempting to rename.

Since NFSv4 does allow the client to query whether or not the server
exhibits this behaviour, and since knfsd does actually set the
appropriate flag when 'subtree_check' is enabled on an export, it
should be OK to optimise away the write flushing behaviour in the cases
where it is clearly not needed.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/client.c           |  2 ++
 fs/nfs/dir.c              | 15 ++++++++++++++-
 include/linux/nfs_fs_sb.h | 12 +++++++++---
 3 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 2115c1189c2d..6d63b958c4bb 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1105,6 +1105,8 @@ struct nfs_server *nfs_create_server(struct fs_context *fc)
 		if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN)
 			server->namelen = NFS2_MAXNAMLEN;
 	}
+	/* Linux 'subtree_check' borkenness mandates this setting */
+	server->fh_expire_type = NFS_FH_VOL_RENAME;
 
 	if (!(fattr->valid & NFS_ATTR_FATTR)) {
 		error = ctx->nfs_mod->rpc_ops->getattr(server, ctx->mntfh,
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index bd23fc736b39..d0e0b435a843 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2676,6 +2676,18 @@ nfs_unblock_rename(struct rpc_task *task, struct nfs_renamedata *data)
 	unblock_revalidate(new_dentry);
 }
 
+static bool nfs_rename_is_unsafe_cross_dir(struct dentry *old_dentry,
+					   struct dentry *new_dentry)
+{
+	struct nfs_server *server = NFS_SB(old_dentry->d_sb);
+
+	if (old_dentry->d_parent != new_dentry->d_parent)
+		return false;
+	if (server->fh_expire_type & NFS_FH_RENAME_UNSAFE)
+		return !(server->fh_expire_type & NFS_FH_NOEXPIRE_WITH_OPEN);
+	return true;
+}
+
 /*
  * RENAME
  * FIXME: Some nfsds, like the Linux user space nfsd, may generate a
@@ -2763,7 +2775,8 @@ int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
 
 	}
 
-	if (S_ISREG(old_inode->i_mode))
+	if (S_ISREG(old_inode->i_mode) &&
+	    nfs_rename_is_unsafe_cross_dir(old_dentry, new_dentry))
 		nfs_sync_inode(old_inode);
 	task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry,
 				must_unblock ? nfs_unblock_rename : NULL);
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 71319637a84e..ee03f3cef30c 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -213,6 +213,15 @@ struct nfs_server {
 	char			*fscache_uniq;	/* Uniquifier (or NULL) */
 #endif
 
+	/* The following #defines numerically match the NFSv4 equivalents */
+#define NFS_FH_NOEXPIRE_WITH_OPEN (0x1)
+#define NFS_FH_VOLATILE_ANY (0x2)
+#define NFS_FH_VOL_MIGRATION (0x4)
+#define NFS_FH_VOL_RENAME (0x8)
+#define NFS_FH_RENAME_UNSAFE (NFS_FH_VOLATILE_ANY | NFS_FH_VOL_RENAME)
+	u32			fh_expire_type;	/* V4 bitmask representing file
+						   handle volatility type for
+						   this filesystem */
 	u32			pnfs_blksize;	/* layout_blksize attr */
 #if IS_ENABLED(CONFIG_NFS_V4)
 	u32			attr_bitmask[3];/* V4 bitmask representing the set
@@ -236,9 +245,6 @@ struct nfs_server {
 	u32			acl_bitmask;	/* V4 bitmask representing the ACEs
 						   that are supported on this
 						   filesystem */
-	u32			fh_expire_type;	/* V4 bitmask representing file
-						   handle volatility type for
-						   this filesystem */
 	struct pnfs_layoutdriver_type  *pnfs_curr_ld; /* Active layout driver */
 	struct rpc_wait_queue	roc_rpcwaitq;
 	void			*pnfs_ld_data;	/* per mount point data */
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH v2] NFS: Avoid flushing data while holding directory locks in nfs_rename()
  2025-04-28 16:49 [PATCH v2] NFS: Avoid flushing data while holding directory locks in nfs_rename() trondmy
@ 2025-04-28 21:35 ` Jeff Layton
  0 siblings, 0 replies; 2+ messages in thread
From: Jeff Layton @ 2025-04-28 21:35 UTC (permalink / raw)
  To: trondmy, Anna Schumaker; +Cc: linux-nfs

On Mon, 2025-04-28 at 09:49 -0700, trondmy@kernel.org wrote:
> From: Trond Myklebust <trond.myklebust@hammerspace.com>
> 
> The Linux client assumes that all filehandles are non-volatile for
> renames within the same directory (otherwise sillyrename cannot work).
> However, the existence of the Linux 'subtree_check' export option has
> meant that nfs_rename() has always assumed it needs to flush writes
> before attempting to rename.
> 
> Since NFSv4 does allow the client to query whether or not the server
> exhibits this behaviour, and since knfsd does actually set the
> appropriate flag when 'subtree_check' is enabled on an export, it
> should be OK to optimise away the write flushing behaviour in the cases
> where it is clearly not needed.
> 
> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> ---
>  fs/nfs/client.c           |  2 ++
>  fs/nfs/dir.c              | 15 ++++++++++++++-
>  include/linux/nfs_fs_sb.h | 12 +++++++++---
>  3 files changed, 25 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> index 2115c1189c2d..6d63b958c4bb 100644
> --- a/fs/nfs/client.c
> +++ b/fs/nfs/client.c
> @@ -1105,6 +1105,8 @@ struct nfs_server *nfs_create_server(struct fs_context *fc)
>  		if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN)
>  			server->namelen = NFS2_MAXNAMLEN;
>  	}
> +	/* Linux 'subtree_check' borkenness mandates this setting */
> +	server->fh_expire_type = NFS_FH_VOL_RENAME;
>  
>  	if (!(fattr->valid & NFS_ATTR_FATTR)) {
>  		error = ctx->nfs_mod->rpc_ops->getattr(server, ctx->mntfh,
> diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
> index bd23fc736b39..d0e0b435a843 100644
> --- a/fs/nfs/dir.c
> +++ b/fs/nfs/dir.c
> @@ -2676,6 +2676,18 @@ nfs_unblock_rename(struct rpc_task *task, struct nfs_renamedata *data)
>  	unblock_revalidate(new_dentry);
>  }
>  
> +static bool nfs_rename_is_unsafe_cross_dir(struct dentry *old_dentry,
> +					   struct dentry *new_dentry)
> +{
> +	struct nfs_server *server = NFS_SB(old_dentry->d_sb);
> +
> +	if (old_dentry->d_parent != new_dentry->d_parent)
> +		return false;
> +	if (server->fh_expire_type & NFS_FH_RENAME_UNSAFE)
> +		return !(server->fh_expire_type & NFS_FH_NOEXPIRE_WITH_OPEN);
> +	return true;
> +}
> +
>  /*
>   * RENAME
>   * FIXME: Some nfsds, like the Linux user space nfsd, may generate a
> @@ -2763,7 +2775,8 @@ int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
>  
>  	}
>  
> -	if (S_ISREG(old_inode->i_mode))
> +	if (S_ISREG(old_inode->i_mode) &&
> +	    nfs_rename_is_unsafe_cross_dir(old_dentry, new_dentry))
>  		nfs_sync_inode(old_inode);
>  	task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry,
>  				must_unblock ? nfs_unblock_rename : NULL);
> diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
> index 71319637a84e..ee03f3cef30c 100644
> --- a/include/linux/nfs_fs_sb.h
> +++ b/include/linux/nfs_fs_sb.h
> @@ -213,6 +213,15 @@ struct nfs_server {
>  	char			*fscache_uniq;	/* Uniquifier (or NULL) */
>  #endif
>  
> +	/* The following #defines numerically match the NFSv4 equivalents */
> +#define NFS_FH_NOEXPIRE_WITH_OPEN (0x1)
> +#define NFS_FH_VOLATILE_ANY (0x2)
> +#define NFS_FH_VOL_MIGRATION (0x4)
> +#define NFS_FH_VOL_RENAME (0x8)
> +#define NFS_FH_RENAME_UNSAFE (NFS_FH_VOLATILE_ANY | NFS_FH_VOL_RENAME)
> +	u32			fh_expire_type;	/* V4 bitmask representing file
> +						   handle volatility type for
> +						   this filesystem */
>  	u32			pnfs_blksize;	/* layout_blksize attr */
>  #if IS_ENABLED(CONFIG_NFS_V4)
>  	u32			attr_bitmask[3];/* V4 bitmask representing the set
> @@ -236,9 +245,6 @@ struct nfs_server {
>  	u32			acl_bitmask;	/* V4 bitmask representing the ACEs
>  						   that are supported on this
>  						   filesystem */
> -	u32			fh_expire_type;	/* V4 bitmask representing file
> -						   handle volatility type for
> -						   this filesystem */
>  	struct pnfs_layoutdriver_type  *pnfs_curr_ld; /* Active layout driver */
>  	struct rpc_wait_queue	roc_rpcwaitq;
>  	void			*pnfs_ld_data;	/* per mount point data */

Reviewed-by: Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2025-04-28 21:35 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-04-28 16:49 [PATCH v2] NFS: Avoid flushing data while holding directory locks in nfs_rename() trondmy
2025-04-28 21:35 ` Jeff Layton

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox