* [PATCH AUTOSEL 6.6 03/11] nfs: don't share pNFS DS connections between net namespaces
[not found] <20250519212237.1986368-1-sashal@kernel.org>
@ 2025-05-19 21:22 ` Sasha Levin
2025-05-19 21:22 ` [PATCH AUTOSEL 6.6 11/11] NFS: Avoid flushing data while holding directory locks in nfs_rename() Sasha Levin
1 sibling, 0 replies; 2+ messages in thread
From: Sasha Levin @ 2025-05-19 21:22 UTC (permalink / raw)
To: patches, stable
Cc: Jeff Layton, Omar Sandoval, Sargun Dillon, Benjamin Coddington,
Trond Myklebust, Sasha Levin, anna.schumaker, linux-nfs,
linux-kernel
From: Jeff Layton <jlayton@kernel.org>
[ Upstream commit 6b9785dc8b13d9fb75ceec8cf4ea7ec3f3b1edbc ]
Currently, different NFS clients can share the same DS connections, even
when they are in different net namespaces. If a containerized client
creates a DS connection, another container can find and use it. When the
first client exits, the connection will close which can lead to stalls
in other clients.
Add a net namespace pointer to struct nfs4_pnfs_ds, and compare those
value to the caller's netns in _data_server_lookup_locked() when
searching for a nfs4_pnfs_ds to match.
Reported-by: Omar Sandoval <osandov@osandov.com>
Reported-by: Sargun Dillon <sargun@sargun.me>
Closes: https://lore.kernel.org/linux-nfs/Z_ArpQC_vREh_hEA@telecaster/
Tested-by: Sargun Dillon <sargun@sargun.me>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Benjamin Coddington <bcodding@redhat.com>
Link: https://lore.kernel.org/r/20250410-nfs-ds-netns-v2-1-f80b7979ba80@kernel.org
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
fs/nfs/filelayout/filelayoutdev.c | 6 +++---
fs/nfs/flexfilelayout/flexfilelayoutdev.c | 6 +++---
fs/nfs/pnfs.h | 4 +++-
fs/nfs/pnfs_nfs.c | 9 +++++----
4 files changed, 14 insertions(+), 11 deletions(-)
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index acf4b88889dc3..d5f1fbfd9a0c7 100644
--- a/fs/nfs/filelayout/filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -75,6 +75,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
struct page *scratch;
struct list_head dsaddrs;
struct nfs4_pnfs_ds_addr *da;
+ struct net *net = server->nfs_client->cl_net;
/* set up xdr stream */
scratch = alloc_page(gfp_flags);
@@ -158,8 +159,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
mp_count = be32_to_cpup(p); /* multipath count */
for (j = 0; j < mp_count; j++) {
- da = nfs4_decode_mp_ds_addr(server->nfs_client->cl_net,
- &stream, gfp_flags);
+ da = nfs4_decode_mp_ds_addr(net, &stream, gfp_flags);
if (da)
list_add_tail(&da->da_node, &dsaddrs);
}
@@ -169,7 +169,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
goto out_err_free_deviceid;
}
- dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags);
+ dsaddr->ds_list[i] = nfs4_pnfs_ds_add(net, &dsaddrs, gfp_flags);
if (!dsaddr->ds_list[i])
goto out_err_drain_dsaddrs;
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index e028f5a0ef5f6..d21c5ecfbf1cc 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -49,6 +49,7 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
struct nfs4_pnfs_ds_addr *da;
struct nfs4_ff_layout_ds *new_ds = NULL;
struct nfs4_ff_ds_version *ds_versions = NULL;
+ struct net *net = server->nfs_client->cl_net;
u32 mp_count;
u32 version_count;
__be32 *p;
@@ -80,8 +81,7 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
for (i = 0; i < mp_count; i++) {
/* multipath ds */
- da = nfs4_decode_mp_ds_addr(server->nfs_client->cl_net,
- &stream, gfp_flags);
+ da = nfs4_decode_mp_ds_addr(net, &stream, gfp_flags);
if (da)
list_add_tail(&da->da_node, &dsaddrs);
}
@@ -149,7 +149,7 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
new_ds->ds_versions = ds_versions;
new_ds->ds_versions_cnt = version_count;
- new_ds->ds = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags);
+ new_ds->ds = nfs4_pnfs_ds_add(net, &dsaddrs, gfp_flags);
if (!new_ds->ds)
goto out_err_drain_dsaddrs;
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index d886c8226d8fe..79996d7dad0f7 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -59,6 +59,7 @@ struct nfs4_pnfs_ds {
struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */
char *ds_remotestr; /* comma sep list of addrs */
struct list_head ds_addrs;
+ const struct net *ds_net;
struct nfs_client *ds_clp;
refcount_t ds_count;
unsigned long ds_state;
@@ -405,7 +406,8 @@ int pnfs_generic_commit_pagelist(struct inode *inode,
int pnfs_generic_scan_commit_lists(struct nfs_commit_info *cinfo, int max);
void pnfs_generic_write_commit_done(struct rpc_task *task, void *data);
void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds);
-struct nfs4_pnfs_ds *nfs4_pnfs_ds_add(struct list_head *dsaddrs,
+struct nfs4_pnfs_ds *nfs4_pnfs_ds_add(const struct net *net,
+ struct list_head *dsaddrs,
gfp_t gfp_flags);
void nfs4_pnfs_v3_ds_connect_unload(void);
int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 88e061bd711b7..1b317c44da126 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -651,12 +651,12 @@ _same_data_server_addrs_locked(const struct list_head *dsaddrs1,
* Lookup DS by addresses. nfs4_ds_cache_lock is held
*/
static struct nfs4_pnfs_ds *
-_data_server_lookup_locked(const struct list_head *dsaddrs)
+_data_server_lookup_locked(const struct net *net, const struct list_head *dsaddrs)
{
struct nfs4_pnfs_ds *ds;
list_for_each_entry(ds, &nfs4_data_server_cache, ds_node)
- if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs))
+ if (ds->ds_net == net && _same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs))
return ds;
return NULL;
}
@@ -763,7 +763,7 @@ nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags)
* uncached and return cached struct nfs4_pnfs_ds.
*/
struct nfs4_pnfs_ds *
-nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
+nfs4_pnfs_ds_add(const struct net *net, struct list_head *dsaddrs, gfp_t gfp_flags)
{
struct nfs4_pnfs_ds *tmp_ds, *ds = NULL;
char *remotestr;
@@ -781,13 +781,14 @@ nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags);
spin_lock(&nfs4_ds_cache_lock);
- tmp_ds = _data_server_lookup_locked(dsaddrs);
+ tmp_ds = _data_server_lookup_locked(net, dsaddrs);
if (tmp_ds == NULL) {
INIT_LIST_HEAD(&ds->ds_addrs);
list_splice_init(dsaddrs, &ds->ds_addrs);
ds->ds_remotestr = remotestr;
refcount_set(&ds->ds_count, 1);
INIT_LIST_HEAD(&ds->ds_node);
+ ds->ds_net = net;
ds->ds_clp = NULL;
list_add(&ds->ds_node, &nfs4_data_server_cache);
dprintk("%s add new data server %s\n", __func__,
--
2.39.5
^ permalink raw reply related [flat|nested] 2+ messages in thread
* [PATCH AUTOSEL 6.6 11/11] NFS: Avoid flushing data while holding directory locks in nfs_rename()
[not found] <20250519212237.1986368-1-sashal@kernel.org>
2025-05-19 21:22 ` [PATCH AUTOSEL 6.6 03/11] nfs: don't share pNFS DS connections between net namespaces Sasha Levin
@ 2025-05-19 21:22 ` Sasha Levin
1 sibling, 0 replies; 2+ messages in thread
From: Sasha Levin @ 2025-05-19 21:22 UTC (permalink / raw)
To: patches, stable
Cc: Trond Myklebust, Jeff Layton, Sasha Levin, anna.schumaker,
linux-nfs, linux-kernel
From: Trond Myklebust <trond.myklebust@hammerspace.com>
[ Upstream commit dcd21b609d4abc7303f8683bce4f35d78d7d6830 ]
The Linux client assumes that all filehandles are non-volatile for
renames within the same directory (otherwise sillyrename cannot work).
However, the existence of the Linux 'subtree_check' export option has
meant that nfs_rename() has always assumed it needs to flush writes
before attempting to rename.
Since NFSv4 does allow the client to query whether or not the server
exhibits this behaviour, and since knfsd does actually set the
appropriate flag when 'subtree_check' is enabled on an export, it
should be OK to optimise away the write flushing behaviour in the cases
where it is clearly not needed.
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
fs/nfs/client.c | 2 ++
fs/nfs/dir.c | 15 ++++++++++++++-
include/linux/nfs_fs_sb.h | 12 +++++++++---
3 files changed, 25 insertions(+), 4 deletions(-)
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 62607d52bfa5e..aa09f930eeaf7 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1080,6 +1080,8 @@ struct nfs_server *nfs_create_server(struct fs_context *fc)
if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN)
server->namelen = NFS2_MAXNAMLEN;
}
+ /* Linux 'subtree_check' borkenness mandates this setting */
+ server->fh_expire_type = NFS_FH_VOL_RENAME;
if (!(fattr->valid & NFS_ATTR_FATTR)) {
error = ctx->nfs_mod->rpc_ops->getattr(server, ctx->mntfh,
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 39f7549afcf5b..3891863842359 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2642,6 +2642,18 @@ nfs_unblock_rename(struct rpc_task *task, struct nfs_renamedata *data)
unblock_revalidate(new_dentry);
}
+static bool nfs_rename_is_unsafe_cross_dir(struct dentry *old_dentry,
+ struct dentry *new_dentry)
+{
+ struct nfs_server *server = NFS_SB(old_dentry->d_sb);
+
+ if (old_dentry->d_parent != new_dentry->d_parent)
+ return false;
+ if (server->fh_expire_type & NFS_FH_RENAME_UNSAFE)
+ return !(server->fh_expire_type & NFS_FH_NOEXPIRE_WITH_OPEN);
+ return true;
+}
+
/*
* RENAME
* FIXME: Some nfsds, like the Linux user space nfsd, may generate a
@@ -2729,7 +2741,8 @@ int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
}
- if (S_ISREG(old_inode->i_mode))
+ if (S_ISREG(old_inode->i_mode) &&
+ nfs_rename_is_unsafe_cross_dir(old_dentry, new_dentry))
nfs_sync_inode(old_inode);
task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry,
must_unblock ? nfs_unblock_rename : NULL);
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 86d96e00c2e3d..374b1b208bd89 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -199,6 +199,15 @@ struct nfs_server {
char *fscache_uniq; /* Uniquifier (or NULL) */
#endif
+ /* The following #defines numerically match the NFSv4 equivalents */
+#define NFS_FH_NOEXPIRE_WITH_OPEN (0x1)
+#define NFS_FH_VOLATILE_ANY (0x2)
+#define NFS_FH_VOL_MIGRATION (0x4)
+#define NFS_FH_VOL_RENAME (0x8)
+#define NFS_FH_RENAME_UNSAFE (NFS_FH_VOLATILE_ANY | NFS_FH_VOL_RENAME)
+ u32 fh_expire_type; /* V4 bitmask representing file
+ handle volatility type for
+ this filesystem */
u32 pnfs_blksize; /* layout_blksize attr */
#if IS_ENABLED(CONFIG_NFS_V4)
u32 attr_bitmask[3];/* V4 bitmask representing the set
@@ -222,9 +231,6 @@ struct nfs_server {
u32 acl_bitmask; /* V4 bitmask representing the ACEs
that are supported on this
filesystem */
- u32 fh_expire_type; /* V4 bitmask representing file
- handle volatility type for
- this filesystem */
struct pnfs_layoutdriver_type *pnfs_curr_ld; /* Active layout driver */
struct rpc_wait_queue roc_rpcwaitq;
void *pnfs_ld_data; /* per mount point data */
--
2.39.5
^ permalink raw reply related [flat|nested] 2+ messages in thread
end of thread, other threads:[~2025-05-19 21:22 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <20250519212237.1986368-1-sashal@kernel.org>
2025-05-19 21:22 ` [PATCH AUTOSEL 6.6 03/11] nfs: don't share pNFS DS connections between net namespaces Sasha Levin
2025-05-19 21:22 ` [PATCH AUTOSEL 6.6 11/11] NFS: Avoid flushing data while holding directory locks in nfs_rename() Sasha Levin
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox