From: Mike Snitzer <snitzer@kernel.org>
To: linux-nfs@vger.kernel.org
Cc: Jeff Layton <jlayton@kernel.org>,
Chuck Lever <chuck.lever@oracle.com>,
Trond Myklebust <trondmy@hammerspace.com>,
NeilBrown <neilb@suse.de>,
snitzer@hammerspace.com
Subject: [PATCH v6 15/18] nfsd: use SRCU to dereference nn->nfsd_serv
Date: Wed, 19 Jun 2024 16:40:29 -0400 [thread overview]
Message-ID: <20240619204032.93740-16-snitzer@kernel.org> (raw)
In-Reply-To: <20240619204032.93740-1-snitzer@kernel.org>
Introduce nfsd_serv_get, nfsd_serv_put and nfsd_serv_sync and update
the nfsd code to prevent nfsd_destroy_serv from destroying
nn->nfsd_serv until all nfsd code is done with it (particularly the
localio code that doesn't run in the context of nfsd's svc threads,
nor does it take the nfsd_mutex).
Commit 83d5e5b0af90 ("dm: optimize use SRCU and RCU") provided a
familiar, well-worn pattern for how to implement this.
Suggested-by: NeilBrown <neilb@suse.de>
Signed-off-by: Mike Snitzer <snitzer@kernel.org>
---
fs/nfsd/filecache.c | 13 ++++++++---
fs/nfsd/netns.h | 14 ++++++++++--
fs/nfsd/nfs4state.c | 25 ++++++++++++++-------
fs/nfsd/nfsctl.c | 7 ++++--
fs/nfsd/nfssvc.c | 54 ++++++++++++++++++++++++++++++++++++---------
5 files changed, 88 insertions(+), 25 deletions(-)
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index 99631fa56662..474b3a3af3fb 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -413,12 +413,15 @@ nfsd_file_dispose_list_delayed(struct list_head *dispose)
struct nfsd_file *nf = list_first_entry(dispose,
struct nfsd_file, nf_lru);
struct nfsd_net *nn = net_generic(nf->nf_net, nfsd_net_id);
+ int srcu_idx;
+ struct svc_serv *serv = nfsd_serv_get(nn, &srcu_idx);
struct nfsd_fcache_disposal *l = nn->fcache_disposal;
spin_lock(&l->lock);
list_move_tail(&nf->nf_lru, &l->freeme);
spin_unlock(&l->lock);
- svc_wake_up(nn->nfsd_serv);
+ svc_wake_up(serv);
+ nfsd_serv_put(nn, srcu_idx);
}
}
@@ -443,11 +446,15 @@ void nfsd_file_net_dispose(struct nfsd_net *nn)
for (i = 0; i < 8 && !list_empty(&l->freeme); i++)
list_move(l->freeme.next, &dispose);
spin_unlock(&l->lock);
- if (!list_empty(&l->freeme))
+ if (!list_empty(&l->freeme)) {
+ int srcu_idx;
+ struct svc_serv *serv = nfsd_serv_get(nn, &srcu_idx);
/* Wake up another thread to share the work
* *before* doing any actual disposing.
*/
- svc_wake_up(nn->nfsd_serv);
+ svc_wake_up(serv);
+ nfsd_serv_put(nn, srcu_idx);
+ }
nfsd_file_dispose_list(&dispose);
}
}
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 0c5a1d97e4ac..92d0d0883f17 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -139,8 +139,14 @@ struct nfsd_net {
u32 clverifier_counter;
struct svc_info nfsd_info;
-#define nfsd_serv nfsd_info.serv
-
+ /*
+ * The current 'nfsd_serv' at nfsd_info.serv. Using 'void' rather than
+ * 'struct svc_serv' to guard against new code dereferencing nfsd_serv
+ * without using proper synchronization.
+ * Use nfsd_serv_get() or take nfsd_mutex to dereference.
+ */
+ void __rcu *nfsd_serv;
+ struct srcu_struct nfsd_serv_srcu;
/*
* clientid and stateid data for construction of net unique COPY
@@ -225,6 +231,10 @@ struct nfsd_net {
extern bool nfsd_support_version(int vers);
extern void nfsd_netns_free_versions(struct nfsd_net *nn);
+extern struct svc_serv *nfsd_serv_get(struct nfsd_net *nn, int *srcu_idx);
+extern void nfsd_serv_put(struct nfsd_net *nn, int srcu_idx);
+extern void nfsd_serv_sync(struct nfsd_net *nn);
+
extern unsigned int nfsd_net_id;
void nfsd_copy_write_verifier(__be32 verf[2], struct nfsd_net *nn);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index a20c2c9d7d45..8876810e569d 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1919,6 +1919,8 @@ static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca, struct nfsd_net *nn
u32 num = ca->maxreqs;
unsigned long avail, total_avail;
unsigned int scale_factor;
+ int srcu_idx;
+ struct svc_serv *serv = nfsd_serv_get(nn, &srcu_idx);
spin_lock(&nfsd_drc_lock);
if (nfsd_drc_max_mem > nfsd_drc_mem_used)
@@ -1940,7 +1942,7 @@ static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca, struct nfsd_net *nn
* Give the client one slot even if that would require
* over-allocation--it is better than failure.
*/
- scale_factor = max_t(unsigned int, 8, nn->nfsd_serv->sv_nrthreads);
+ scale_factor = max_t(unsigned int, 8, serv->sv_nrthreads);
avail = clamp_t(unsigned long, avail, slotsize,
total_avail/scale_factor);
@@ -1949,6 +1951,8 @@ static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca, struct nfsd_net *nn
nfsd_drc_mem_used += num * slotsize;
spin_unlock(&nfsd_drc_lock);
+ nfsd_serv_put(nn, srcu_idx);
+
return num;
}
@@ -3702,12 +3706,16 @@ nfsd4_replay_create_session(struct nfsd4_create_session *cr_ses,
static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfsd_net *nn)
{
- u32 maxrpc = nn->nfsd_serv->sv_max_mesg;
+ int srcu_idx;
+ struct svc_serv *serv = nfsd_serv_get(nn, &srcu_idx);
+ u32 maxrpc = serv->sv_max_mesg;
+ __be32 status = nfs_ok;
- if (ca->maxreq_sz < NFSD_MIN_REQ_HDR_SEQ_SZ)
- return nfserr_toosmall;
- if (ca->maxresp_sz < NFSD_MIN_RESP_HDR_SEQ_SZ)
- return nfserr_toosmall;
+ if (ca->maxreq_sz < NFSD_MIN_REQ_HDR_SEQ_SZ ||
+ ca->maxresp_sz < NFSD_MIN_RESP_HDR_SEQ_SZ) {
+ status = nfserr_toosmall;
+ goto out;
+ }
ca->headerpadsz = 0;
ca->maxreq_sz = min_t(u32, ca->maxreq_sz, maxrpc);
ca->maxresp_sz = min_t(u32, ca->maxresp_sz, maxrpc);
@@ -3726,8 +3734,9 @@ static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfs
* accounting is soft and provides no guarantees either way.
*/
ca->maxreqs = nfsd4_get_drc_mem(ca, nn);
-
- return nfs_ok;
+out:
+ nfsd_serv_put(nn, srcu_idx);
+ return status;
}
/*
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 1bddbbf7418e..2d4c29c25c6a 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1569,10 +1569,12 @@ int nfsd_nl_rpc_status_get_dumpit(struct sk_buff *skb,
{
struct nfsd_net *nn = net_generic(sock_net(skb->sk), nfsd_net_id);
int i, ret, rqstp_index = 0;
+ int srcu_idx;
+ struct svc_serv *serv = nfsd_serv_get(nn, &srcu_idx);
rcu_read_lock();
- for (i = 0; i < nn->nfsd_serv->sv_nrpools; i++) {
+ for (i = 0; i < serv->sv_nrpools; i++) {
struct svc_rqst *rqstp;
if (i < cb->args[0]) /* already consumed */
@@ -1580,7 +1582,7 @@ int nfsd_nl_rpc_status_get_dumpit(struct sk_buff *skb,
rqstp_index = 0;
list_for_each_entry_rcu(rqstp,
- &nn->nfsd_serv->sv_pools[i].sp_all_threads,
+ &serv->sv_pools[i].sp_all_threads,
rq_all) {
struct nfsd_genl_rqstp genl_rqstp;
unsigned int status_counter;
@@ -1645,6 +1647,7 @@ int nfsd_nl_rpc_status_get_dumpit(struct sk_buff *skb,
ret = skb->len;
out:
rcu_read_unlock();
+ nfsd_serv_put(nn, srcu_idx);
return ret;
}
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index bfc58001dd9a..42db1e6dcff7 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -300,6 +300,26 @@ int nfsd_minorversion(struct nfsd_net *nn, u32 minorversion, enum vers_op change
return 0;
}
+struct svc_serv *nfsd_serv_get(struct nfsd_net *nn, int *srcu_idx)
+ __acquires(nn->nfsd_serv_srcu)
+{
+ *srcu_idx = srcu_read_lock(&nn->nfsd_serv_srcu);
+
+ return srcu_dereference(nn->nfsd_serv, &nn->nfsd_serv_srcu);
+}
+
+void nfsd_serv_put(struct nfsd_net *nn, int srcu_idx)
+ __releases(nn->nfsd_serv_srcu)
+{
+ srcu_read_unlock(&nn->nfsd_serv_srcu, srcu_idx);
+}
+
+void nfsd_serv_sync(struct nfsd_net *nn)
+{
+ synchronize_srcu(&nn->nfsd_serv_srcu);
+ synchronize_rcu_expedited();
+}
+
/*
* Maximum number of nfsd processes
*/
@@ -507,6 +527,7 @@ static void nfsd_shutdown_net(struct net *net)
lockd_down(net);
nn->lockd_up = false;
}
+ cleanup_srcu_struct(&nn->nfsd_serv_srcu);
#if IS_ENABLED(CONFIG_NFSD_LOCALIO)
list_del_rcu(&nn->nfsd_uuid.list);
#endif
@@ -514,6 +535,7 @@ static void nfsd_shutdown_net(struct net *net)
nfsd_shutdown_generic();
}
+// FIXME: nfsd_serv_{get,put} should make it safe to eliminate nfsd_notifier_lock
static DEFINE_SPINLOCK(nfsd_notifier_lock);
static int nfsd_inetaddr_event(struct notifier_block *this, unsigned long event,
void *ptr)
@@ -523,20 +545,22 @@ static int nfsd_inetaddr_event(struct notifier_block *this, unsigned long event,
struct net *net = dev_net(dev);
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
struct sockaddr_in sin;
+ int srcu_idx;
+ struct svc_serv *serv = nfsd_serv_get(nn, &srcu_idx);
- if (event != NETDEV_DOWN || !nn->nfsd_serv)
+ if (event != NETDEV_DOWN || !serv)
goto out;
spin_lock(&nfsd_notifier_lock);
- if (nn->nfsd_serv) {
+ if (serv) {
dprintk("nfsd_inetaddr_event: removed %pI4\n", &ifa->ifa_local);
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = ifa->ifa_local;
- svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin);
+ svc_age_temp_xprts_now(serv, (struct sockaddr *)&sin);
}
spin_unlock(&nfsd_notifier_lock);
-
out:
+ nfsd_serv_put(nn, srcu_idx);
return NOTIFY_DONE;
}
@@ -553,22 +577,24 @@ static int nfsd_inet6addr_event(struct notifier_block *this,
struct net *net = dev_net(dev);
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
struct sockaddr_in6 sin6;
+ int srcu_idx;
+ struct svc_serv *serv = nfsd_serv_get(nn, &srcu_idx);
- if (event != NETDEV_DOWN || !nn->nfsd_serv)
+ if (event != NETDEV_DOWN || !serv)
goto out;
spin_lock(&nfsd_notifier_lock);
- if (nn->nfsd_serv) {
+ if (serv) {
dprintk("nfsd_inet6addr_event: removed %pI6\n", &ifa->addr);
sin6.sin6_family = AF_INET6;
sin6.sin6_addr = ifa->addr;
if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
sin6.sin6_scope_id = ifa->idev->dev->ifindex;
- svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin6);
+ svc_age_temp_xprts_now(serv, (struct sockaddr *)&sin6);
}
spin_unlock(&nfsd_notifier_lock);
-
out:
+ nfsd_serv_put(nn, srcu_idx);
return NOTIFY_DONE;
}
@@ -589,9 +615,12 @@ void nfsd_destroy_serv(struct net *net)
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
struct svc_serv *serv = nn->nfsd_serv;
+ lockdep_assert_held(&nfsd_mutex);
+
spin_lock(&nfsd_notifier_lock);
- nn->nfsd_serv = NULL;
+ rcu_assign_pointer(nn->nfsd_serv, NULL);
spin_unlock(&nfsd_notifier_lock);
+ nfsd_serv_sync(nn);
/* check if the notifier still has clients */
if (atomic_dec_return(&nfsd_notifier_refcount) == 0) {
@@ -711,6 +740,10 @@ int nfsd_create_serv(struct net *net)
if (nn->nfsd_serv)
return 0;
+ error = init_srcu_struct(&nn->nfsd_serv_srcu);
+ if (error)
+ return error;
+
if (nfsd_max_blksize == 0)
nfsd_max_blksize = nfsd_get_default_max_blksize();
nfsd_reset_versions(nn);
@@ -727,7 +760,8 @@ int nfsd_create_serv(struct net *net)
}
spin_lock(&nfsd_notifier_lock);
nn->nfsd_info.mutex = &nfsd_mutex;
- nn->nfsd_serv = serv;
+ nn->nfsd_info.serv = serv;
+ rcu_assign_pointer(nn->nfsd_serv, nn->nfsd_info.serv);
spin_unlock(&nfsd_notifier_lock);
set_max_drc();
--
2.44.0
next prev parent reply other threads:[~2024-06-19 20:40 UTC|newest]
Thread overview: 38+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-06-19 20:40 [PATCH v6 00/18] nfs/nfsd: add support for localio Mike Snitzer
2024-06-19 20:40 ` [PATCH v6 01/18] nfs: pass nfs_client to nfs_initiate_pgio Mike Snitzer
2024-06-19 20:40 ` [PATCH v6 02/18] nfs: pass descriptor thru nfs_initiate_pgio path Mike Snitzer
2024-06-19 20:40 ` [PATCH v6 03/18] nfs: pass struct file to nfs_init_pgio and nfs_init_commit Mike Snitzer
2024-06-19 20:40 ` [PATCH v6 04/18] sunrpc: add rpcauth_map_to_svc_cred_local Mike Snitzer
2024-06-19 20:40 ` [PATCH v6 05/18] nfs_common: add NFS LOCALIO auxiliary protocol enablement Mike Snitzer
2024-06-21 4:43 ` Jeff Johnson
2024-06-19 20:40 ` [PATCH v6 06/18] nfs/nfsd: add "localio" support Mike Snitzer
2024-06-21 6:08 ` NeilBrown
2024-06-21 23:28 ` Mike Snitzer
2024-06-23 22:27 ` NeilBrown
2024-06-25 4:59 ` Mike Snitzer
2024-06-19 20:40 ` [PATCH v6 07/18] nfsd/localio: manage netns reference in nfsd_open_local_fh Mike Snitzer
2024-06-19 20:40 ` [PATCH v6 08/18] NFS: Enable localio for non-pNFS I/O Mike Snitzer
2024-06-19 20:40 ` [PATCH v6 09/18] pnfs/flexfiles: Enable localio for flexfiles I/O Mike Snitzer
2024-06-19 20:40 ` [PATCH v6 10/18] nfs/localio: use dedicated workqueues for filesystem read and write Mike Snitzer
2024-06-19 20:40 ` [PATCH v6 11/18] nfs: implement v3 and v4 client support for NFS_LOCALIO_PROGRAM Mike Snitzer
2024-06-19 20:40 ` [PATCH v6 12/18] nfsd: implement v3 and v4 server " Mike Snitzer
2024-06-19 20:40 ` [PATCH v6 13/18] nfs/nfsd: consolidate {encode,decode}_opaque_fixed in nfs_xdr.h Mike Snitzer
2024-06-19 20:40 ` [PATCH v6 14/18] nfsd: prepare to use SRCU to dereference nn->nfsd_serv Mike Snitzer
2024-06-19 20:40 ` Mike Snitzer [this message]
2024-06-21 6:35 ` [PATCH v6 15/18] nfsd: " NeilBrown
2024-06-21 23:58 ` Mike Snitzer
2024-06-19 20:40 ` [PATCH v6 16/18] nfsd/localio: use SRCU to dereference nn->nfsd_serv in nfsd_open_local_fh Mike Snitzer
2024-06-19 20:40 ` [PATCH v6 17/18] nfs: add Documentation/filesystems/nfs/localio.rst Mike Snitzer
2024-06-20 13:52 ` Chuck Lever
2024-06-20 14:33 ` Mike Snitzer
2024-06-20 14:45 ` Chuck Lever
2024-06-20 22:12 ` NeilBrown
2024-06-20 22:35 ` Mike Snitzer
2024-06-20 23:28 ` Chuck Lever
2024-06-20 23:42 ` NeilBrown
2024-06-21 0:30 ` Mike Snitzer
2024-06-21 0:38 ` Mike Snitzer
2024-06-21 0:28 ` Mike Snitzer
2024-06-21 2:18 ` Chuck Lever III
2024-06-19 20:40 ` [PATCH v6 18/18] nfs/nfsd: add Kconfig options to allow localio to be enabled Mike Snitzer
2024-06-20 5:04 ` [PATCH v6 00/18] nfs/nfsd: add support for localio Mike Snitzer
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240619204032.93740-16-snitzer@kernel.org \
--to=snitzer@kernel.org \
--cc=chuck.lever@oracle.com \
--cc=jlayton@kernel.org \
--cc=linux-nfs@vger.kernel.org \
--cc=neilb@suse.de \
--cc=snitzer@hammerspace.com \
--cc=trondmy@hammerspace.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox