* [PATCH v5 1/1] NFSD: move accumulated callback ops to per-net namespace
@ 2026-02-26 19:35 Dai Ngo
2026-02-27 15:56 ` Chuck Lever
0 siblings, 1 reply; 6+ messages in thread
From: Dai Ngo @ 2026-02-26 19:35 UTC (permalink / raw)
To: chuck.lever, jlayton, neil, okorniev, tom, hch; +Cc: linux-nfs
Track accumulated callback operations on a per-network-namespace basis
instead of globally, ensuring proper isolation and behavior when running
nfsd in containers.
Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
---
fs/nfsd/netns.h | 5 +++
fs/nfsd/nfs4callback.c | 75 ++++++++++++++++++++++--------------------
fs/nfsd/nfsctl.c | 5 +++
fs/nfsd/state.h | 2 ++
4 files changed, 52 insertions(+), 35 deletions(-)
v2:
. free memory allocated for nn->nfsd_cb_version4.counts in
nfsd_net_cb_stats_init() on error in nfsd_net_init().
v3:
. reword commit message.
. fix initialization of nn->nfsd_cb_program.nrvers.
v4:
. fix merge conflict in nfsd_net_exit in nfsd-testing branch.
v5:
. restore commit message to the original in v1
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 6ad3fe5d7e12..c101bf2c24c2 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -228,6 +228,11 @@ struct nfsd_net {
struct list_head local_clients;
#endif
siphash_key_t *fh_key;
+
+ struct rpc_version nfsd_cb_version4;
+ const struct rpc_version *nfsd_cb_versions[2];
+ struct rpc_program nfsd_cb_program;
+ struct rpc_stat nfsd_cb_stat;
};
/* Simple check to find out if a given net was properly initialized */
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index aea8bdd2fdc4..759f24657c34 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -1016,7 +1016,7 @@ static int nfs4_xdr_dec_cb_offload(struct rpc_rqst *rqstp,
.p_decode = nfs4_xdr_dec_##restype, \
.p_arglen = NFS4_enc_##argtype##_sz, \
.p_replen = NFS4_dec_##restype##_sz, \
- .p_statidx = NFSPROC4_CB_##call, \
+ .p_statidx = NFSPROC4_CLNT_##proc, \
.p_name = #proc, \
}
@@ -1032,40 +1032,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = {
PROC(CB_GETATTR, COMPOUND, cb_getattr, cb_getattr),
};
-static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
-static const struct rpc_version nfs_cb_version4 = {
-/*
- * Note on the callback rpc program version number: despite language in rfc
- * 5661 section 18.36.3 requiring servers to use 4 in this field, the
- * official xdr descriptions for both 4.0 and 4.1 specify version 1, and
- * in practice that appears to be what implementations use. The section
- * 18.36.3 language is expected to be fixed in an erratum.
- */
- .number = 1,
- .nrprocs = ARRAY_SIZE(nfs4_cb_procedures),
- .procs = nfs4_cb_procedures,
- .counts = nfs4_cb_counts,
-};
-
-static const struct rpc_version *nfs_cb_version[2] = {
- [1] = &nfs_cb_version4,
-};
-
-static const struct rpc_program cb_program;
-
-static struct rpc_stat cb_stats = {
- .program = &cb_program
-};
-
#define NFS4_CALLBACK 0x40000000
-static const struct rpc_program cb_program = {
- .name = "nfs4_cb",
- .number = NFS4_CALLBACK,
- .nrvers = ARRAY_SIZE(nfs_cb_version),
- .version = nfs_cb_version,
- .stats = &cb_stats,
- .pipe_dir_name = "nfsd4_cb",
-};
static int max_cb_time(struct net *net)
{
@@ -1152,14 +1119,15 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
.addrsize = conn->cb_addrlen,
.saddress = (struct sockaddr *) &conn->cb_saddr,
.timeout = &timeparms,
- .program = &cb_program,
.version = 1,
.flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
.cred = current_cred(),
};
struct rpc_clnt *client;
const struct cred *cred;
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+ args.program = &nn->nfsd_cb_program;
if (clp->cl_minorversion == 0) {
if (!clp->cl_cred.cr_principal &&
(clp->cl_cred.cr_flavor >= RPC_AUTH_GSS_KRB5)) {
@@ -1786,3 +1754,40 @@ bool nfsd4_run_cb(struct nfsd4_callback *cb)
nfsd41_cb_inflight_end(clp);
return queued;
}
+
+void nfsd_net_cb_stats_shutdown(struct nfsd_net *nn)
+{
+ kfree(nn->nfsd_cb_version4.counts);
+}
+
+int nfsd_net_cb_stats_init(struct nfsd_net *nn)
+{
+ nn->nfsd_cb_version4.counts = kzalloc_objs(unsigned int,
+ ARRAY_SIZE(nfs4_cb_procedures), GFP_KERNEL);
+ if (!nn->nfsd_cb_version4.counts)
+ return -ENOMEM;
+ /*
+ * Note on the callback rpc program version number: despite language
+ * in rfc 5661 section 18.36.3 requiring servers to use 4 in this
+ * field, the official xdr descriptions for both 4.0 and 4.1 specify
+ * version 1, and in practice that appears to be what implementations
+ * use. The section 18.36.3 language is expected to be fixed in an
+ * erratum.
+ */
+ nn->nfsd_cb_version4.number = 1;
+
+ nn->nfsd_cb_version4.nrprocs = ARRAY_SIZE(nfs4_cb_procedures);
+ nn->nfsd_cb_version4.procs = nfs4_cb_procedures;
+ nn->nfsd_cb_versions[1] = &nn->nfsd_cb_version4;
+
+ memset(&nn->nfsd_cb_stat, 0, sizeof(nn->nfsd_cb_stat));
+ nn->nfsd_cb_program.name = "nfs4_cb";
+ nn->nfsd_cb_program.number = NFS4_CALLBACK;
+ nn->nfsd_cb_program.nrvers = ARRAY_SIZE(nn->nfsd_cb_versions);
+ nn->nfsd_cb_program.version = &nn->nfsd_cb_versions[0];
+ nn->nfsd_cb_program.pipe_dir_name = "nfsd4_cb";
+ nn->nfsd_cb_program.stats = &nn->nfsd_cb_stat;
+ nn->nfsd_cb_stat.program = &nn->nfsd_cb_program;
+
+ return 0;
+}
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 032ab44feb70..5daa647ef0fa 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -2216,6 +2216,9 @@ static __net_init int nfsd_net_init(struct net *net)
int retval;
int i;
+ retval = nfsd_net_cb_stats_init(nn);
+ if (retval)
+ return retval;
retval = nfsd_export_init(net);
if (retval)
goto out_export_error;
@@ -2256,6 +2259,7 @@ static __net_init int nfsd_net_init(struct net *net)
out_idmap_error:
nfsd_export_shutdown(net);
out_export_error:
+ nfsd_net_cb_stats_shutdown(nn);
return retval;
}
@@ -2286,6 +2290,7 @@ static __net_exit void nfsd_net_exit(struct net *net)
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
kfree_sensitive(nn->fh_key);
+ nfsd_net_cb_stats_shutdown(nn);
nfsd_proc_stat_shutdown(net);
percpu_counter_destroy_many(nn->counter, NFSD_STATS_COUNTERS_NUM);
nfsd_idmap_shutdown(net);
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 9b05462da4cc..490193c1877d 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -895,4 +895,6 @@ struct nfsd4_get_dir_delegation;
struct nfs4_delegation *nfsd_get_dir_deleg(struct nfsd4_compound_state *cstate,
struct nfsd4_get_dir_delegation *gdd,
struct nfsd_file *nf);
+int nfsd_net_cb_stats_init(struct nfsd_net *nn);
+void nfsd_net_cb_stats_shutdown(struct nfsd_net *nn);
#endif /* NFSD4_STATE_H */
--
2.47.3
^ permalink raw reply related [flat|nested] 6+ messages in thread* Re: [PATCH v5 1/1] NFSD: move accumulated callback ops to per-net namespace 2026-02-26 19:35 [PATCH v5 1/1] NFSD: move accumulated callback ops to per-net namespace Dai Ngo @ 2026-02-27 15:56 ` Chuck Lever 2026-02-27 18:20 ` Dai Ngo 0 siblings, 1 reply; 6+ messages in thread From: Chuck Lever @ 2026-02-27 15:56 UTC (permalink / raw) To: Dai Ngo, Chuck Lever, Jeff Layton, NeilBrown, Olga Kornievskaia, Tom Talpey, Christoph Hellwig Cc: linux-nfs On Thu, Feb 26, 2026, at 2:35 PM, Dai Ngo wrote: > Track accumulated callback operations on a per-network-namespace basis > instead of globally, ensuring proper isolation and behavior when running > nfsd in containers. Where are the consumers of this information? "Subsequent patch" is an OK answer, but that should be indicated here in your patch description. > Signed-off-by: Dai Ngo <dai.ngo@oracle.com> > --- > fs/nfsd/netns.h | 5 +++ > fs/nfsd/nfs4callback.c | 75 ++++++++++++++++++++++-------------------- > fs/nfsd/nfsctl.c | 5 +++ > fs/nfsd/state.h | 2 ++ > 4 files changed, 52 insertions(+), 35 deletions(-) > > v2: > . free memory allocated for nn->nfsd_cb_version4.counts in > nfsd_net_cb_stats_init() on error in nfsd_net_init(). > v3: > . reword commit message. > . fix initialization of nn->nfsd_cb_program.nrvers. > v4: > . fix merge conflict in nfsd_net_exit in nfsd-testing branch. > v5: > . restore commit message to the original in v1 > > diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h > index 6ad3fe5d7e12..c101bf2c24c2 100644 > --- a/fs/nfsd/netns.h > +++ b/fs/nfsd/netns.h > @@ -228,6 +228,11 @@ struct nfsd_net { > struct list_head local_clients; > #endif > siphash_key_t *fh_key; > + > + struct rpc_version nfsd_cb_version4; > + const struct rpc_version *nfsd_cb_versions[2]; I know this is copy-paste of existing code, but can you find a proper symbolic constant to use here instead of "2" ? 
> + struct rpc_program nfsd_cb_program; > + struct rpc_stat nfsd_cb_stat; > }; > > /* Simple check to find out if a given net was properly initialized */ > diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c > index aea8bdd2fdc4..759f24657c34 100644 > --- a/fs/nfsd/nfs4callback.c > +++ b/fs/nfsd/nfs4callback.c > @@ -1016,7 +1016,7 @@ static int nfs4_xdr_dec_cb_offload(struct rpc_rqst *rqstp, > .p_decode = nfs4_xdr_dec_##restype, \ > .p_arglen = NFS4_enc_##argtype##_sz, \ > .p_replen = NFS4_dec_##restype##_sz, \ > - .p_statidx = NFSPROC4_CB_##call, \ > + .p_statidx = NFSPROC4_CLNT_##proc, \ > .p_name = #proc, \ > } Previously all compound-based callbacks mapped to statidx 1 (NFSPROC4_CB_COMPOUND); now each operation gets its own counter slot (values 0–7). This changes what stats are reported, IIUC. So bundling it here means a bisect on a stats regression cannot isolate when accounting changed, and reverting either change forces reverting both. IMO this should be a pre-requisite commit with its own rationale. > @@ -1032,40 +1032,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = { > PROC(CB_GETATTR, COMPOUND, cb_getattr, cb_getattr), > }; > > -static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)]; > -static const struct rpc_version nfs_cb_version4 = { > -/* > - * Note on the callback rpc program version number: despite language in rfc > - * 5661 section 18.36.3 requiring servers to use 4 in this field, the > - * official xdr descriptions for both 4.0 and 4.1 specify version 1, and > - * in practice that appears to be what implementations use. The section > - * 18.36.3 language is expected to be fixed in an erratum. 
> - */ > - .number = 1, > - .nrprocs = ARRAY_SIZE(nfs4_cb_procedures), > - .procs = nfs4_cb_procedures, > - .counts = nfs4_cb_counts, > -}; > - > -static const struct rpc_version *nfs_cb_version[2] = { > - [1] = &nfs_cb_version4, > -}; > - > -static const struct rpc_program cb_program; > - > -static struct rpc_stat cb_stats = { > - .program = &cb_program > -}; > - > #define NFS4_CALLBACK 0x40000000 > -static const struct rpc_program cb_program = { > - .name = "nfs4_cb", > - .number = NFS4_CALLBACK, > - .nrvers = ARRAY_SIZE(nfs_cb_version), > - .version = nfs_cb_version, > - .stats = &cb_stats, > - .pipe_dir_name = "nfsd4_cb", > -}; > > static int max_cb_time(struct net *net) > { > @@ -1152,14 +1119,15 @@ static int setup_callback_client(struct > nfs4_client *clp, struct nfs4_cb_conn *c > .addrsize = conn->cb_addrlen, > .saddress = (struct sockaddr *) &conn->cb_saddr, > .timeout = &timeparms, > - .program = &cb_program, > .version = 1, > .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), > .cred = current_cred(), > }; > struct rpc_clnt *client; > const struct cred *cred; > + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); Nit: Reverse Christmas tree ordering -- this new declaration belongs close to the top. 
> + args.program = &nn->nfsd_cb_program; > if (clp->cl_minorversion == 0) { > if (!clp->cl_cred.cr_principal && > (clp->cl_cred.cr_flavor >= RPC_AUTH_GSS_KRB5)) { > @@ -1786,3 +1754,40 @@ bool nfsd4_run_cb(struct nfsd4_callback *cb) > nfsd41_cb_inflight_end(clp); > return queued; > } > + > +void nfsd_net_cb_stats_shutdown(struct nfsd_net *nn) > +{ > + kfree(nn->nfsd_cb_version4.counts); > +} > + > +int nfsd_net_cb_stats_init(struct nfsd_net *nn) > +{ > + nn->nfsd_cb_version4.counts = kzalloc_objs(unsigned int, > + ARRAY_SIZE(nfs4_cb_procedures), GFP_KERNEL); > + if (!nn->nfsd_cb_version4.counts) > + return -ENOMEM; > + /* > + * Note on the callback rpc program version number: despite language > + * in rfc 5661 section 18.36.3 requiring servers to use 4 in this > + * field, the official xdr descriptions for both 4.0 and 4.1 specify > + * version 1, and in practice that appears to be what implementations > + * use. The section 18.36.3 language is expected to be fixed in an > + * erratum. > + */ > + nn->nfsd_cb_version4.number = 1; > + > + nn->nfsd_cb_version4.nrprocs = ARRAY_SIZE(nfs4_cb_procedures); > + nn->nfsd_cb_version4.procs = nfs4_cb_procedures; > + nn->nfsd_cb_versions[1] = &nn->nfsd_cb_version4; Could you add a comment explaining that slot 0 is intentionally NULL and slot 1 corresponds to the CB protocol version number? The original designated-initializer syntax made this self- evident; the replacement imperative assignment here does not. > + > + memset(&nn->nfsd_cb_stat, 0, sizeof(nn->nfsd_cb_stat)); > + nn->nfsd_cb_program.name = "nfs4_cb"; > + nn->nfsd_cb_program.number = NFS4_CALLBACK; > + nn->nfsd_cb_program.nrvers = ARRAY_SIZE(nn->nfsd_cb_versions); > + nn->nfsd_cb_program.version = &nn->nfsd_cb_versions[0]; > + nn->nfsd_cb_program.pipe_dir_name = "nfsd4_cb"; > + nn->nfsd_cb_program.stats = &nn->nfsd_cb_stat; > + nn->nfsd_cb_stat.program = &nn->nfsd_cb_program; > + > + return 0; > +} New non-static functions should get kernel-doc comments. 
> diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c > index 032ab44feb70..5daa647ef0fa 100644 > --- a/fs/nfsd/nfsctl.c > +++ b/fs/nfsd/nfsctl.c > @@ -2216,6 +2216,9 @@ static __net_init int nfsd_net_init(struct net *net) > int retval; > int i; > > + retval = nfsd_net_cb_stats_init(nn); > + if (retval) > + return retval; Does this build if CONFIG_NFSD_V4 is not enabled? > retval = nfsd_export_init(net); > if (retval) > goto out_export_error; > @@ -2256,6 +2259,7 @@ static __net_init int nfsd_net_init(struct net *net) > out_idmap_error: > nfsd_export_shutdown(net); > out_export_error: > + nfsd_net_cb_stats_shutdown(nn); > return retval; > } > > @@ -2286,6 +2290,7 @@ static __net_exit void nfsd_net_exit(struct net *net) > struct nfsd_net *nn = net_generic(net, nfsd_net_id); > > kfree_sensitive(nn->fh_key); > + nfsd_net_cb_stats_shutdown(nn); > nfsd_proc_stat_shutdown(net); > percpu_counter_destroy_many(nn->counter, NFSD_STATS_COUNTERS_NUM); > nfsd_idmap_shutdown(net); > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h > index 9b05462da4cc..490193c1877d 100644 > --- a/fs/nfsd/state.h > +++ b/fs/nfsd/state.h > @@ -895,4 +895,6 @@ struct nfsd4_get_dir_delegation; > struct nfs4_delegation *nfsd_get_dir_deleg(struct nfsd4_compound_state *cstate, > struct nfsd4_get_dir_delegation *gdd, > struct nfsd_file *nf); > +int nfsd_net_cb_stats_init(struct nfsd_net *nn); > +void nfsd_net_cb_stats_shutdown(struct nfsd_net *nn); > #endif /* NFSD4_STATE_H */ > -- > 2.47.3 -- Chuck Lever ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH v5 1/1] NFSD: move accumulated callback ops to per-net namespace 2026-02-27 15:56 ` Chuck Lever @ 2026-02-27 18:20 ` Dai Ngo 2026-02-27 19:21 ` Chuck Lever 0 siblings, 1 reply; 6+ messages in thread From: Dai Ngo @ 2026-02-27 18:20 UTC (permalink / raw) To: Chuck Lever, Chuck Lever, Jeff Layton, NeilBrown, Olga Kornievskaia, Tom Talpey, Christoph Hellwig Cc: linux-nfs On 2/27/26 7:56 AM, Chuck Lever wrote: > > On Thu, Feb 26, 2026, at 2:35 PM, Dai Ngo wrote: >> Track accumulated callback operations on a per-network-namespace basis >> instead of globally, ensuring proper isolation and behavior when running >> nfsd in containers. > Where are the consumers of this information? "Subsequent patch" > is an OK answer, but that should be indicated here in your patch > description. Should I first expand the output of /proc/net/rpc/nfsd and then follow up with a netlink-based implementation? Or are we trying to avoid adding anything new under /proc at this point? Also, is there currently any user-space utility that can extract nfsd statistics via the netlink interface? -Dai > > >> Signed-off-by: Dai Ngo <dai.ngo@oracle.com> >> --- >> fs/nfsd/netns.h | 5 +++ >> fs/nfsd/nfs4callback.c | 75 ++++++++++++++++++++++-------------------- >> fs/nfsd/nfsctl.c | 5 +++ >> fs/nfsd/state.h | 2 ++ >> 4 files changed, 52 insertions(+), 35 deletions(-) >> >> v2: >> . free memory allocated for nn->nfsd_cb_version4.counts in >> nfsd_net_cb_stats_init() on error in nfsd_net_init(). >> v3: >> . reword commit message. >> . fix initialization of nn->nfsd_cb_program.nrvers. >> v4: >> . fix merge conflict in nfsd_net_exit in nfsd-testing branch. >> v5: >> . 
restore commit message to the original in v1 >> >> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h >> index 6ad3fe5d7e12..c101bf2c24c2 100644 >> --- a/fs/nfsd/netns.h >> +++ b/fs/nfsd/netns.h >> @@ -228,6 +228,11 @@ struct nfsd_net { >> struct list_head local_clients; >> #endif >> siphash_key_t *fh_key; >> + >> + struct rpc_version nfsd_cb_version4; >> + const struct rpc_version *nfsd_cb_versions[2]; > I know this is copy-paste of existing code, but can you find a > proper symbolic constant to use here instead of "2" ? > > >> + struct rpc_program nfsd_cb_program; >> + struct rpc_stat nfsd_cb_stat; >> }; >> >> /* Simple check to find out if a given net was properly initialized */ >> diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c >> index aea8bdd2fdc4..759f24657c34 100644 >> --- a/fs/nfsd/nfs4callback.c >> +++ b/fs/nfsd/nfs4callback.c >> @@ -1016,7 +1016,7 @@ static int nfs4_xdr_dec_cb_offload(struct rpc_rqst *rqstp, >> .p_decode = nfs4_xdr_dec_##restype, \ >> .p_arglen = NFS4_enc_##argtype##_sz, \ >> .p_replen = NFS4_dec_##restype##_sz, \ >> - .p_statidx = NFSPROC4_CB_##call, \ >> + .p_statidx = NFSPROC4_CLNT_##proc, \ >> .p_name = #proc, \ >> } > Previously all compound-based callbacks mapped to statidx 1 > (NFSPROC4_CB_COMPOUND); now each operation gets its own counter > slot (values 0–7). This changes what stats are reported, IIUC. > So bundling it here means a bisect on a stats regression cannot > isolate when accounting changed, and reverting either change > forces reverting both. > > IMO this should be a pre-requisite commit with its own > rationale. 
> > >> @@ -1032,40 +1032,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = { >> PROC(CB_GETATTR, COMPOUND, cb_getattr, cb_getattr), >> }; >> >> -static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)]; >> -static const struct rpc_version nfs_cb_version4 = { >> -/* >> - * Note on the callback rpc program version number: despite language in rfc >> - * 5661 section 18.36.3 requiring servers to use 4 in this field, the >> - * official xdr descriptions for both 4.0 and 4.1 specify version 1, and >> - * in practice that appears to be what implementations use. The section >> - * 18.36.3 language is expected to be fixed in an erratum. >> - */ >> - .number = 1, >> - .nrprocs = ARRAY_SIZE(nfs4_cb_procedures), >> - .procs = nfs4_cb_procedures, >> - .counts = nfs4_cb_counts, >> -}; >> - >> -static const struct rpc_version *nfs_cb_version[2] = { >> - [1] = &nfs_cb_version4, >> -}; >> - >> -static const struct rpc_program cb_program; >> - >> -static struct rpc_stat cb_stats = { >> - .program = &cb_program >> -}; >> - >> #define NFS4_CALLBACK 0x40000000 >> -static const struct rpc_program cb_program = { >> - .name = "nfs4_cb", >> - .number = NFS4_CALLBACK, >> - .nrvers = ARRAY_SIZE(nfs_cb_version), >> - .version = nfs_cb_version, >> - .stats = &cb_stats, >> - .pipe_dir_name = "nfsd4_cb", >> -}; >> >> static int max_cb_time(struct net *net) >> { >> @@ -1152,14 +1119,15 @@ static int setup_callback_client(struct >> nfs4_client *clp, struct nfs4_cb_conn *c >> .addrsize = conn->cb_addrlen, >> .saddress = (struct sockaddr *) &conn->cb_saddr, >> .timeout = &timeparms, >> - .program = &cb_program, >> .version = 1, >> .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), >> .cred = current_cred(), >> }; >> struct rpc_clnt *client; >> const struct cred *cred; >> + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); > Nit: Reverse Christmas tree ordering -- this new declaration > belongs close to the top. 
> > >> + args.program = &nn->nfsd_cb_program; >> if (clp->cl_minorversion == 0) { >> if (!clp->cl_cred.cr_principal && >> (clp->cl_cred.cr_flavor >= RPC_AUTH_GSS_KRB5)) { >> @@ -1786,3 +1754,40 @@ bool nfsd4_run_cb(struct nfsd4_callback *cb) >> nfsd41_cb_inflight_end(clp); >> return queued; >> } >> + >> +void nfsd_net_cb_stats_shutdown(struct nfsd_net *nn) >> +{ >> + kfree(nn->nfsd_cb_version4.counts); >> +} >> + >> +int nfsd_net_cb_stats_init(struct nfsd_net *nn) >> +{ >> + nn->nfsd_cb_version4.counts = kzalloc_objs(unsigned int, >> + ARRAY_SIZE(nfs4_cb_procedures), GFP_KERNEL); >> + if (!nn->nfsd_cb_version4.counts) >> + return -ENOMEM; >> + /* >> + * Note on the callback rpc program version number: despite language >> + * in rfc 5661 section 18.36.3 requiring servers to use 4 in this >> + * field, the official xdr descriptions for both 4.0 and 4.1 specify >> + * version 1, and in practice that appears to be what implementations >> + * use. The section 18.36.3 language is expected to be fixed in an >> + * erratum. >> + */ >> + nn->nfsd_cb_version4.number = 1; >> + >> + nn->nfsd_cb_version4.nrprocs = ARRAY_SIZE(nfs4_cb_procedures); >> + nn->nfsd_cb_version4.procs = nfs4_cb_procedures; >> + nn->nfsd_cb_versions[1] = &nn->nfsd_cb_version4; > Could you add a comment explaining that slot 0 is intentionally > NULL and slot 1 corresponds to the CB protocol version number? > The original designated-initializer syntax made this self- > evident; the replacement imperative assignment here does not. 
> > >> + >> + memset(&nn->nfsd_cb_stat, 0, sizeof(nn->nfsd_cb_stat)); >> + nn->nfsd_cb_program.name = "nfs4_cb"; >> + nn->nfsd_cb_program.number = NFS4_CALLBACK; >> + nn->nfsd_cb_program.nrvers = ARRAY_SIZE(nn->nfsd_cb_versions); >> + nn->nfsd_cb_program.version = &nn->nfsd_cb_versions[0]; >> + nn->nfsd_cb_program.pipe_dir_name = "nfsd4_cb"; >> + nn->nfsd_cb_program.stats = &nn->nfsd_cb_stat; >> + nn->nfsd_cb_stat.program = &nn->nfsd_cb_program; >> + >> + return 0; >> +} > New non-static functions should get kernel-doc comments. > > >> diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c >> index 032ab44feb70..5daa647ef0fa 100644 >> --- a/fs/nfsd/nfsctl.c >> +++ b/fs/nfsd/nfsctl.c >> @@ -2216,6 +2216,9 @@ static __net_init int nfsd_net_init(struct net *net) >> int retval; >> int i; >> >> + retval = nfsd_net_cb_stats_init(nn); >> + if (retval) >> + return retval; > Does this build if CONFIG_NFSD_V4 is not enabled? > > >> retval = nfsd_export_init(net); >> if (retval) >> goto out_export_error; >> @@ -2256,6 +2259,7 @@ static __net_init int nfsd_net_init(struct net *net) >> out_idmap_error: >> nfsd_export_shutdown(net); >> out_export_error: >> + nfsd_net_cb_stats_shutdown(nn); >> return retval; >> } >> >> @@ -2286,6 +2290,7 @@ static __net_exit void nfsd_net_exit(struct net *net) >> struct nfsd_net *nn = net_generic(net, nfsd_net_id); >> >> kfree_sensitive(nn->fh_key); >> + nfsd_net_cb_stats_shutdown(nn); >> nfsd_proc_stat_shutdown(net); >> percpu_counter_destroy_many(nn->counter, NFSD_STATS_COUNTERS_NUM); >> nfsd_idmap_shutdown(net); >> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h >> index 9b05462da4cc..490193c1877d 100644 >> --- a/fs/nfsd/state.h >> +++ b/fs/nfsd/state.h >> @@ -895,4 +895,6 @@ struct nfsd4_get_dir_delegation; >> struct nfs4_delegation *nfsd_get_dir_deleg(struct nfsd4_compound_state *cstate, >> struct nfsd4_get_dir_delegation *gdd, >> struct nfsd_file *nf); >> +int nfsd_net_cb_stats_init(struct nfsd_net *nn); >> +void nfsd_net_cb_stats_shutdown(struct 
nfsd_net *nn); >> #endif /* NFSD4_STATE_H */ >> -- >> 2.47.3 ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH v5 1/1] NFSD: move accumulated callback ops to per-net namespace 2026-02-27 18:20 ` Dai Ngo @ 2026-02-27 19:21 ` Chuck Lever 2026-02-27 19:45 ` Jeff Layton 0 siblings, 1 reply; 6+ messages in thread From: Chuck Lever @ 2026-02-27 19:21 UTC (permalink / raw) To: Dai Ngo, Chuck Lever, Jeff Layton, NeilBrown, Olga Kornievskaia, Tom Talpey, Christoph Hellwig Cc: linux-nfs On 2/27/26 1:20 PM, Dai Ngo wrote: > > On 2/27/26 7:56 AM, Chuck Lever wrote: >> >> On Thu, Feb 26, 2026, at 2:35 PM, Dai Ngo wrote: >>> Track accumulated callback operations on a per-network-namespace basis >>> instead of globally, ensuring proper isolation and behavior when running >>> nfsd in containers. >> Where are the consumers of this information? "Subsequent patch" >> is an OK answer, but that should be indicated here in your patch >> description. > > Should I first expand the output of /proc/net/rpc/nfsd and then follow > up with a netlink-based implementation? Or are we trying to avoid adding > anything new under /proc at this point? The current kernel-wide policy, as I understand it, is that subsystems are to avoid adding new items under /proc unless absolutely needed. I believe nfsdctl and the NFSD netlink protocol does not yet have an operation to retrieve statistics. Jeff can help you put that together. > Also, is there currently any user-space utility that can extract nfsd > statistics via the netlink interface? > > -Dai > >> >> >>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com> >>> --- >>> fs/nfsd/netns.h | 5 +++ >>> fs/nfsd/nfs4callback.c | 75 ++++++++++++++++++++++-------------------- >>> fs/nfsd/nfsctl.c | 5 +++ >>> fs/nfsd/state.h | 2 ++ >>> 4 files changed, 52 insertions(+), 35 deletions(-) >>> >>> v2: >>> . free memory allocated for nn->nfsd_cb_version4.counts in >>> nfsd_net_cb_stats_init() on error in nfsd_net_init(). >>> v3: >>> . reword commit message. >>> . fix initialization of nn->nfsd_cb_program.nrvers. >>> v4: >>> . 
fix merge conflict in nfsd_net_exit in nfsd-testing branch. >>> v5: >>> . restore commit message to the original in v1 >>> >>> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h >>> index 6ad3fe5d7e12..c101bf2c24c2 100644 >>> --- a/fs/nfsd/netns.h >>> +++ b/fs/nfsd/netns.h >>> @@ -228,6 +228,11 @@ struct nfsd_net { >>> struct list_head local_clients; >>> #endif >>> siphash_key_t *fh_key; >>> + >>> + struct rpc_version nfsd_cb_version4; >>> + const struct rpc_version *nfsd_cb_versions[2]; >> I know this is copy-paste of existing code, but can you find a >> proper symbolic constant to use here instead of "2" ? >> >> >>> + struct rpc_program nfsd_cb_program; >>> + struct rpc_stat nfsd_cb_stat; >>> }; >>> >>> /* Simple check to find out if a given net was properly initialized */ >>> diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c >>> index aea8bdd2fdc4..759f24657c34 100644 >>> --- a/fs/nfsd/nfs4callback.c >>> +++ b/fs/nfsd/nfs4callback.c >>> @@ -1016,7 +1016,7 @@ static int nfs4_xdr_dec_cb_offload(struct >>> rpc_rqst *rqstp, >>> .p_decode = nfs4_xdr_dec_##restype, \ >>> .p_arglen = NFS4_enc_##argtype##_sz, \ >>> .p_replen = NFS4_dec_##restype##_sz, \ >>> - .p_statidx = NFSPROC4_CB_##call, \ >>> + .p_statidx = NFSPROC4_CLNT_##proc, \ >>> .p_name = #proc, \ >>> } >> Previously all compound-based callbacks mapped to statidx 1 >> (NFSPROC4_CB_COMPOUND); now each operation gets its own counter >> slot (values 0–7). This changes what stats are reported, IIUC. >> So bundling it here means a bisect on a stats regression cannot >> isolate when accounting changed, and reverting either change >> forces reverting both. >> >> IMO this should be a pre-requisite commit with its own >> rationale. 
>> >> >>> @@ -1032,40 +1032,7 @@ static const struct rpc_procinfo >>> nfs4_cb_procedures[] = { >>> PROC(CB_GETATTR, COMPOUND, cb_getattr, cb_getattr), >>> }; >>> >>> -static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)]; >>> -static const struct rpc_version nfs_cb_version4 = { >>> -/* >>> - * Note on the callback rpc program version number: despite language >>> in rfc >>> - * 5661 section 18.36.3 requiring servers to use 4 in this field, the >>> - * official xdr descriptions for both 4.0 and 4.1 specify version 1, >>> and >>> - * in practice that appears to be what implementations use. The >>> section >>> - * 18.36.3 language is expected to be fixed in an erratum. >>> - */ >>> - .number = 1, >>> - .nrprocs = ARRAY_SIZE(nfs4_cb_procedures), >>> - .procs = nfs4_cb_procedures, >>> - .counts = nfs4_cb_counts, >>> -}; >>> - >>> -static const struct rpc_version *nfs_cb_version[2] = { >>> - [1] = &nfs_cb_version4, >>> -}; >>> - >>> -static const struct rpc_program cb_program; >>> - >>> -static struct rpc_stat cb_stats = { >>> - .program = &cb_program >>> -}; >>> - >>> #define NFS4_CALLBACK 0x40000000 >>> -static const struct rpc_program cb_program = { >>> - .name = "nfs4_cb", >>> - .number = NFS4_CALLBACK, >>> - .nrvers = ARRAY_SIZE(nfs_cb_version), >>> - .version = nfs_cb_version, >>> - .stats = &cb_stats, >>> - .pipe_dir_name = "nfsd4_cb", >>> -}; >>> >>> static int max_cb_time(struct net *net) >>> { >>> @@ -1152,14 +1119,15 @@ static int setup_callback_client(struct >>> nfs4_client *clp, struct nfs4_cb_conn *c >>> .addrsize = conn->cb_addrlen, >>> .saddress = (struct sockaddr *) &conn->cb_saddr, >>> .timeout = &timeparms, >>> - .program = &cb_program, >>> .version = 1, >>> .flags = (RPC_CLNT_CREATE_NOPING | >>> RPC_CLNT_CREATE_QUIET), >>> .cred = current_cred(), >>> }; >>> struct rpc_clnt *client; >>> const struct cred *cred; >>> + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); >> Nit: Reverse Christmas tree ordering -- this new declaration >> 
belongs close to the top. >> >> >>> + args.program = &nn->nfsd_cb_program; >>> if (clp->cl_minorversion == 0) { >>> if (!clp->cl_cred.cr_principal && >>> (clp->cl_cred.cr_flavor >= RPC_AUTH_GSS_KRB5)) { >>> @@ -1786,3 +1754,40 @@ bool nfsd4_run_cb(struct nfsd4_callback *cb) >>> nfsd41_cb_inflight_end(clp); >>> return queued; >>> } >>> + >>> +void nfsd_net_cb_stats_shutdown(struct nfsd_net *nn) >>> +{ >>> + kfree(nn->nfsd_cb_version4.counts); >>> +} >>> + >>> +int nfsd_net_cb_stats_init(struct nfsd_net *nn) >>> +{ >>> + nn->nfsd_cb_version4.counts = kzalloc_objs(unsigned int, >>> + ARRAY_SIZE(nfs4_cb_procedures), GFP_KERNEL); >>> + if (!nn->nfsd_cb_version4.counts) >>> + return -ENOMEM; >>> + /* >>> + * Note on the callback rpc program version number: despite >>> language >>> + * in rfc 5661 section 18.36.3 requiring servers to use 4 in this >>> + * field, the official xdr descriptions for both 4.0 and 4.1 >>> specify >>> + * version 1, and in practice that appears to be what >>> implementations >>> + * use. The section 18.36.3 language is expected to be fixed in an >>> + * erratum. >>> + */ >>> + nn->nfsd_cb_version4.number = 1; >>> + >>> + nn->nfsd_cb_version4.nrprocs = ARRAY_SIZE(nfs4_cb_procedures); >>> + nn->nfsd_cb_version4.procs = nfs4_cb_procedures; >>> + nn->nfsd_cb_versions[1] = &nn->nfsd_cb_version4; >> Could you add a comment explaining that slot 0 is intentionally >> NULL and slot 1 corresponds to the CB protocol version number? >> The original designated-initializer syntax made this self- >> evident; the replacement imperative assignment here does not. 
>> >> >>> + >>> + memset(&nn->nfsd_cb_stat, 0, sizeof(nn->nfsd_cb_stat)); >>> + nn->nfsd_cb_program.name = "nfs4_cb"; >>> + nn->nfsd_cb_program.number = NFS4_CALLBACK; >>> + nn->nfsd_cb_program.nrvers = ARRAY_SIZE(nn->nfsd_cb_versions); >>> + nn->nfsd_cb_program.version = &nn->nfsd_cb_versions[0]; >>> + nn->nfsd_cb_program.pipe_dir_name = "nfsd4_cb"; >>> + nn->nfsd_cb_program.stats = &nn->nfsd_cb_stat; >>> + nn->nfsd_cb_stat.program = &nn->nfsd_cb_program; >>> + >>> + return 0; >>> +} >> New non-static functions should get kernel-doc comments. >> >> >>> diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c >>> index 032ab44feb70..5daa647ef0fa 100644 >>> --- a/fs/nfsd/nfsctl.c >>> +++ b/fs/nfsd/nfsctl.c >>> @@ -2216,6 +2216,9 @@ static __net_init int nfsd_net_init(struct net >>> *net) >>> int retval; >>> int i; >>> >>> + retval = nfsd_net_cb_stats_init(nn); >>> + if (retval) >>> + return retval; >> Does this build if CONFIG_NFSD_V4 is not enabled? >> >> >>> retval = nfsd_export_init(net); >>> if (retval) >>> goto out_export_error; >>> @@ -2256,6 +2259,7 @@ static __net_init int nfsd_net_init(struct net >>> *net) >>> out_idmap_error: >>> nfsd_export_shutdown(net); >>> out_export_error: >>> + nfsd_net_cb_stats_shutdown(nn); >>> return retval; >>> } >>> >>> @@ -2286,6 +2290,7 @@ static __net_exit void nfsd_net_exit(struct net >>> *net) >>> struct nfsd_net *nn = net_generic(net, nfsd_net_id); >>> >>> kfree_sensitive(nn->fh_key); >>> + nfsd_net_cb_stats_shutdown(nn); >>> nfsd_proc_stat_shutdown(net); >>> percpu_counter_destroy_many(nn->counter, NFSD_STATS_COUNTERS_NUM); >>> nfsd_idmap_shutdown(net); >>> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h >>> index 9b05462da4cc..490193c1877d 100644 >>> --- a/fs/nfsd/state.h >>> +++ b/fs/nfsd/state.h >>> @@ -895,4 +895,6 @@ struct nfsd4_get_dir_delegation; >>> struct nfs4_delegation *nfsd_get_dir_deleg(struct >>> nfsd4_compound_state *cstate, >>> struct nfsd4_get_dir_delegation *gdd, >>> struct nfsd_file *nf); >>> +int 
nfsd_net_cb_stats_init(struct nfsd_net *nn); >>> +void nfsd_net_cb_stats_shutdown(struct nfsd_net *nn); >>> #endif /* NFSD4_STATE_H */ >>> -- >>> 2.47.3 -- Chuck Lever ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH v5 1/1] NFSD: move accumulated callback ops to per-net namespace 2026-02-27 19:21 ` Chuck Lever @ 2026-02-27 19:45 ` Jeff Layton 2026-02-28 0:44 ` Dai Ngo 0 siblings, 1 reply; 6+ messages in thread From: Jeff Layton @ 2026-02-27 19:45 UTC (permalink / raw) To: Chuck Lever, Dai Ngo, Chuck Lever, NeilBrown, Olga Kornievskaia, Tom Talpey, Christoph Hellwig Cc: linux-nfs On Fri, 2026-02-27 at 14:21 -0500, Chuck Lever wrote: > On 2/27/26 1:20 PM, Dai Ngo wrote: > > > > On 2/27/26 7:56 AM, Chuck Lever wrote: > > > > > > On Thu, Feb 26, 2026, at 2:35 PM, Dai Ngo wrote: > > > > Track accumulated callback operations on a per-network-namespace basis > > > > instead of globally, ensuring proper isolation and behavior when running > > > > nfsd in containers. > > > Where are the consumers of this information? "Subsequent patch" > > > is an OK answer, but that should be indicated here in your patch > > > description. > > > > Should I first expand the output of /proc/net/rpc/nfsd and then follow > > up with a netlink-based implementation? Or are we trying to avoid adding > > anything new under /proc at this point? > > The current kernel-wide policy, as I understand it, is that subsystems > are to avoid adding new items under /proc unless absolutely needed. > +1 Dealing with file-based interfaces for this sort of thing is a giant PITA for userland. Netlink is a much cleaner interface to deal with. No partial reads of the file, etc... > I believe nfsdctl and the NFSD netlink protocol does not yet have an > operation to retrieve statistics. Jeff can help you put that together. > There is a rpc-status-get command, but that's a bit different from what this is adding. You'll probably want to add a new netlink command to get these stats and a new set of attributes for them. Have a look at Documentation/netlink/specs/nfsd.yaml. You'll want to extend that and regenerate the headers and code, and then implement the new commands. 
For this, it might be best to first replicate the stats that /proc/net/rpc/nfsd already provides to be accessible via netlink. Then you could add support for the new stats you want to add. Then in userland, you could extend nfsstat to attempt to use netlink first and only fall back to /proc scraping if the command doesn't exist. > > > Also, is there currently any user-space utility that can extract nfsd > > statistics via the netlink interface? > > > > -Dai > > > > > > > > > > > > Signed-off-by: Dai Ngo <dai.ngo@oracle.com> > > > > --- > > > > fs/nfsd/netns.h | 5 +++ > > > > fs/nfsd/nfs4callback.c | 75 ++++++++++++++++++++++-------------------- > > > > fs/nfsd/nfsctl.c | 5 +++ > > > > fs/nfsd/state.h | 2 ++ > > > > 4 files changed, 52 insertions(+), 35 deletions(-) > > > > > > > > v2: > > > > . free memory allocated for nn->nfsd_cb_version4.counts in > > > > nfsd_net_cb_stats_init() on error in nfsd_net_init(). > > > > v3: > > > > . reword commit message. > > > > . fix initialization of nn->nfsd_cb_program.nrvers. > > > > v4: > > > > . fix merge conflict in nfsd_net_exit in nfsd-testing branch. > > > > v5: > > > > . restore commit message to the original in v1 > > > > > > > > diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h > > > > index 6ad3fe5d7e12..c101bf2c24c2 100644 > > > > --- a/fs/nfsd/netns.h > > > > +++ b/fs/nfsd/netns.h > > > > @@ -228,6 +228,11 @@ struct nfsd_net { > > > > struct list_head local_clients; > > > > #endif > > > > siphash_key_t *fh_key; > > > > + > > > > + struct rpc_version nfsd_cb_version4; > > > > + const struct rpc_version *nfsd_cb_versions[2]; > > > I know this is copy-paste of existing code, but can you find a > > > proper symbolic constant to use here instead of "2" ? 
> > > > > > > > > > + struct rpc_program nfsd_cb_program; > > > > + struct rpc_stat nfsd_cb_stat; > > > > }; > > > > > > > > /* Simple check to find out if a given net was properly initialized */ > > > > diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c > > > > index aea8bdd2fdc4..759f24657c34 100644 > > > > --- a/fs/nfsd/nfs4callback.c > > > > +++ b/fs/nfsd/nfs4callback.c > > > > @@ -1016,7 +1016,7 @@ static int nfs4_xdr_dec_cb_offload(struct > > > > rpc_rqst *rqstp, > > > > .p_decode = nfs4_xdr_dec_##restype, \ > > > > .p_arglen = NFS4_enc_##argtype##_sz, \ > > > > .p_replen = NFS4_dec_##restype##_sz, \ > > > > - .p_statidx = NFSPROC4_CB_##call, \ > > > > + .p_statidx = NFSPROC4_CLNT_##proc, \ > > > > .p_name = #proc, \ > > > > } > > > Previously all compound-based callbacks mapped to statidx 1 > > > (NFSPROC4_CB_COMPOUND); now each operation gets its own counter > > > slot (values 0–7). This changes what stats are reported, IIUC. > > > So bundling it here means a bisect on a stats regression cannot > > > isolate when accounting changed, and reverting either change > > > forces reverting both. > > > > > > IMO this should be a pre-requisite commit with its own > > > rationale. > > > > > > > > > > @@ -1032,40 +1032,7 @@ static const struct rpc_procinfo > > > > nfs4_cb_procedures[] = { > > > > PROC(CB_GETATTR, COMPOUND, cb_getattr, cb_getattr), > > > > }; > > > > > > > > -static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)]; > > > > -static const struct rpc_version nfs_cb_version4 = { > > > > -/* > > > > - * Note on the callback rpc program version number: despite language > > > > in rfc > > > > - * 5661 section 18.36.3 requiring servers to use 4 in this field, the > > > > - * official xdr descriptions for both 4.0 and 4.1 specify version 1, > > > > and > > > > - * in practice that appears to be what implementations use. The > > > > section > > > > - * 18.36.3 language is expected to be fixed in an erratum. 
> > > > - */ > > > > - .number = 1, > > > > - .nrprocs = ARRAY_SIZE(nfs4_cb_procedures), > > > > - .procs = nfs4_cb_procedures, > > > > - .counts = nfs4_cb_counts, > > > > -}; > > > > - > > > > -static const struct rpc_version *nfs_cb_version[2] = { > > > > - [1] = &nfs_cb_version4, > > > > -}; > > > > - > > > > -static const struct rpc_program cb_program; > > > > - > > > > -static struct rpc_stat cb_stats = { > > > > - .program = &cb_program > > > > -}; > > > > - > > > > #define NFS4_CALLBACK 0x40000000 > > > > -static const struct rpc_program cb_program = { > > > > - .name = "nfs4_cb", > > > > - .number = NFS4_CALLBACK, > > > > - .nrvers = ARRAY_SIZE(nfs_cb_version), > > > > - .version = nfs_cb_version, > > > > - .stats = &cb_stats, > > > > - .pipe_dir_name = "nfsd4_cb", > > > > -}; > > > > > > > > static int max_cb_time(struct net *net) > > > > { > > > > @@ -1152,14 +1119,15 @@ static int setup_callback_client(struct > > > > nfs4_client *clp, struct nfs4_cb_conn *c > > > > .addrsize = conn->cb_addrlen, > > > > .saddress = (struct sockaddr *) &conn->cb_saddr, > > > > .timeout = &timeparms, > > > > - .program = &cb_program, > > > > .version = 1, > > > > .flags = (RPC_CLNT_CREATE_NOPING | > > > > RPC_CLNT_CREATE_QUIET), > > > > .cred = current_cred(), > > > > }; > > > > struct rpc_clnt *client; > > > > const struct cred *cred; > > > > + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); > > > Nit: Reverse Christmas tree ordering -- this new declaration > > > belongs close to the top. 
> > > > > > > > > > + args.program = &nn->nfsd_cb_program; > > > > if (clp->cl_minorversion == 0) { > > > > if (!clp->cl_cred.cr_principal && > > > > (clp->cl_cred.cr_flavor >= RPC_AUTH_GSS_KRB5)) { > > > > @@ -1786,3 +1754,40 @@ bool nfsd4_run_cb(struct nfsd4_callback *cb) > > > > nfsd41_cb_inflight_end(clp); > > > > return queued; > > > > } > > > > + > > > > +void nfsd_net_cb_stats_shutdown(struct nfsd_net *nn) > > > > +{ > > > > + kfree(nn->nfsd_cb_version4.counts); > > > > +} > > > > + > > > > +int nfsd_net_cb_stats_init(struct nfsd_net *nn) > > > > +{ > > > > + nn->nfsd_cb_version4.counts = kzalloc_objs(unsigned int, > > > > + ARRAY_SIZE(nfs4_cb_procedures), GFP_KERNEL); > > > > + if (!nn->nfsd_cb_version4.counts) > > > > + return -ENOMEM; > > > > + /* > > > > + * Note on the callback rpc program version number: despite > > > > language > > > > + * in rfc 5661 section 18.36.3 requiring servers to use 4 in this > > > > + * field, the official xdr descriptions for both 4.0 and 4.1 > > > > specify > > > > + * version 1, and in practice that appears to be what > > > > implementations > > > > + * use. The section 18.36.3 language is expected to be fixed in an > > > > + * erratum. > > > > + */ > > > > + nn->nfsd_cb_version4.number = 1; > > > > + > > > > + nn->nfsd_cb_version4.nrprocs = ARRAY_SIZE(nfs4_cb_procedures); > > > > + nn->nfsd_cb_version4.procs = nfs4_cb_procedures; > > > > + nn->nfsd_cb_versions[1] = &nn->nfsd_cb_version4; > > > Could you add a comment explaining that slot 0 is intentionally > > > NULL and slot 1 corresponds to the CB protocol version number? > > > The original designated-initializer syntax made this self- > > > evident; the replacement imperative assignment here does not. 
> > > > > > > > > > + > > > > + memset(&nn->nfsd_cb_stat, 0, sizeof(nn->nfsd_cb_stat)); > > > > + nn->nfsd_cb_program.name = "nfs4_cb"; > > > > + nn->nfsd_cb_program.number = NFS4_CALLBACK; > > > > + nn->nfsd_cb_program.nrvers = ARRAY_SIZE(nn->nfsd_cb_versions); > > > > + nn->nfsd_cb_program.version = &nn->nfsd_cb_versions[0]; > > > > + nn->nfsd_cb_program.pipe_dir_name = "nfsd4_cb"; > > > > + nn->nfsd_cb_program.stats = &nn->nfsd_cb_stat; > > > > + nn->nfsd_cb_stat.program = &nn->nfsd_cb_program; > > > > + > > > > + return 0; > > > > +} > > > New non-static functions should get kernel-doc comments. > > > > > > > > > > diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c > > > > index 032ab44feb70..5daa647ef0fa 100644 > > > > --- a/fs/nfsd/nfsctl.c > > > > +++ b/fs/nfsd/nfsctl.c > > > > @@ -2216,6 +2216,9 @@ static __net_init int nfsd_net_init(struct net > > > > *net) > > > > int retval; > > > > int i; > > > > > > > > + retval = nfsd_net_cb_stats_init(nn); > > > > + if (retval) > > > > + return retval; > > > Does this build if CONFIG_NFSD_V4 is not enabled? 
> > > > > > > > > > retval = nfsd_export_init(net); > > > > if (retval) > > > > goto out_export_error; > > > > @@ -2256,6 +2259,7 @@ static __net_init int nfsd_net_init(struct net > > > > *net) > > > > out_idmap_error: > > > > nfsd_export_shutdown(net); > > > > out_export_error: > > > > + nfsd_net_cb_stats_shutdown(nn); > > > > return retval; > > > > } > > > > > > > > @@ -2286,6 +2290,7 @@ static __net_exit void nfsd_net_exit(struct net > > > > *net) > > > > struct nfsd_net *nn = net_generic(net, nfsd_net_id); > > > > > > > > kfree_sensitive(nn->fh_key); > > > > + nfsd_net_cb_stats_shutdown(nn); > > > > nfsd_proc_stat_shutdown(net); > > > > percpu_counter_destroy_many(nn->counter, NFSD_STATS_COUNTERS_NUM); > > > > nfsd_idmap_shutdown(net); > > > > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h > > > > index 9b05462da4cc..490193c1877d 100644 > > > > --- a/fs/nfsd/state.h > > > > +++ b/fs/nfsd/state.h > > > > @@ -895,4 +895,6 @@ struct nfsd4_get_dir_delegation; > > > > struct nfs4_delegation *nfsd_get_dir_deleg(struct > > > > nfsd4_compound_state *cstate, > > > > struct nfsd4_get_dir_delegation *gdd, > > > > struct nfsd_file *nf); > > > > +int nfsd_net_cb_stats_init(struct nfsd_net *nn); > > > > +void nfsd_net_cb_stats_shutdown(struct nfsd_net *nn); > > > > #endif /* NFSD4_STATE_H */ > > > > -- > > > > 2.47.3 > -- Jeff Layton <jlayton@kernel.org> ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH v5 1/1] NFSD: move accumulated callback ops to per-net namespace 2026-02-27 19:45 ` Jeff Layton @ 2026-02-28 0:44 ` Dai Ngo 0 siblings, 0 replies; 6+ messages in thread From: Dai Ngo @ 2026-02-28 0:44 UTC (permalink / raw) To: Jeff Layton, Chuck Lever, Chuck Lever, NeilBrown, Olga Kornievskaia, Tom Talpey, Christoph Hellwig Cc: linux-nfs On 2/27/26 11:45 AM, Jeff Layton wrote: > On Fri, 2026-02-27 at 14:21 -0500, Chuck Lever wrote: >> On 2/27/26 1:20 PM, Dai Ngo wrote: >>> On 2/27/26 7:56 AM, Chuck Lever wrote: >>>> On Thu, Feb 26, 2026, at 2:35 PM, Dai Ngo wrote: >>>>> Track accumulated callback operations on a per-network-namespace basis >>>>> instead of globally, ensuring proper isolation and behavior when running >>>>> nfsd in containers. >>>> Where are the consumers of this information? "Subsequent patch" >>>> is an OK answer, but that should be indicated here in your patch >>>> description. >>> Should I first expand the output of /proc/net/rpc/nfsd and then follow >>> up with a netlink-based implementation? Or are we trying to avoid adding >>> anything new under /proc at this point? >> The current kernel-wide policy, as I understand it, is that subsystems >> are to avoid adding new items under /proc unless absolutely needed. >> > +1 This patch does not add any new object under /proc. It moves the existing statistics from global to per-net-namespace scope. > > Dealing with file-based interfaces for this sort of thing is a giant > PITA for userland. Netlink is a much cleaner interface to deal with. No > partial reads of the file, etc... Yes, netlink is a much cleaner interface but it requires a userland utility to be written to retrieve and display the data in a user-friendly format for end users or administrators. I think the output of /proc/net/rpc/nfsd is still useful for developers who want a quick look at what's going on in the back channel. > >> I believe nfsdctl and the NFSD netlink protocol does not yet have an >> operation to retrieve statistics. 
Jeff can help you put that together. >> > There is a rpc-status-get command, Is this in nfsutil package? > but that's a bit different from what > this is adding. You'll probably want to add a new netlink command to > get these stats and a new set of attributes for them. > > Have a look at Documentation/netlink/specs/nfsd.yaml. You'll want to > extend that and regenerate the headers and code, and then implement the > new commands. > > For this, it might be best to first replicate the stats that > /proc/net/rpc/nfsd already provides to be accessible via netlink. Then > you could add support for the new stats you want to add. Then in > userland, you could extend nfsstat to attempt to use netlink first and > only fall back to /proc scraping if the command doesn't exist. I will look into this. Thanks, -Dai > >>> Also, is there currently any user-space utility that can extract nfsd >>> statistics via the netlink interface? >>> >>> -Dai >>> >>>> >>>>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com> >>>>> --- >>>>> fs/nfsd/netns.h | 5 +++ >>>>> fs/nfsd/nfs4callback.c | 75 ++++++++++++++++++++++-------------------- >>>>> fs/nfsd/nfsctl.c | 5 +++ >>>>> fs/nfsd/state.h | 2 ++ >>>>> 4 files changed, 52 insertions(+), 35 deletions(-) >>>>> >>>>> v2: >>>>> . free memory allocated for nn->nfsd_cb_version4.counts in >>>>> nfsd_net_cb_stats_init() on error in nfsd_net_init(). >>>>> v3: >>>>> . reword commit message. >>>>> . fix initialization of nn->nfsd_cb_program.nrvers. >>>>> v4: >>>>> . fix merge conflict in nfsd_net_exit in nfsd-testing branch. >>>>> v5: >>>>> . 
restore commit message to the original in v1 >>>>> >>>>> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h >>>>> index 6ad3fe5d7e12..c101bf2c24c2 100644 >>>>> --- a/fs/nfsd/netns.h >>>>> +++ b/fs/nfsd/netns.h >>>>> @@ -228,6 +228,11 @@ struct nfsd_net { >>>>> struct list_head local_clients; >>>>> #endif >>>>> siphash_key_t *fh_key; >>>>> + >>>>> + struct rpc_version nfsd_cb_version4; >>>>> + const struct rpc_version *nfsd_cb_versions[2]; >>>> I know this is copy-paste of existing code, but can you find a >>>> proper symbolic constant to use here instead of "2" ? >>>> >>>> >>>>> + struct rpc_program nfsd_cb_program; >>>>> + struct rpc_stat nfsd_cb_stat; >>>>> }; >>>>> >>>>> /* Simple check to find out if a given net was properly initialized */ >>>>> diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c >>>>> index aea8bdd2fdc4..759f24657c34 100644 >>>>> --- a/fs/nfsd/nfs4callback.c >>>>> +++ b/fs/nfsd/nfs4callback.c >>>>> @@ -1016,7 +1016,7 @@ static int nfs4_xdr_dec_cb_offload(struct >>>>> rpc_rqst *rqstp, >>>>> .p_decode = nfs4_xdr_dec_##restype, \ >>>>> .p_arglen = NFS4_enc_##argtype##_sz, \ >>>>> .p_replen = NFS4_dec_##restype##_sz, \ >>>>> - .p_statidx = NFSPROC4_CB_##call, \ >>>>> + .p_statidx = NFSPROC4_CLNT_##proc, \ >>>>> .p_name = #proc, \ >>>>> } >>>> Previously all compound-based callbacks mapped to statidx 1 >>>> (NFSPROC4_CB_COMPOUND); now each operation gets its own counter >>>> slot (values 0–7). This changes what stats are reported, IIUC. >>>> So bundling it here means a bisect on a stats regression cannot >>>> isolate when accounting changed, and reverting either change >>>> forces reverting both. >>>> >>>> IMO this should be a pre-requisite commit with its own >>>> rationale. 
>>>> >>>> >>>>> @@ -1032,40 +1032,7 @@ static const struct rpc_procinfo >>>>> nfs4_cb_procedures[] = { >>>>> PROC(CB_GETATTR, COMPOUND, cb_getattr, cb_getattr), >>>>> }; >>>>> >>>>> -static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)]; >>>>> -static const struct rpc_version nfs_cb_version4 = { >>>>> -/* >>>>> - * Note on the callback rpc program version number: despite language >>>>> in rfc >>>>> - * 5661 section 18.36.3 requiring servers to use 4 in this field, the >>>>> - * official xdr descriptions for both 4.0 and 4.1 specify version 1, >>>>> and >>>>> - * in practice that appears to be what implementations use. The >>>>> section >>>>> - * 18.36.3 language is expected to be fixed in an erratum. >>>>> - */ >>>>> - .number = 1, >>>>> - .nrprocs = ARRAY_SIZE(nfs4_cb_procedures), >>>>> - .procs = nfs4_cb_procedures, >>>>> - .counts = nfs4_cb_counts, >>>>> -}; >>>>> - >>>>> -static const struct rpc_version *nfs_cb_version[2] = { >>>>> - [1] = &nfs_cb_version4, >>>>> -}; >>>>> - >>>>> -static const struct rpc_program cb_program; >>>>> - >>>>> -static struct rpc_stat cb_stats = { >>>>> - .program = &cb_program >>>>> -}; >>>>> - >>>>> #define NFS4_CALLBACK 0x40000000 >>>>> -static const struct rpc_program cb_program = { >>>>> - .name = "nfs4_cb", >>>>> - .number = NFS4_CALLBACK, >>>>> - .nrvers = ARRAY_SIZE(nfs_cb_version), >>>>> - .version = nfs_cb_version, >>>>> - .stats = &cb_stats, >>>>> - .pipe_dir_name = "nfsd4_cb", >>>>> -}; >>>>> >>>>> static int max_cb_time(struct net *net) >>>>> { >>>>> @@ -1152,14 +1119,15 @@ static int setup_callback_client(struct >>>>> nfs4_client *clp, struct nfs4_cb_conn *c >>>>> .addrsize = conn->cb_addrlen, >>>>> .saddress = (struct sockaddr *) &conn->cb_saddr, >>>>> .timeout = &timeparms, >>>>> - .program = &cb_program, >>>>> .version = 1, >>>>> .flags = (RPC_CLNT_CREATE_NOPING | >>>>> RPC_CLNT_CREATE_QUIET), >>>>> .cred = current_cred(), >>>>> }; >>>>> struct rpc_clnt *client; >>>>> const struct cred *cred; >>>>> + 
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); >>>> Nit: Reverse Christmas tree ordering -- this new declaration >>>> belongs close to the top. >>>> >>>> >>>>> + args.program = &nn->nfsd_cb_program; >>>>> if (clp->cl_minorversion == 0) { >>>>> if (!clp->cl_cred.cr_principal && >>>>> (clp->cl_cred.cr_flavor >= RPC_AUTH_GSS_KRB5)) { >>>>> @@ -1786,3 +1754,40 @@ bool nfsd4_run_cb(struct nfsd4_callback *cb) >>>>> nfsd41_cb_inflight_end(clp); >>>>> return queued; >>>>> } >>>>> + >>>>> +void nfsd_net_cb_stats_shutdown(struct nfsd_net *nn) >>>>> +{ >>>>> + kfree(nn->nfsd_cb_version4.counts); >>>>> +} >>>>> + >>>>> +int nfsd_net_cb_stats_init(struct nfsd_net *nn) >>>>> +{ >>>>> + nn->nfsd_cb_version4.counts = kzalloc_objs(unsigned int, >>>>> + ARRAY_SIZE(nfs4_cb_procedures), GFP_KERNEL); >>>>> + if (!nn->nfsd_cb_version4.counts) >>>>> + return -ENOMEM; >>>>> + /* >>>>> + * Note on the callback rpc program version number: despite >>>>> language >>>>> + * in rfc 5661 section 18.36.3 requiring servers to use 4 in this >>>>> + * field, the official xdr descriptions for both 4.0 and 4.1 >>>>> specify >>>>> + * version 1, and in practice that appears to be what >>>>> implementations >>>>> + * use. The section 18.36.3 language is expected to be fixed in an >>>>> + * erratum. >>>>> + */ >>>>> + nn->nfsd_cb_version4.number = 1; >>>>> + >>>>> + nn->nfsd_cb_version4.nrprocs = ARRAY_SIZE(nfs4_cb_procedures); >>>>> + nn->nfsd_cb_version4.procs = nfs4_cb_procedures; >>>>> + nn->nfsd_cb_versions[1] = &nn->nfsd_cb_version4; >>>> Could you add a comment explaining that slot 0 is intentionally >>>> NULL and slot 1 corresponds to the CB protocol version number? >>>> The original designated-initializer syntax made this self- >>>> evident; the replacement imperative assignment here does not. 
>>>> >>>> >>>>> + >>>>> + memset(&nn->nfsd_cb_stat, 0, sizeof(nn->nfsd_cb_stat)); >>>>> + nn->nfsd_cb_program.name = "nfs4_cb"; >>>>> + nn->nfsd_cb_program.number = NFS4_CALLBACK; >>>>> + nn->nfsd_cb_program.nrvers = ARRAY_SIZE(nn->nfsd_cb_versions); >>>>> + nn->nfsd_cb_program.version = &nn->nfsd_cb_versions[0]; >>>>> + nn->nfsd_cb_program.pipe_dir_name = "nfsd4_cb"; >>>>> + nn->nfsd_cb_program.stats = &nn->nfsd_cb_stat; >>>>> + nn->nfsd_cb_stat.program = &nn->nfsd_cb_program; >>>>> + >>>>> + return 0; >>>>> +} >>>> New non-static functions should get kernel-doc comments. >>>> >>>> >>>>> diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c >>>>> index 032ab44feb70..5daa647ef0fa 100644 >>>>> --- a/fs/nfsd/nfsctl.c >>>>> +++ b/fs/nfsd/nfsctl.c >>>>> @@ -2216,6 +2216,9 @@ static __net_init int nfsd_net_init(struct net >>>>> *net) >>>>> int retval; >>>>> int i; >>>>> >>>>> + retval = nfsd_net_cb_stats_init(nn); >>>>> + if (retval) >>>>> + return retval; >>>> Does this build if CONFIG_NFSD_V4 is not enabled? 
>>>> >>>> >>>>> retval = nfsd_export_init(net); >>>>> if (retval) >>>>> goto out_export_error; >>>>> @@ -2256,6 +2259,7 @@ static __net_init int nfsd_net_init(struct net >>>>> *net) >>>>> out_idmap_error: >>>>> nfsd_export_shutdown(net); >>>>> out_export_error: >>>>> + nfsd_net_cb_stats_shutdown(nn); >>>>> return retval; >>>>> } >>>>> >>>>> @@ -2286,6 +2290,7 @@ static __net_exit void nfsd_net_exit(struct net >>>>> *net) >>>>> struct nfsd_net *nn = net_generic(net, nfsd_net_id); >>>>> >>>>> kfree_sensitive(nn->fh_key); >>>>> + nfsd_net_cb_stats_shutdown(nn); >>>>> nfsd_proc_stat_shutdown(net); >>>>> percpu_counter_destroy_many(nn->counter, NFSD_STATS_COUNTERS_NUM); >>>>> nfsd_idmap_shutdown(net); >>>>> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h >>>>> index 9b05462da4cc..490193c1877d 100644 >>>>> --- a/fs/nfsd/state.h >>>>> +++ b/fs/nfsd/state.h >>>>> @@ -895,4 +895,6 @@ struct nfsd4_get_dir_delegation; >>>>> struct nfs4_delegation *nfsd_get_dir_deleg(struct >>>>> nfsd4_compound_state *cstate, >>>>> struct nfsd4_get_dir_delegation *gdd, >>>>> struct nfsd_file *nf); >>>>> +int nfsd_net_cb_stats_init(struct nfsd_net *nn); >>>>> +void nfsd_net_cb_stats_shutdown(struct nfsd_net *nn); >>>>> #endif /* NFSD4_STATE_H */ >>>>> -- >>>>> 2.47.3 ^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads: [~2026-02-28 0:45 UTC | newest] Thread overview: 6+ messages (download: mbox.gz follow: Atom feed) -- links below jump to the message on this page -- 2026-02-26 19:35 [PATCH v5 1/1] NFSD: move accumulated callback ops to per-net namespace Dai Ngo 2026-02-27 15:56 ` Chuck Lever 2026-02-27 18:20 ` Dai Ngo 2026-02-27 19:21 ` Chuck Lever 2026-02-27 19:45 ` Jeff Layton 2026-02-28 0:44 ` Dai Ngo
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox.