* [PATCH v2] nfsd: Invoke tracking callbacks only after initialization is complete
@ 2025-06-12 3:55 Li Lingfeng
2025-06-17 20:49 ` Jeff Layton
` (2 more replies)
0 siblings, 3 replies; 5+ messages in thread
From: Li Lingfeng @ 2025-06-12 3:55 UTC (permalink / raw)
To: chuck.lever, jlayton, neilb, okorniev, Dai.Ngo, tom, linux-nfs,
linux-kernel
Cc: yukuai1, houtao1, yi.zhang, yangerkun, lilingfeng, lilingfeng3
Checking whether tracking callbacks can be called based on whether
nn->client_tracking_ops is NULL may lead to callbacks being invoked
before tracking initialization completes, causing resource access
violations (UAF, NULL pointer dereference). Examples:
1) nfsd4_client_tracking_init
// set nn->client_tracking_ops
nfsd4_cld_tracking_init
nfs4_cld_state_init
nn->reclaim_str_hashtbl = kmalloc_array
... // error path, goto err
nfs4_cld_state_shutdown
kfree(nn->reclaim_str_hashtbl)
write_v4_end_grace
nfsd4_end_grace
nfsd4_record_grace_done
nfsd4_cld_grace_done
nfs4_release_reclaim
nn->reclaim_str_hashtbl[i]
// UAF
// clear nn->client_tracking_ops
2) nfsd4_client_tracking_init
// set nn->client_tracking_ops
nfsd4_cld_tracking_init
write_v4_end_grace
nfsd4_end_grace
nfsd4_record_grace_done
nfsd4_cld_grace_done
alloc_cld_upcall
cn = nn->cld_net
spin_lock // cn->cn_lock
// NULL deref
// error path, skip init pipe
__nfsd4_init_cld_pipe
cn = kzalloc
nn->cld_net = cn
// clear nn->client_tracking_ops
After nfsd mounts, users can trigger grace_done callbacks via
/proc/fs/nfsd/v4_end_grace. If resources are uninitialized or freed
in error paths, this causes access violations.
Resolve the issue by leveraging nfsd_mutex to prevent concurrency.
Fixes: 52e19c09a183 ("nfsd: make reclaim_str_hashtbl allocated per net")
Signed-off-by: Li Lingfeng <lilingfeng3@huawei.com>
---
Changes in v2:
Use nfsd_mutex instead of adding a new flag to prevent concurrency.
fs/nfsd/nfs4recover.c | 8 ++++++++
fs/nfsd/nfs4state.c | 4 ++++
fs/nfsd/nfsctl.c | 2 ++
3 files changed, 14 insertions(+)
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 82785db730d9..8ac089f8134c 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -162,7 +162,9 @@ legacy_recdir_name_error(struct nfs4_client *clp, int error)
if (error == -ENOENT) {
printk(KERN_ERR "NFSD: disabling legacy clientid tracking. "
"Reboot recovery will not function correctly!\n");
+ mutex_lock(&nfsd_mutex);
nfsd4_client_tracking_exit(clp->net);
+ mutex_unlock(&nfsd_mutex);
}
}
@@ -2083,8 +2085,10 @@ nfsd4_client_record_create(struct nfs4_client *clp)
{
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+ mutex_lock(&nfsd_mutex);
if (nn->client_tracking_ops)
nn->client_tracking_ops->create(clp);
+ mutex_unlock(&nfsd_mutex);
}
void
@@ -2092,8 +2096,10 @@ nfsd4_client_record_remove(struct nfs4_client *clp)
{
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+ mutex_lock(&nfsd_mutex);
if (nn->client_tracking_ops)
nn->client_tracking_ops->remove(clp);
+ mutex_unlock(&nfsd_mutex);
}
int
@@ -2101,8 +2107,10 @@ nfsd4_client_record_check(struct nfs4_client *clp)
{
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+ mutex_lock(&nfsd_mutex);
if (nn->client_tracking_ops)
return nn->client_tracking_ops->check(clp);
+ mutex_unlock(&nfsd_mutex);
return -EOPNOTSUPP;
}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index d5694987f86f..2794fdc8b678 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2529,7 +2529,9 @@ static void inc_reclaim_complete(struct nfs4_client *clp)
nn->reclaim_str_hashtbl_size) {
printk(KERN_INFO "NFSD: all clients done reclaiming, ending NFSv4 grace period (net %x)\n",
clp->net->ns.inum);
+ mutex_lock(&nfsd_mutex);
nfsd4_end_grace(nn);
+ mutex_unlock(&nfsd_mutex);
}
}
@@ -6773,7 +6775,9 @@ nfs4_laundromat(struct nfsd_net *nn)
lt.new_timeo = 0;
goto out;
}
+ mutex_lock(&nfsd_mutex);
nfsd4_end_grace(nn);
+ mutex_unlock(&nfsd_mutex);
spin_lock(&nn->s2s_cp_lock);
idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) {
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 3f3e9f6c4250..649850b4bb60 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1085,7 +1085,9 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size)
if (!nn->nfsd_serv)
return -EBUSY;
trace_nfsd_end_grace(netns(file));
+ mutex_lock(&nfsd_mutex);
nfsd4_end_grace(nn);
+ mutex_lock(&nfsd_mutex);
break;
default:
return -EINVAL;
--
2.46.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH v2] nfsd: Invoke tracking callbacks only after initialization is complete
2025-06-12 3:55 [PATCH v2] nfsd: Invoke tracking callbacks only after initialization is complete Li Lingfeng
@ 2025-06-17 20:49 ` Jeff Layton
2025-06-18 12:35 ` Jeff Layton
2025-06-18 20:45 ` NeilBrown
2 siblings, 0 replies; 5+ messages in thread
From: Jeff Layton @ 2025-06-17 20:49 UTC (permalink / raw)
To: Li Lingfeng, chuck.lever, neilb, okorniev, Dai.Ngo, tom,
linux-nfs, linux-kernel
Cc: yukuai1, houtao1, yi.zhang, yangerkun, lilingfeng
On Thu, 2025-06-12 at 11:55 +0800, Li Lingfeng wrote:
> Checking whether tracking callbacks can be called based on whether
> nn->client_tracking_ops is NULL may lead to callbacks being invoked
> before tracking initialization completes, causing resource access
> violations (UAF, NULL pointer dereference). Examples:
>
> 1) nfsd4_client_tracking_init
> // set nn->client_tracking_ops
> nfsd4_cld_tracking_init
> nfs4_cld_state_init
> nn->reclaim_str_hashtbl = kmalloc_array
> ... // error path, goto err
> nfs4_cld_state_shutdown
> kfree(nn->reclaim_str_hashtbl)
> write_v4_end_grace
> nfsd4_end_grace
> nfsd4_record_grace_done
> nfsd4_cld_grace_done
> nfs4_release_reclaim
> nn->reclaim_str_hashtbl[i]
> // UAF
> // clear nn->client_tracking_ops
>
> 2) nfsd4_client_tracking_init
> // set nn->client_tracking_ops
> nfsd4_cld_tracking_init
> write_v4_end_grace
> nfsd4_end_grace
> nfsd4_record_grace_done
> nfsd4_cld_grace_done
> alloc_cld_upcall
> cn = nn->cld_net
> spin_lock // cn->cn_lock
> // NULL deref
> // error path, skip init pipe
> __nfsd4_init_cld_pipe
> cn = kzalloc
> nn->cld_net = cn
> // clear nn->client_tracking_ops
>
> After nfsd mounts, users can trigger grace_done callbacks via
> /proc/fs/nfsd/v4_end_grace. If resources are uninitialized or freed
> in error paths, this causes access violations.
>
> Resolve the issue by leveraging nfsd_mutex to prevent concurrency.
>
> Fixes: 52e19c09a183 ("nfsd: make reclaim_str_hashtbl allocated per net")
> Signed-off-by: Li Lingfeng <lilingfeng3@huawei.com>
> ---
> Changes in v2:
> Use nfsd_mutex instead of adding a new flag to prevent concurrency.
> fs/nfsd/nfs4recover.c | 8 ++++++++
> fs/nfsd/nfs4state.c | 4 ++++
> fs/nfsd/nfsctl.c | 2 ++
> 3 files changed, 14 insertions(+)
>
> diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
> index 82785db730d9..8ac089f8134c 100644
> --- a/fs/nfsd/nfs4recover.c
> +++ b/fs/nfsd/nfs4recover.c
> @@ -162,7 +162,9 @@ legacy_recdir_name_error(struct nfs4_client *clp, int error)
> if (error == -ENOENT) {
> printk(KERN_ERR "NFSD: disabling legacy clientid tracking. "
> "Reboot recovery will not function correctly!\n");
> + mutex_lock(&nfsd_mutex);
> nfsd4_client_tracking_exit(clp->net);
> + mutex_unlock(&nfsd_mutex);
> }
> }
>
> @@ -2083,8 +2085,10 @@ nfsd4_client_record_create(struct nfs4_client *clp)
> {
> struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
>
> + mutex_lock(&nfsd_mutex);
> if (nn->client_tracking_ops)
> nn->client_tracking_ops->create(clp);
> + mutex_unlock(&nfsd_mutex);
> }
>
> void
> @@ -2092,8 +2096,10 @@ nfsd4_client_record_remove(struct nfs4_client *clp)
> {
> struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
>
> + mutex_lock(&nfsd_mutex);
> if (nn->client_tracking_ops)
> nn->client_tracking_ops->remove(clp);
> + mutex_unlock(&nfsd_mutex);
> }
>
> int
> @@ -2101,8 +2107,10 @@ nfsd4_client_record_check(struct nfs4_client *clp)
> {
> struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
>
> + mutex_lock(&nfsd_mutex);
> if (nn->client_tracking_ops)
> return nn->client_tracking_ops->check(clp);
> + mutex_unlock(&nfsd_mutex);
>
> return -EOPNOTSUPP;
> }
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index d5694987f86f..2794fdc8b678 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -2529,7 +2529,9 @@ static void inc_reclaim_complete(struct nfs4_client *clp)
> nn->reclaim_str_hashtbl_size) {
> printk(KERN_INFO "NFSD: all clients done reclaiming, ending NFSv4 grace period (net %x)\n",
> clp->net->ns.inum);
> + mutex_lock(&nfsd_mutex);
> nfsd4_end_grace(nn);
> + mutex_unlock(&nfsd_mutex);
> }
> }
>
> @@ -6773,7 +6775,9 @@ nfs4_laundromat(struct nfsd_net *nn)
> lt.new_timeo = 0;
> goto out;
> }
> + mutex_lock(&nfsd_mutex);
> nfsd4_end_grace(nn);
> + mutex_unlock(&nfsd_mutex);
>
> spin_lock(&nn->s2s_cp_lock);
> idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) {
> diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
> index 3f3e9f6c4250..649850b4bb60 100644
> --- a/fs/nfsd/nfsctl.c
> +++ b/fs/nfsd/nfsctl.c
> @@ -1085,7 +1085,9 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size)
> if (!nn->nfsd_serv)
> return -EBUSY;
> trace_nfsd_end_grace(netns(file));
> + mutex_lock(&nfsd_mutex);
> nfsd4_end_grace(nn);
> + mutex_lock(&nfsd_mutex);
> break;
> default:
> return -EINVAL;
This seems like a very heavyweight way to ensure that this doesn't
race, especially since the client tracking ops are per net-namespace
and the nfsd_mutex is global.
Also, how do you get two different tasks calling
nfsd4_client_tracking_init() at the same time? That's called when the
net namespace is set up, so there shouldn't be more than one copy
running.
I thought from an earlier patch that we were going to change this to
just ensure that the client_tracking_ops pointer did a NULL -> non-NULL
transition only once?
I think that's sufficient to ensure that this race can't occur.
--
Jeff Layton <jlayton@kernel.org>
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH v2] nfsd: Invoke tracking callbacks only after initialization is complete
2025-06-12 3:55 [PATCH v2] nfsd: Invoke tracking callbacks only after initialization is complete Li Lingfeng
2025-06-17 20:49 ` Jeff Layton
@ 2025-06-18 12:35 ` Jeff Layton
2025-06-18 12:50 ` Jeff Layton
2025-06-18 20:45 ` NeilBrown
2 siblings, 1 reply; 5+ messages in thread
From: Jeff Layton @ 2025-06-18 12:35 UTC (permalink / raw)
To: Li Lingfeng, chuck.lever, neilb, okorniev, Dai.Ngo, tom,
linux-nfs, linux-kernel
Cc: yukuai1, houtao1, yi.zhang, yangerkun, lilingfeng
On Thu, 2025-06-12 at 11:55 +0800, Li Lingfeng wrote:
> Checking whether tracking callbacks can be called based on whether
> nn->client_tracking_ops is NULL may lead to callbacks being invoked
> before tracking initialization completes, causing resource access
> violations (UAF, NULL pointer dereference). Examples:
>
> 1) nfsd4_client_tracking_init
> // set nn->client_tracking_ops
> nfsd4_cld_tracking_init
> nfs4_cld_state_init
> nn->reclaim_str_hashtbl = kmalloc_array
> ... // error path, goto err
> nfs4_cld_state_shutdown
> kfree(nn->reclaim_str_hashtbl)
> write_v4_end_grace
> nfsd4_end_grace
> nfsd4_record_grace_done
> nfsd4_cld_grace_done
> nfs4_release_reclaim
> nn->reclaim_str_hashtbl[i]
> // UAF
> // clear nn->client_tracking_ops
>
> 2) nfsd4_client_tracking_init
> // set nn->client_tracking_ops
> nfsd4_cld_tracking_init
> write_v4_end_grace
> nfsd4_end_grace
> nfsd4_record_grace_done
> nfsd4_cld_grace_done
> alloc_cld_upcall
> cn = nn->cld_net
> spin_lock // cn->cn_lock
> // NULL deref
> // error path, skip init pipe
> __nfsd4_init_cld_pipe
> cn = kzalloc
> nn->cld_net = cn
> // clear nn->client_tracking_ops
>
Have you seen this race in the wild?
Looking at this more closely, I don't think this race is possible.
You'd need to invoke the ->init routine concurrently from two different
tasks, but nfsd4_client_tracking_init is called during net ns
initialization, which should ensure that only one task invokes it.
> After nfsd mounts, users can trigger grace_done callbacks via
> /proc/fs/nfsd/v4_end_grace. If resources are uninitialized or freed
> in error paths, this causes access violations.
>
> Resolve the issue by leveraging nfsd_mutex to prevent concurrency.
>
> Fixes: 52e19c09a183 ("nfsd: make reclaim_str_hashtbl allocated per net")
> Signed-off-by: Li Lingfeng <lilingfeng3@huawei.com>
> ---
> Changes in v2:
> Use nfsd_mutex instead of adding a new flag to prevent concurrency.
> fs/nfsd/nfs4recover.c | 8 ++++++++
> fs/nfsd/nfs4state.c | 4 ++++
> fs/nfsd/nfsctl.c | 2 ++
> 3 files changed, 14 insertions(+)
>
> diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
> index 82785db730d9..8ac089f8134c 100644
> --- a/fs/nfsd/nfs4recover.c
> +++ b/fs/nfsd/nfs4recover.c
> @@ -162,7 +162,9 @@ legacy_recdir_name_error(struct nfs4_client *clp, int error)
> if (error == -ENOENT) {
> printk(KERN_ERR "NFSD: disabling legacy clientid tracking. "
> "Reboot recovery will not function correctly!\n");
> + mutex_lock(&nfsd_mutex);
> nfsd4_client_tracking_exit(clp->net);
> + mutex_unlock(&nfsd_mutex);
> }
> }
>
> @@ -2083,8 +2085,10 @@ nfsd4_client_record_create(struct nfs4_client *clp)
> {
> struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
>
> + mutex_lock(&nfsd_mutex);
> if (nn->client_tracking_ops)
> nn->client_tracking_ops->create(clp);
> + mutex_unlock(&nfsd_mutex);
> }
>
> void
> @@ -2092,8 +2096,10 @@ nfsd4_client_record_remove(struct nfs4_client *clp)
> {
> struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
>
> + mutex_lock(&nfsd_mutex);
> if (nn->client_tracking_ops)
> nn->client_tracking_ops->remove(clp);
> + mutex_unlock(&nfsd_mutex);
> }
>
> int
> @@ -2101,8 +2107,10 @@ nfsd4_client_record_check(struct nfs4_client *clp)
> {
> struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
>
> + mutex_lock(&nfsd_mutex);
> if (nn->client_tracking_ops)
> return nn->client_tracking_ops->check(clp);
> + mutex_unlock(&nfsd_mutex);
>
> return -EOPNOTSUPP;
> }
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index d5694987f86f..2794fdc8b678 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -2529,7 +2529,9 @@ static void inc_reclaim_complete(struct nfs4_client *clp)
> nn->reclaim_str_hashtbl_size) {
> printk(KERN_INFO "NFSD: all clients done reclaiming, ending NFSv4 grace period (net %x)\n",
> clp->net->ns.inum);
> + mutex_lock(&nfsd_mutex);
> nfsd4_end_grace(nn);
> + mutex_unlock(&nfsd_mutex);
> }
> }
>
> @@ -6773,7 +6775,9 @@ nfs4_laundromat(struct nfsd_net *nn)
> lt.new_timeo = 0;
> goto out;
> }
> + mutex_lock(&nfsd_mutex);
> nfsd4_end_grace(nn);
> + mutex_unlock(&nfsd_mutex);
>
> spin_lock(&nn->s2s_cp_lock);
> idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) {
> diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
> index 3f3e9f6c4250..649850b4bb60 100644
> --- a/fs/nfsd/nfsctl.c
> +++ b/fs/nfsd/nfsctl.c
> @@ -1085,7 +1085,9 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size)
> if (!nn->nfsd_serv)
> return -EBUSY;
> trace_nfsd_end_grace(netns(file));
> + mutex_lock(&nfsd_mutex);
> nfsd4_end_grace(nn);
> + mutex_lock(&nfsd_mutex);
> break;
> default:
> return -EINVAL;
--
Jeff Layton <jlayton@kernel.org>
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH v2] nfsd: Invoke tracking callbacks only after initialization is complete
2025-06-18 12:35 ` Jeff Layton
@ 2025-06-18 12:50 ` Jeff Layton
0 siblings, 0 replies; 5+ messages in thread
From: Jeff Layton @ 2025-06-18 12:50 UTC (permalink / raw)
To: Li Lingfeng, chuck.lever, neilb, okorniev, Dai.Ngo, tom,
linux-nfs, linux-kernel
Cc: yukuai1, houtao1, yi.zhang, yangerkun, lilingfeng
On Wed, 2025-06-18 at 08:35 -0400, Jeff Layton wrote:
> On Thu, 2025-06-12 at 11:55 +0800, Li Lingfeng wrote:
> > Checking whether tracking callbacks can be called based on whether
> > nn->client_tracking_ops is NULL may lead to callbacks being invoked
> > before tracking initialization completes, causing resource access
> > violations (UAF, NULL pointer dereference). Examples:
> >
> > 1) nfsd4_client_tracking_init
> > // set nn->client_tracking_ops
> > nfsd4_cld_tracking_init
> > nfs4_cld_state_init
> > nn->reclaim_str_hashtbl = kmalloc_array
> > ... // error path, goto err
> > nfs4_cld_state_shutdown
> > kfree(nn->reclaim_str_hashtbl)
> > write_v4_end_grace
> > nfsd4_end_grace
> > nfsd4_record_grace_done
> > nfsd4_cld_grace_done
> > nfs4_release_reclaim
> > nn->reclaim_str_hashtbl[i]
> > // UAF
> > // clear nn->client_tracking_ops
> >
> > 2) nfsd4_client_tracking_init
> > // set nn->client_tracking_ops
> > nfsd4_cld_tracking_init
> > write_v4_end_grace
> > nfsd4_end_grace
> > nfsd4_record_grace_done
> > nfsd4_cld_grace_done
> > alloc_cld_upcall
> > cn = nn->cld_net
> > spin_lock // cn->cn_lock
> > // NULL deref
> > // error path, skip init pipe
> > __nfsd4_init_cld_pipe
> > cn = kzalloc
> > nn->cld_net = cn
> > // clear nn->client_tracking_ops
> >
>
>
> Have you seen this race in the wild?
>
> Looking at this more closely, I don't think this race is possible.
> You'd need to invoke the ->init routine concurrently from two different
> tasks, but nfsd4_client_tracking_init is called during net ns
> initialization, which should ensure that only one task invokes it.
>
My bad. It's not called during net namespace initialization, but during
server startup. But, the nfsd_mutex is held during this initialization,
so I still don't think this race can happen.
>
>
> > After nfsd mounts, users can trigger grace_done callbacks via
> > /proc/fs/nfsd/v4_end_grace. If resources are uninitialized or freed
> > in error paths, this causes access violations.
> >
> > Resolve the issue by leveraging nfsd_mutex to prevent concurrency.
> >
> > Fixes: 52e19c09a183 ("nfsd: make reclaim_str_hashtbl allocated per net")
> > Signed-off-by: Li Lingfeng <lilingfeng3@huawei.com>
> > ---
> > Changes in v2:
> > Use nfsd_mutex instead of adding a new flag to prevent concurrency.
> > fs/nfsd/nfs4recover.c | 8 ++++++++
> > fs/nfsd/nfs4state.c | 4 ++++
> > fs/nfsd/nfsctl.c | 2 ++
> > 3 files changed, 14 insertions(+)
> >
> > diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
> > index 82785db730d9..8ac089f8134c 100644
> > --- a/fs/nfsd/nfs4recover.c
> > +++ b/fs/nfsd/nfs4recover.c
> > @@ -162,7 +162,9 @@ legacy_recdir_name_error(struct nfs4_client *clp, int error)
> > if (error == -ENOENT) {
> > printk(KERN_ERR "NFSD: disabling legacy clientid tracking. "
> > "Reboot recovery will not function correctly!\n");
> > + mutex_lock(&nfsd_mutex);
> > nfsd4_client_tracking_exit(clp->net);
> > + mutex_unlock(&nfsd_mutex);
> > }
> > }
> >
> > @@ -2083,8 +2085,10 @@ nfsd4_client_record_create(struct nfs4_client *clp)
> > {
> > struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
> >
> > + mutex_lock(&nfsd_mutex);
> > if (nn->client_tracking_ops)
> > nn->client_tracking_ops->create(clp);
> > + mutex_unlock(&nfsd_mutex);
> > }
> >
> > void
> > @@ -2092,8 +2096,10 @@ nfsd4_client_record_remove(struct nfs4_client *clp)
> > {
> > struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
> >
> > + mutex_lock(&nfsd_mutex);
> > if (nn->client_tracking_ops)
> > nn->client_tracking_ops->remove(clp);
> > + mutex_unlock(&nfsd_mutex);
> > }
> >
> > int
> > @@ -2101,8 +2107,10 @@ nfsd4_client_record_check(struct nfs4_client *clp)
> > {
> > struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
> >
> > + mutex_lock(&nfsd_mutex);
> > if (nn->client_tracking_ops)
> > return nn->client_tracking_ops->check(clp);
> > + mutex_unlock(&nfsd_mutex);
> >
> > return -EOPNOTSUPP;
> > }
> > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > index d5694987f86f..2794fdc8b678 100644
> > --- a/fs/nfsd/nfs4state.c
> > +++ b/fs/nfsd/nfs4state.c
> > @@ -2529,7 +2529,9 @@ static void inc_reclaim_complete(struct nfs4_client *clp)
> > nn->reclaim_str_hashtbl_size) {
> > printk(KERN_INFO "NFSD: all clients done reclaiming, ending NFSv4 grace period (net %x)\n",
> > clp->net->ns.inum);
> > + mutex_lock(&nfsd_mutex);
> > nfsd4_end_grace(nn);
> > + mutex_unlock(&nfsd_mutex);
> > }
> > }
> >
> > @@ -6773,7 +6775,9 @@ nfs4_laundromat(struct nfsd_net *nn)
> > lt.new_timeo = 0;
> > goto out;
> > }
> > + mutex_lock(&nfsd_mutex);
> > nfsd4_end_grace(nn);
> > + mutex_unlock(&nfsd_mutex);
> >
> > spin_lock(&nn->s2s_cp_lock);
> > idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) {
> > diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
> > index 3f3e9f6c4250..649850b4bb60 100644
> > --- a/fs/nfsd/nfsctl.c
> > +++ b/fs/nfsd/nfsctl.c
> > @@ -1085,7 +1085,9 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size)
> > if (!nn->nfsd_serv)
> > return -EBUSY;
> > trace_nfsd_end_grace(netns(file));
> > + mutex_lock(&nfsd_mutex);
> > nfsd4_end_grace(nn);
> > + mutex_lock(&nfsd_mutex);
> > break;
> > default:
> > return -EINVAL;
--
Jeff Layton <jlayton@kernel.org>
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH v2] nfsd: Invoke tracking callbacks only after initialization is complete
2025-06-12 3:55 [PATCH v2] nfsd: Invoke tracking callbacks only after initialization is complete Li Lingfeng
2025-06-17 20:49 ` Jeff Layton
2025-06-18 12:35 ` Jeff Layton
@ 2025-06-18 20:45 ` NeilBrown
2 siblings, 0 replies; 5+ messages in thread
From: NeilBrown @ 2025-06-18 20:45 UTC (permalink / raw)
To: Li Lingfeng
Cc: chuck.lever, jlayton, okorniev, Dai.Ngo, tom, linux-nfs,
linux-kernel, yukuai1, houtao1, yi.zhang, yangerkun, lilingfeng,
lilingfeng3
On Thu, 12 Jun 2025, Li Lingfeng wrote:
> Checking whether tracking callbacks can be called based on whether
> nn->client_tracking_ops is NULL may lead to callbacks being invoked
> before tracking initialization completes, causing resource access
> violations (UAF, NULL pointer dereference). Examples:
>
> 1) nfsd4_client_tracking_init
> // set nn->client_tracking_ops
> nfsd4_cld_tracking_init
> nfs4_cld_state_init
> nn->reclaim_str_hashtbl = kmalloc_array
> ... // error path, goto err
> nfs4_cld_state_shutdown
> kfree(nn->reclaim_str_hashtbl)
> write_v4_end_grace
I suspect the problem here is that write_v4_end_grace() is one of the
few write_op functions which doesn't take nfsd_mutex. It should hold
that lock while testing ->nfsd_serv and calling nfsd4_end_grace()
write_filehandle(), write_unlock_ip(), and write_unlock_fs() also don't
take that lock. They don't even check if nfsd_serv is NULL. I suspect
they all should.
NeilBrown
> nfsd4_end_grace
> nfsd4_record_grace_done
> nfsd4_cld_grace_done
> nfs4_release_reclaim
> nn->reclaim_str_hashtbl[i]
> // UAF
> // clear nn->client_tracking_ops
>
> 2) nfsd4_client_tracking_init
> // set nn->client_tracking_ops
> nfsd4_cld_tracking_init
> write_v4_end_grace
> nfsd4_end_grace
> nfsd4_record_grace_done
> nfsd4_cld_grace_done
> alloc_cld_upcall
> cn = nn->cld_net
> spin_lock // cn->cn_lock
> // NULL deref
> // error path, skip init pipe
> __nfsd4_init_cld_pipe
> cn = kzalloc
> nn->cld_net = cn
> // clear nn->client_tracking_ops
>
> After nfsd mounts, users can trigger grace_done callbacks via
> /proc/fs/nfsd/v4_end_grace. If resources are uninitialized or freed
> in error paths, this causes access violations.
>
> Resolve the issue by leveraging nfsd_mutex to prevent concurrency.
>
> Fixes: 52e19c09a183 ("nfsd: make reclaim_str_hashtbl allocated per net")
> Signed-off-by: Li Lingfeng <lilingfeng3@huawei.com>
> ---
> Changes in v2:
> Use nfsd_mutex instead of adding a new flag to prevent concurrency.
> fs/nfsd/nfs4recover.c | 8 ++++++++
> fs/nfsd/nfs4state.c | 4 ++++
> fs/nfsd/nfsctl.c | 2 ++
> 3 files changed, 14 insertions(+)
>
> diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
> index 82785db730d9..8ac089f8134c 100644
> --- a/fs/nfsd/nfs4recover.c
> +++ b/fs/nfsd/nfs4recover.c
> @@ -162,7 +162,9 @@ legacy_recdir_name_error(struct nfs4_client *clp, int error)
> if (error == -ENOENT) {
> printk(KERN_ERR "NFSD: disabling legacy clientid tracking. "
> "Reboot recovery will not function correctly!\n");
> + mutex_lock(&nfsd_mutex);
> nfsd4_client_tracking_exit(clp->net);
> + mutex_unlock(&nfsd_mutex);
> }
> }
>
> @@ -2083,8 +2085,10 @@ nfsd4_client_record_create(struct nfs4_client *clp)
> {
> struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
>
> + mutex_lock(&nfsd_mutex);
> if (nn->client_tracking_ops)
> nn->client_tracking_ops->create(clp);
> + mutex_unlock(&nfsd_mutex);
> }
>
> void
> @@ -2092,8 +2096,10 @@ nfsd4_client_record_remove(struct nfs4_client *clp)
> {
> struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
>
> + mutex_lock(&nfsd_mutex);
> if (nn->client_tracking_ops)
> nn->client_tracking_ops->remove(clp);
> + mutex_unlock(&nfsd_mutex);
> }
>
> int
> @@ -2101,8 +2107,10 @@ nfsd4_client_record_check(struct nfs4_client *clp)
> {
> struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
>
> + mutex_lock(&nfsd_mutex);
> if (nn->client_tracking_ops)
> return nn->client_tracking_ops->check(clp);
> + mutex_unlock(&nfsd_mutex);
>
> return -EOPNOTSUPP;
> }
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index d5694987f86f..2794fdc8b678 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -2529,7 +2529,9 @@ static void inc_reclaim_complete(struct nfs4_client *clp)
> nn->reclaim_str_hashtbl_size) {
> printk(KERN_INFO "NFSD: all clients done reclaiming, ending NFSv4 grace period (net %x)\n",
> clp->net->ns.inum);
> + mutex_lock(&nfsd_mutex);
> nfsd4_end_grace(nn);
> + mutex_unlock(&nfsd_mutex);
> }
> }
>
> @@ -6773,7 +6775,9 @@ nfs4_laundromat(struct nfsd_net *nn)
> lt.new_timeo = 0;
> goto out;
> }
> + mutex_lock(&nfsd_mutex);
> nfsd4_end_grace(nn);
> + mutex_unlock(&nfsd_mutex);
>
> spin_lock(&nn->s2s_cp_lock);
> idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) {
> diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
> index 3f3e9f6c4250..649850b4bb60 100644
> --- a/fs/nfsd/nfsctl.c
> +++ b/fs/nfsd/nfsctl.c
> @@ -1085,7 +1085,9 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size)
> if (!nn->nfsd_serv)
> return -EBUSY;
> trace_nfsd_end_grace(netns(file));
> + mutex_lock(&nfsd_mutex);
> nfsd4_end_grace(nn);
> + mutex_lock(&nfsd_mutex);
> break;
> default:
> return -EINVAL;
> --
> 2.46.1
>
>
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2025-06-18 20:46 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-06-12 3:55 [PATCH v2] nfsd: Invoke tracking callbacks only after initialization is complete Li Lingfeng
2025-06-17 20:49 ` Jeff Layton
2025-06-18 12:35 ` Jeff Layton
2025-06-18 12:50 ` Jeff Layton
2025-06-18 20:45 ` NeilBrown
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).