* [PATCH] pidfs: simplify PIDFD_GET_<type>_NAMESPACE ioctls
@ 2025-11-17 12:36 Christian Brauner
2025-12-22 21:49 ` Eric Biggers
2026-02-23 15:50 ` [PATCH] pidfs: simplify PIDFD_GET_<type>_NAMESPACE ioctls David Lechner
0 siblings, 2 replies; 7+ messages in thread
From: Christian Brauner @ 2025-11-17 12:36 UTC (permalink / raw)
To: linux-fsdevel
Cc: Christian Brauner, Jeff Layton, Josef Bacik, Jan Kara,
Amir Goldstein
We have reworked namespaces sufficiently that all this special-casing
shouldn't be needed anymore
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
fs/pidfs.c | 75 ++++++++++++++++++++++++++----------------------------
1 file changed, 36 insertions(+), 39 deletions(-)
diff --git a/fs/pidfs.c b/fs/pidfs.c
index db236427fc2c..78dee3c201af 100644
--- a/fs/pidfs.c
+++ b/fs/pidfs.c
@@ -454,7 +454,6 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
struct task_struct *task __free(put_task) = NULL;
struct nsproxy *nsp __free(put_nsproxy) = NULL;
struct ns_common *ns_common = NULL;
- struct pid_namespace *pid_ns;
if (!pidfs_ioctl_valid(cmd))
return -ENOIOCTLCMD;
@@ -496,66 +495,64 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
switch (cmd) {
/* Namespaces that hang of nsproxy. */
case PIDFD_GET_CGROUP_NAMESPACE:
- if (IS_ENABLED(CONFIG_CGROUPS)) {
- get_cgroup_ns(nsp->cgroup_ns);
- ns_common = to_ns_common(nsp->cgroup_ns);
- }
+ if (!ns_ref_get(nsp->cgroup_ns))
+ break;
+ ns_common = to_ns_common(nsp->cgroup_ns);
break;
case PIDFD_GET_IPC_NAMESPACE:
- if (IS_ENABLED(CONFIG_IPC_NS)) {
- get_ipc_ns(nsp->ipc_ns);
- ns_common = to_ns_common(nsp->ipc_ns);
- }
+ if (!ns_ref_get(nsp->ipc_ns))
+ break;
+ ns_common = to_ns_common(nsp->ipc_ns);
break;
case PIDFD_GET_MNT_NAMESPACE:
- get_mnt_ns(nsp->mnt_ns);
+ if (!ns_ref_get(nsp->mnt_ns))
+ break;
ns_common = to_ns_common(nsp->mnt_ns);
break;
case PIDFD_GET_NET_NAMESPACE:
- if (IS_ENABLED(CONFIG_NET_NS)) {
- ns_common = to_ns_common(nsp->net_ns);
- get_net_ns(ns_common);
- }
+ if (!ns_ref_get(nsp->net_ns))
+ break;
+ ns_common = to_ns_common(nsp->net_ns);
break;
case PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE:
- if (IS_ENABLED(CONFIG_PID_NS)) {
- get_pid_ns(nsp->pid_ns_for_children);
- ns_common = to_ns_common(nsp->pid_ns_for_children);
- }
+ if (!ns_ref_get(nsp->pid_ns_for_children))
+ break;
+ ns_common = to_ns_common(nsp->pid_ns_for_children);
break;
case PIDFD_GET_TIME_NAMESPACE:
- if (IS_ENABLED(CONFIG_TIME_NS)) {
- get_time_ns(nsp->time_ns);
- ns_common = to_ns_common(nsp->time_ns);
- }
+ if (!ns_ref_get(nsp->time_ns))
+ break;
+ ns_common = to_ns_common(nsp->time_ns);
break;
case PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE:
- if (IS_ENABLED(CONFIG_TIME_NS)) {
- get_time_ns(nsp->time_ns_for_children);
- ns_common = to_ns_common(nsp->time_ns_for_children);
- }
+ if (!ns_ref_get(nsp->time_ns_for_children))
+ break;
+ ns_common = to_ns_common(nsp->time_ns_for_children);
break;
case PIDFD_GET_UTS_NAMESPACE:
- if (IS_ENABLED(CONFIG_UTS_NS)) {
- get_uts_ns(nsp->uts_ns);
- ns_common = to_ns_common(nsp->uts_ns);
- }
+ if (!ns_ref_get(nsp->uts_ns))
+ break;
+ ns_common = to_ns_common(nsp->uts_ns);
break;
/* Namespaces that don't hang of nsproxy. */
case PIDFD_GET_USER_NAMESPACE:
- if (IS_ENABLED(CONFIG_USER_NS)) {
- rcu_read_lock();
- ns_common = to_ns_common(get_user_ns(task_cred_xxx(task, user_ns)));
- rcu_read_unlock();
+ scoped_guard(rcu) {
+ struct user_namespace *user_ns;
+
+ user_ns = task_cred_xxx(task, user_ns);
+ if (!ns_ref_get(user_ns))
+ break;
+ ns_common = to_ns_common(user_ns);
}
break;
case PIDFD_GET_PID_NAMESPACE:
- if (IS_ENABLED(CONFIG_PID_NS)) {
- rcu_read_lock();
+ scoped_guard(rcu) {
+ struct pid_namespace *pid_ns;
+
pid_ns = task_active_pid_ns(task);
- if (pid_ns)
- ns_common = to_ns_common(get_pid_ns(pid_ns));
- rcu_read_unlock();
+ if (!ns_ref_get(pid_ns))
+ break;
+ ns_common = to_ns_common(pid_ns);
}
break;
default:
--
2.47.3
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH] pidfs: simplify PIDFD_GET_<type>_NAMESPACE ioctls
2025-11-17 12:36 [PATCH] pidfs: simplify PIDFD_GET_<type>_NAMESPACE ioctls Christian Brauner
@ 2025-12-22 21:49 ` Eric Biggers
2025-12-24 12:00 ` [PATCH] pidfs: protect PIDFD_GET_* ioctls() via ifdef Christian Brauner
2026-02-23 15:50 ` [PATCH] pidfs: simplify PIDFD_GET_<type>_NAMESPACE ioctls David Lechner
1 sibling, 1 reply; 7+ messages in thread
From: Eric Biggers @ 2025-12-22 21:49 UTC (permalink / raw)
To: Christian Brauner
Cc: linux-fsdevel, Jeff Layton, Josef Bacik, Jan Kara, Amir Goldstein
On Mon, Nov 17, 2025 at 01:36:13PM +0100, Christian Brauner wrote:
> We have reworked namespaces sufficiently that all this special-casing
> shouldn't be needed anymore
>
> Signed-off-by: Christian Brauner <brauner@kernel.org>
This causes a NULL dereference in nsfs_evict() when CONFIG_NET_NS=n.
Nothing special needed besides that: it happens just when booting the
system, using systemd.
Reverting this commit on mainline fixes the bug.
- Eric
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH] pidfs: protect PIDFD_GET_* ioctls() via ifdef
2025-12-22 21:49 ` Eric Biggers
@ 2025-12-24 12:00 ` Christian Brauner
2025-12-28 21:01 ` Eric Biggers
2026-01-06 15:18 ` Brendan Jackman
0 siblings, 2 replies; 7+ messages in thread
From: Christian Brauner @ 2025-12-24 12:00 UTC (permalink / raw)
To: linux-fsdevel
Cc: Christian Brauner, Eric Biggers, Jeff Layton, Josef Bacik,
Jan Kara, Amir Goldstein
We originally protected PIDFD_GET_<ns-type>_NAMESPACE ioctls() through
ifdefs and recent rework made it possible to drop them. There was an
oversight though. When the relevant namespace is turned off ns->ops will
be NULL so even though opening a file descriptor is perfectly legitimate
it would fail during inode eviction when the file was closed.
The simple fix would be to check ns->ops for NULL and continue allow to
retrieve namespace fds from pidfds but we don't allow retrieving them
when the relevant namespace type is turned off. So keep the
simplification but add the ifdefs back in.
Reported-by: Eric Biggers <ebiggers@kernel.org>
Link: https://lore.kernel.org/20251222214907.GA189632@quark
Fixes: a71e4f103aed ("pidfs: simplify PIDFD_GET_<type>_NAMESPACE ioctls")
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
fs/pidfs.c | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
diff --git a/fs/pidfs.c b/fs/pidfs.c
index dba703d4ce4a..1e20e36e0ed5 100644
--- a/fs/pidfs.c
+++ b/fs/pidfs.c
@@ -517,14 +517,18 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
switch (cmd) {
/* Namespaces that hang of nsproxy. */
case PIDFD_GET_CGROUP_NAMESPACE:
+#ifdef CONFIG_CGROUPS
if (!ns_ref_get(nsp->cgroup_ns))
break;
ns_common = to_ns_common(nsp->cgroup_ns);
+#endif
break;
case PIDFD_GET_IPC_NAMESPACE:
+#ifdef CONFIG_IPC_NS
if (!ns_ref_get(nsp->ipc_ns))
break;
ns_common = to_ns_common(nsp->ipc_ns);
+#endif
break;
case PIDFD_GET_MNT_NAMESPACE:
if (!ns_ref_get(nsp->mnt_ns))
@@ -532,32 +536,43 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
ns_common = to_ns_common(nsp->mnt_ns);
break;
case PIDFD_GET_NET_NAMESPACE:
+#ifdef CONFIG_NET_NS
if (!ns_ref_get(nsp->net_ns))
break;
ns_common = to_ns_common(nsp->net_ns);
+#endif
break;
case PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE:
+#ifdef CONFIG_PID_NS
if (!ns_ref_get(nsp->pid_ns_for_children))
break;
ns_common = to_ns_common(nsp->pid_ns_for_children);
+#endif
break;
case PIDFD_GET_TIME_NAMESPACE:
+#ifdef CONFIG_TIME_NS
if (!ns_ref_get(nsp->time_ns))
break;
ns_common = to_ns_common(nsp->time_ns);
+#endif
break;
case PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE:
+#ifdef CONFIG_TIME_NS
if (!ns_ref_get(nsp->time_ns_for_children))
break;
ns_common = to_ns_common(nsp->time_ns_for_children);
+#endif
break;
case PIDFD_GET_UTS_NAMESPACE:
+#ifdef CONFIG_UTS_NS
if (!ns_ref_get(nsp->uts_ns))
break;
ns_common = to_ns_common(nsp->uts_ns);
+#endif
break;
/* Namespaces that don't hang of nsproxy. */
case PIDFD_GET_USER_NAMESPACE:
+#ifdef CONFIG_USER_NS
scoped_guard(rcu) {
struct user_namespace *user_ns;
@@ -566,8 +581,10 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
break;
ns_common = to_ns_common(user_ns);
}
+#endif
break;
case PIDFD_GET_PID_NAMESPACE:
+#ifdef CONFIG_PID_NS
scoped_guard(rcu) {
struct pid_namespace *pid_ns;
@@ -576,6 +593,7 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
break;
ns_common = to_ns_common(pid_ns);
}
+#endif
break;
default:
return -ENOIOCTLCMD;
--
2.47.3
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH] pidfs: protect PIDFD_GET_* ioctls() via ifdef
2025-12-24 12:00 ` [PATCH] pidfs: protect PIDFD_GET_* ioctls() via ifdef Christian Brauner
@ 2025-12-28 21:01 ` Eric Biggers
2026-01-06 15:18 ` Brendan Jackman
1 sibling, 0 replies; 7+ messages in thread
From: Eric Biggers @ 2025-12-28 21:01 UTC (permalink / raw)
To: Christian Brauner
Cc: linux-fsdevel, Jeff Layton, Josef Bacik, Jan Kara, Amir Goldstein
On Wed, Dec 24, 2025 at 01:00:24PM +0100, Christian Brauner wrote:
> We originally protected PIDFD_GET_<ns-type>_NAMESPACE ioctls() through
> ifdefs and recent rework made it possible to drop them. There was an
> oversight though. When the relevant namespace is turned off ns->ops will
> be NULL so even though opening a file descriptor is perfectly legitimate
> it would fail during inode eviction when the file was closed.
>
> The simple fix would be to check ns->ops for NULL and continue allow to
> retrieve namespace fds from pidfds but we don't allow retrieving them
> when the relevant namespace type is turned off. So keep the
> simplification but add the ifdefs back in.
>
> Reported-by: Eric Biggers <ebiggers@kernel.org>
> Link: https://lore.kernel.org/20251222214907.GA189632@quark
> Fixes: a71e4f103aed ("pidfs: simplify PIDFD_GET_<type>_NAMESPACE ioctls")
> Signed-off-by: Christian Brauner <brauner@kernel.org>
Tested-by: Eric Biggers <ebiggers@kernel.org>
> case PIDFD_GET_USER_NAMESPACE:
>#ifdef CONFIG_USER_NS
> scoped_guard(rcu) {
> struct user_namespace *user_ns;
>
> user_ns = task_cred_xxx(task, user_ns);
> if (!ns_ref_get(user_ns))
> break;
> ns_common = to_ns_common(user_ns);
> }
>#endif
> break;
Not directly related to this patch, but you know that the 'break;' above
breaks from the scoped_guard() block and not from the switch statement,
right? (Considering that scoped_guard() is implemented using 'for'.)
There is a break after the scoped_guard(), so it ends up being the same.
But it's confusing. It would be much easier to understand if it was
rewritten to not use an inner 'break':
if (ns_ref_get(user_ns))
ns_common = to_ns_common(user_ns);
- Eric
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] pidfs: protect PIDFD_GET_* ioctls() via ifdef
2025-12-24 12:00 ` [PATCH] pidfs: protect PIDFD_GET_* ioctls() via ifdef Christian Brauner
2025-12-28 21:01 ` Eric Biggers
@ 2026-01-06 15:18 ` Brendan Jackman
1 sibling, 0 replies; 7+ messages in thread
From: Brendan Jackman @ 2026-01-06 15:18 UTC (permalink / raw)
To: Christian Brauner, linux-fsdevel
Cc: Eric Biggers, Jeff Layton, Josef Bacik, Jan Kara, Amir Goldstein
On Wed Dec 24, 2025 at 12:00 PM UTC, Christian Brauner wrote:
> We originally protected PIDFD_GET_<ns-type>_NAMESPACE ioctls() through
> ifdefs and recent rework made it possible to drop them. There was an
> oversight though. When the relevant namespace is turned off ns->ops will
> be NULL so even though opening a file descriptor is perfectly legitimate
> it would fail during inode eviction when the file was closed.
>
> The simple fix would be to check ns->ops for NULL and continue allow to
> retrieve namespace fds from pidfds but we don't allow retrieving them
> when the relevant namespace type is turned off. So keep the
> simplification but add the ifdefs back in.
>
> Reported-by: Eric Biggers <ebiggers@kernel.org>
> Link: https://lore.kernel.org/20251222214907.GA189632@quark
> Fixes: a71e4f103aed ("pidfs: simplify PIDFD_GET_<type>_NAMESPACE ioctls")
> Signed-off-by: Christian Brauner <brauner@kernel.org>
Thanks Christian, I was also getting a NULL deref from running
the x86:ldt_gdt_32 kselftest on QEMU under a minimal kconfig (via
task_active_pid_ns()), this fixes it.
Tested-by: Brendan Jackman <jackmanb@kernel.org>
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] pidfs: simplify PIDFD_GET_<type>_NAMESPACE ioctls
2025-11-17 12:36 [PATCH] pidfs: simplify PIDFD_GET_<type>_NAMESPACE ioctls Christian Brauner
2025-12-22 21:49 ` Eric Biggers
@ 2026-02-23 15:50 ` David Lechner
2026-02-24 11:08 ` Christian Brauner
1 sibling, 1 reply; 7+ messages in thread
From: David Lechner @ 2026-02-23 15:50 UTC (permalink / raw)
To: Christian Brauner, linux-fsdevel
Cc: Jeff Layton, Josef Bacik, Jan Kara, Amir Goldstein
On 11/17/25 6:36 AM, Christian Brauner wrote:
> We have reworked namespaces sufficiently that all this special-casing
> shouldn't be needed anymore
>
> Signed-off-by: Christian Brauner <brauner@kernel.org>
> ---
> fs/pidfs.c | 75 ++++++++++++++++++++++++++----------------------------
> 1 file changed, 36 insertions(+), 39 deletions(-)
>
> diff --git a/fs/pidfs.c b/fs/pidfs.c
> index db236427fc2c..78dee3c201af 100644
> --- a/fs/pidfs.c
> +++ b/fs/pidfs.c
...
> case PIDFD_GET_USER_NAMESPACE:
> - if (IS_ENABLED(CONFIG_USER_NS)) {
> - rcu_read_lock();
> - ns_common = to_ns_common(get_user_ns(task_cred_xxx(task, user_ns)));
> - rcu_read_unlock();
> + scoped_guard(rcu) {
> + struct user_namespace *user_ns;
> +
> + user_ns = task_cred_xxx(task, user_ns);
> + if (!ns_ref_get(user_ns))
> + break;
I think this code is a bit misleading and could lead to unintentional
mistakes in future changes.
scoped_guard() is implemented using a for loop, so this break statement
is only breaking out of the the scoped_guard() scope and not breaking
out of the case as one might expect.
I suggest to change the logic to avoid the break or at least add a
comment pointing out the unusual behavior.
> + ns_common = to_ns_common(user_ns);
> }
> break;
> case PIDFD_GET_PID_NAMESPACE:
> - if (IS_ENABLED(CONFIG_PID_NS)) {
> - rcu_read_lock();
> + scoped_guard(rcu) {
> + struct pid_namespace *pid_ns;
> +
> pid_ns = task_active_pid_ns(task);
> - if (pid_ns)
> - ns_common = to_ns_common(get_pid_ns(pid_ns));
> - rcu_read_unlock();
> + if (!ns_ref_get(pid_ns))
> + break;
Same situation here.
> + ns_common = to_ns_common(pid_ns);
> }
> break;
> default:
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] pidfs: simplify PIDFD_GET_<type>_NAMESPACE ioctls
2026-02-23 15:50 ` [PATCH] pidfs: simplify PIDFD_GET_<type>_NAMESPACE ioctls David Lechner
@ 2026-02-24 11:08 ` Christian Brauner
0 siblings, 0 replies; 7+ messages in thread
From: Christian Brauner @ 2026-02-24 11:08 UTC (permalink / raw)
To: David Lechner
Cc: linux-fsdevel, Jeff Layton, Josef Bacik, Jan Kara, Amir Goldstein
On Mon, Feb 23, 2026 at 09:50:13AM -0600, David Lechner wrote:
> On 11/17/25 6:36 AM, Christian Brauner wrote:
> > We have reworked namespaces sufficiently that all this special-casing
> > shouldn't be needed anymore
> >
> > Signed-off-by: Christian Brauner <brauner@kernel.org>
> > ---
> > fs/pidfs.c | 75 ++++++++++++++++++++++++++----------------------------
> > 1 file changed, 36 insertions(+), 39 deletions(-)
> >
> > diff --git a/fs/pidfs.c b/fs/pidfs.c
> > index db236427fc2c..78dee3c201af 100644
> > --- a/fs/pidfs.c
> > +++ b/fs/pidfs.c
>
> ...
>
> > case PIDFD_GET_USER_NAMESPACE:
> > - if (IS_ENABLED(CONFIG_USER_NS)) {
> > - rcu_read_lock();
> > - ns_common = to_ns_common(get_user_ns(task_cred_xxx(task, user_ns)));
> > - rcu_read_unlock();
> > + scoped_guard(rcu) {
> > + struct user_namespace *user_ns;
> > +
> > + user_ns = task_cred_xxx(task, user_ns);
> > + if (!ns_ref_get(user_ns))
> > + break;
>
> I think this code is a bit misleading and could lead to unintentional
> mistakes in future changes.
>
> scoped_guard() is implemented using a for loop, so this break statement
> is only breaking out of the the scoped_guard() scope and not breaking
> out of the case as one might expect.
>
> I suggest to change the logic to avoid the break or at least add a
> comment pointing out the unusual behavior.
>
> > + ns_common = to_ns_common(user_ns);
> > }
> > break;
> > case PIDFD_GET_PID_NAMESPACE:
> > - if (IS_ENABLED(CONFIG_PID_NS)) {
> > - rcu_read_lock();
> > + scoped_guard(rcu) {
> > + struct pid_namespace *pid_ns;
> > +
> > pid_ns = task_active_pid_ns(task);
> > - if (pid_ns)
> > - ns_common = to_ns_common(get_pid_ns(pid_ns));
> > - rcu_read_unlock();
> > + if (!ns_ref_get(pid_ns))
> > + break;
>
> Same situation here.
>
> > + ns_common = to_ns_common(pid_ns);
> > }
> > break;
> > default:
Good point, we can change this to:
diff --git a/fs/pidfs.c b/fs/pidfs.c
index 1e20e36e0ed5..21f9f011a957 100644
--- a/fs/pidfs.c
+++ b/fs/pidfs.c
@@ -577,9 +577,8 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
struct user_namespace *user_ns;
user_ns = task_cred_xxx(task, user_ns);
- if (!ns_ref_get(user_ns))
- break;
- ns_common = to_ns_common(user_ns);
+ if (ns_ref_get(user_ns))
+ ns_common = to_ns_common(user_ns);
}
#endif
break;
@@ -589,9 +588,8 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
struct pid_namespace *pid_ns;
pid_ns = task_active_pid_ns(task);
- if (!ns_ref_get(pid_ns))
- break;
- ns_common = to_ns_common(pid_ns);
+ if (ns_ref_get(pid_ns))
+ ns_common = to_ns_common(pid_ns);
}
#endif
break;
^ permalink raw reply related [flat|nested] 7+ messages in thread
end of thread, other threads:[~2026-02-24 11:08 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-11-17 12:36 [PATCH] pidfs: simplify PIDFD_GET_<type>_NAMESPACE ioctls Christian Brauner
2025-12-22 21:49 ` Eric Biggers
2025-12-24 12:00 ` [PATCH] pidfs: protect PIDFD_GET_* ioctls() via ifdef Christian Brauner
2025-12-28 21:01 ` Eric Biggers
2026-01-06 15:18 ` Brendan Jackman
2026-02-23 15:50 ` [PATCH] pidfs: simplify PIDFD_GET_<type>_NAMESPACE ioctls David Lechner
2026-02-24 11:08 ` Christian Brauner
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox