From: Christian Brauner <brauner@kernel.org>
To: Jan Kara <jack@suse.cz>, Amir Goldstein <amir73il@gmail.com>,
linux-fsdevel@vger.kernel.org
Cc: "Josef Bacik" <josef@toxicpanda.com>,
"Jeff Layton" <jlayton@kernel.org>, "Mike Yuan" <me@yhndnzj.com>,
"Zbigniew Jędrzejewski-Szmek" <zbyszek@in.waw.pl>,
"Lennart Poettering" <mzxreary@0pointer.de>,
"Daan De Meyer" <daan.j.demeyer@gmail.com>,
"Aleksa Sarai" <cyphar@cyphar.com>,
"Alexander Viro" <viro@zeniv.linux.org.uk>,
"Jens Axboe" <axboe@kernel.dk>, "Tejun Heo" <tj@kernel.org>,
"Johannes Weiner" <hannes@cmpxchg.org>,
"Michal Koutný" <mkoutny@suse.com>,
"Eric Dumazet" <edumazet@google.com>,
"Jakub Kicinski" <kuba@kernel.org>,
"Paolo Abeni" <pabeni@redhat.com>,
"Simon Horman" <horms@kernel.org>,
"Chuck Lever" <chuck.lever@oracle.com>,
linux-nfs@vger.kernel.org, linux-kselftest@vger.kernel.org,
linux-block@vger.kernel.org, linux-kernel@vger.kernel.org,
cgroups@vger.kernel.org, netdev@vger.kernel.org,
"Christian Brauner" <brauner@kernel.org>
Subject: [PATCH v2 28/33] nsfs: support file handles
Date: Fri, 12 Sep 2025 13:52:51 +0200 [thread overview]
Message-ID: <20250912-work-namespace-v2-28-1a247645cef5@kernel.org> (raw)
In-Reply-To: <20250912-work-namespace-v2-0-1a247645cef5@kernel.org>
A while ago we added support for file handles to pidfs so pidfds can be
encoded and decoded as file handles. Userspace has adopted this quickly
and it's proven very useful. Implement file handles for namespaces as
well.
A process is not always able to open /proc/self/ns/. That requires
procfs to be mounted and for /proc/self/ or /proc/self/ns/ to not be
overmounted. However, userspace can always derive a namespace fd from
a pidfd. And that always works for a task's own namespace.
There's no need to introduce unnecessary behavioral differences between
/proc/self/ns/ fds, pidfd-derived namespace fds, and file-handle-derived
namespace fds. So namespace file handles are always decodable if the
caller is located in the namespace the file handle refers to.
This also allows a task to, e.g., store a set of file handles to its
namespaces in a file on disk so that, when it gets re-exec'ed, it can
verify that they're still valid. This is akin to the pidfd use-case.
File handles are also useful purely for namespace comparison: a file handle
to the task's own namespace can be easily compared against others.
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
fs/nsfs.c | 158 ++++++++++++++++++++++++++++++++++++++++++++++
include/linux/exportfs.h | 6 ++
include/uapi/linux/nsfs.h | 9 +++
3 files changed, 173 insertions(+)
diff --git a/fs/nsfs.c b/fs/nsfs.c
index 80e631aeb3ce..926e2680414e 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -13,6 +13,12 @@
#include <linux/nsfs.h>
#include <linux/uaccess.h>
#include <linux/mnt_namespace.h>
+#include <linux/ipc_namespace.h>
+#include <linux/time_namespace.h>
+#include <linux/utsname.h>
+#include <linux/exportfs.h>
+#include <linux/nstree.h>
+#include <net/net_namespace.h>
#include "mount.h"
#include "internal.h"
@@ -417,12 +423,164 @@ static const struct stashed_operations nsfs_stashed_ops = {
.put_data = nsfs_put_data,
};
+#define NSFS_FID_SIZE_U32_VER0 (NSFS_FILE_HANDLE_SIZE_VER0 / sizeof(u32)) /* first published handle size, counted in u32 words */
+#define NSFS_FID_SIZE_U32_LATEST (NSFS_FILE_HANDLE_SIZE_LATEST / sizeof(u32)) /* latest handle size, counted in u32 words */
+
+/*
+ * Encode the namespace behind @inode into the caller's file handle buffer.
+ *
+ * @max_len is compared against the NSFS_FID_SIZE_U32_* limits, i.e. it is
+ * counted in u32 words. A buffer smaller than the first published layout is
+ * rejected and @max_len is set to the size that is needed; an oversized
+ * buffer is accepted and @max_len is trimmed to the size actually used.
+ * A non-NULL @parent is refused.
+ *
+ * Return: FILEID_NSFS on success, FILEID_INVALID otherwise.
+ */
+static int nsfs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
+			  struct inode *parent)
+{
+	struct ns_common *ns = inode->i_private;
+	struct nsfs_file_handle *handle;
+	int avail = *max_len;
+
+	if (parent)
+		return FILEID_INVALID;
+
+	/* Too small for even the oldest layout: report the required size. */
+	if (avail < NSFS_FID_SIZE_U32_VER0) {
+		*max_len = NSFS_FID_SIZE_U32_LATEST;
+		return FILEID_INVALID;
+	}
+
+	/* Larger than needed: tell the caller how much is really used. */
+	if (avail > NSFS_FID_SIZE_U32_LATEST)
+		*max_len = NSFS_FID_SIZE_U32_LATEST;
+
+	handle = (struct nsfs_file_handle *)fh;
+	handle->ns_inum = inode->i_ino;
+	handle->ns_type = ns->ops->type;
+	handle->ns_id = ns->ns_id;
+	return FILEID_NSFS;
+}
+
+/*
+ * Decode an nsfs file handle into a dentry on the nsfs mount.
+ *
+ * @fh_len is counted in u32 words, like NSFS_FID_SIZE_U32_*. Handles longer
+ * than the latest known layout are accepted only if all trailing bytes are
+ * zero.
+ *
+ * The namespace is looked up by id and type under RCU and pinned with a
+ * reference. Callers that are not inside the namespace need CAP_SYS_ADMIN in
+ * its owning user namespace. Every failure exit after the reference has been
+ * taken drops it via ns->ops->put(); otherwise the reference is handed to
+ * path_from_stashed().
+ *
+ * Return: the dentry on success, NULL if the handle is malformed or does not
+ * name a live namespace, or an ERR_PTR() on permission or lookup errors.
+ */
+static struct dentry *nsfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
+					int fh_len, int fh_type)
+{
+	struct path path __free(path_put) = {};
+	struct nsfs_file_handle *fid = (struct nsfs_file_handle *)fh;
+	struct user_namespace *owning_ns = NULL;
+	struct ns_common *ns;
+	int ret;
+
+	if (fh_len < NSFS_FID_SIZE_U32_VER0)
+		return NULL;
+
+	/*
+	 * Check that any trailing bytes are zero. fh_len counts u32 words
+	 * while the pointer offset and the memchr_inv() length are in bytes,
+	 * so convert explicitly.
+	 */
+	if ((fh_len > NSFS_FID_SIZE_U32_LATEST) &&
+	    memchr_inv((void *)fid + NSFS_FILE_HANDLE_SIZE_LATEST, 0,
+		       (fh_len - NSFS_FID_SIZE_U32_LATEST) * sizeof(u32)))
+		return NULL;
+
+	switch (fh_type) {
+	case FILEID_NSFS:
+		break;
+	default:
+		return NULL;
+	}
+
+	scoped_guard(rcu) {
+		ns = ns_tree_lookup_rcu(fid->ns_id, fid->ns_type);
+		if (!ns)
+			return NULL;
+
+		VFS_WARN_ON_ONCE(ns->ns_id != fid->ns_id);
+		VFS_WARN_ON_ONCE(ns->ops->type != fid->ns_type);
+
+		/*
+		 * ns_inum comes straight from the caller-supplied handle and
+		 * is not part of the lookup key, so a mismatch means a bogus
+		 * handle rather than a kernel bug: reject it, don't warn.
+		 */
+		if (ns->inum != fid->ns_inum)
+			return NULL;
+
+		if (!refcount_inc_not_zero(&ns->count))
+			return NULL;
+	}
+
+	/*
+	 * A caller inside the namespace may always decode the handle;
+	 * anyone else needs CAP_SYS_ADMIN in the owning user namespace.
+	 */
+	switch (ns->ops->type) {
+#ifdef CONFIG_CGROUPS
+	case CLONE_NEWCGROUP:
+		if (!current_in_namespace(to_cg_ns(ns)))
+			owning_ns = to_cg_ns(ns)->user_ns;
+		break;
+#endif
+#ifdef CONFIG_IPC_NS
+	case CLONE_NEWIPC:
+		if (!current_in_namespace(to_ipc_ns(ns)))
+			owning_ns = to_ipc_ns(ns)->user_ns;
+		break;
+#endif
+	case CLONE_NEWNS:
+		if (!current_in_namespace(to_mnt_ns(ns)))
+			owning_ns = to_mnt_ns(ns)->user_ns;
+		break;
+#ifdef CONFIG_NET_NS
+	case CLONE_NEWNET:
+		if (!current_in_namespace(to_net_ns(ns)))
+			owning_ns = to_net_ns(ns)->user_ns;
+		break;
+#endif
+#ifdef CONFIG_PID_NS
+	case CLONE_NEWPID:
+		if (!current_in_namespace(to_pid_ns(ns))) {
+			owning_ns = to_pid_ns(ns)->user_ns;
+		} else if (!READ_ONCE(to_pid_ns(ns)->child_reaper)) {
+			ns->ops->put(ns);
+			return ERR_PTR(-EPERM);
+		}
+		break;
+#endif
+#ifdef CONFIG_TIME_NS
+	case CLONE_NEWTIME:
+		if (!current_in_namespace(to_time_ns(ns)))
+			owning_ns = to_time_ns(ns)->user_ns;
+		break;
+#endif
+#ifdef CONFIG_USER_NS
+	case CLONE_NEWUSER:
+		if (!current_in_namespace(to_user_ns(ns)))
+			owning_ns = to_user_ns(ns);
+		break;
+#endif
+#ifdef CONFIG_UTS_NS
+	case CLONE_NEWUTS:
+		if (!current_in_namespace(to_uts_ns(ns)))
+			owning_ns = to_uts_ns(ns)->user_ns;
+		break;
+#endif
+	default:
+		/* Unhandled type: still drop the reference taken above. */
+		ns->ops->put(ns);
+		return ERR_PTR(-EOPNOTSUPP);
+	}
+
+	if (owning_ns && !ns_capable(owning_ns, CAP_SYS_ADMIN)) {
+		ns->ops->put(ns);
+		return ERR_PTR(-EPERM);
+	}
+
+	/* path_from_stashed() unconditionally consumes the reference. */
+	ret = path_from_stashed(&ns->stashed, nsfs_mnt, ns, &path);
+	if (ret)
+		return ERR_PTR(ret);
+
+	return no_free_ptr(path.dentry);
+}
+
+/*
+ * ->permission export operation. Always succeeds, because
+ * nsfs_fh_to_dentry() performs all permission checks.
+ */
+static int nsfs_export_permission(struct handle_to_path_ctx *ctx,
+				  unsigned int oflags)
+{
+	return 0;
+}
+
+/*
+ * ->open export operation: hand @path to file_open_root() with an empty
+ * name, the caller's @oflags and mode 0, and return whatever it returns.
+ */
+static struct file *nsfs_export_open(struct path *path, unsigned int oflags)
+{
+	struct file *file;
+
+	file = file_open_root(path, "", oflags, 0);
+	return file;
+}
+
+/* Export operations wiring up file handle encode/decode/open for nsfs. */
+static const struct export_operations nsfs_export_operations = {
+	.fh_to_dentry	= nsfs_fh_to_dentry,
+	.encode_fh	= nsfs_encode_fh,
+	.permission	= nsfs_export_permission,
+	.open		= nsfs_export_open,
+};
+
static int nsfs_init_fs_context(struct fs_context *fc)
{
struct pseudo_fs_context *ctx = init_pseudo(fc, NSFS_MAGIC);
if (!ctx)
return -ENOMEM;
ctx->ops = &nsfs_ops;
+ ctx->eops = &nsfs_export_operations;
ctx->dops = &ns_dentry_operations;
fc->s_fs_info = (void *)&nsfs_stashed_ops;
return 0;
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index cfb0dd1ea49c..3aac58a520c7 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -122,6 +122,12 @@ enum fid_type {
FILEID_BCACHEFS_WITHOUT_PARENT = 0xb1,
FILEID_BCACHEFS_WITH_PARENT = 0xb2,
+	/*
+	 * nsfs file handle:
+	 * 64 bit namespace identifier, 32 bit namespace type, 32 bit inode number.
+	 */
+ FILEID_NSFS = 0xf1,
+
/*
* 64 bit unique kernfs id
*/
diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h
index 97d8d80d139f..fa86fe3c8bd3 100644
--- a/include/uapi/linux/nsfs.h
+++ b/include/uapi/linux/nsfs.h
@@ -53,4 +53,13 @@ enum init_ns_ino {
MNT_NS_INIT_INO = 0xEFFFFFF8U,
};
+struct nsfs_file_handle { /* payload of a FILEID_NSFS file handle */
+ __u64 ns_id; /* 64 bit namespace identifier */
+ __u32 ns_type; /* 32 bit namespace type */
+ __u32 ns_inum; /* 32 bit inode number */
+};
+
+#define NSFS_FILE_HANDLE_SIZE_VER0 16 /* sizeof first published struct */
+#define NSFS_FILE_HANDLE_SIZE_LATEST sizeof(struct nsfs_file_handle) /* sizeof latest published struct */
+
#endif /* __LINUX_NSFS_H */
--
2.47.3
next prev parent reply other threads:[~2025-09-12 11:55 UTC|newest]
Thread overview: 71+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-09-12 11:52 [PATCH v2 00/33] ns: support file handles Christian Brauner
2025-09-12 11:52 ` [PATCH v2 01/33] pidfs: validate extensible ioctls Christian Brauner
2025-09-12 11:52 ` [PATCH v2 02/33] nsfs: drop tautological ioctl() check Christian Brauner
2025-09-12 11:52 ` [PATCH v2 03/33] nsfs: validate extensible ioctls Christian Brauner
2025-09-12 11:52 ` [PATCH v2 04/33] block: use extensible_ioctl_valid() Christian Brauner
2025-09-16 11:18 ` Mark Brown
2025-09-16 13:40 ` Dan Carpenter
2025-09-18 13:17 ` Jan Kara
2025-09-19 12:28 ` Christian Brauner
2025-09-12 11:52 ` [PATCH v2 05/33] ns: move to_ns_common() to ns_common.h Christian Brauner
2025-09-12 11:52 ` [PATCH v2 06/33] nsfs: add nsfs.h header Christian Brauner
2025-09-12 11:52 ` [PATCH v2 07/33] ns: uniformly initialize ns_common Christian Brauner
2025-09-12 11:52 ` [PATCH v2 08/33] cgroup: use ns_common_init() Christian Brauner
2025-09-12 15:48 ` Tejun Heo
2025-09-12 11:52 ` [PATCH v2 09/33] ipc: " Christian Brauner
2025-09-12 11:52 ` [PATCH v2 10/33] mnt: " Christian Brauner
2025-09-12 11:52 ` [PATCH v2 11/33] net: " Christian Brauner
2025-09-15 11:07 ` Jan Kara
2025-09-15 11:42 ` Christian Brauner
2025-09-15 11:50 ` Jan Kara
2025-09-15 22:59 ` Al Viro
2025-09-12 11:52 ` [PATCH v2 12/33] pid: " Christian Brauner
2025-09-12 11:52 ` [PATCH v2 13/33] time: " Christian Brauner
2025-09-12 11:52 ` [PATCH v2 14/33] user: " Christian Brauner
2025-09-12 11:52 ` [PATCH v2 15/33] uts: " Christian Brauner
2025-09-12 11:52 ` [PATCH v2 16/33] ns: remove ns_alloc_inum() Christian Brauner
2025-09-12 11:52 ` [PATCH v2 17/33] nstree: make iterator generic Christian Brauner
2025-09-15 11:49 ` Jan Kara
2025-09-12 11:52 ` [PATCH v2 18/33] mnt: support ns lookup Christian Brauner
2025-09-15 11:48 ` Jan Kara
2025-09-15 13:45 ` Christian Brauner
2025-09-16 3:24 ` Kuniyuki Iwashima
2025-09-16 3:59 ` Al Viro
2025-09-16 3:56 ` Al Viro
2025-09-16 3:59 ` Al Viro
2025-09-16 4:46 ` Al Viro
2025-09-17 9:50 ` Christian Brauner
2025-09-18 10:21 ` Al Viro
2025-09-12 11:52 ` [PATCH v2 19/33] cgroup: " Christian Brauner
2025-09-15 11:53 ` Jan Kara
2025-09-12 11:52 ` [PATCH v2 20/33] ipc: " Christian Brauner
2025-09-15 11:56 ` Jan Kara
2025-09-12 11:52 ` [PATCH v2 21/33] net: " Christian Brauner
2025-09-15 12:02 ` Jan Kara
2025-09-15 13:47 ` Christian Brauner
2025-09-16 3:59 ` Kuniyuki Iwashima
2025-09-12 11:52 ` [PATCH v2 22/33] pid: " Christian Brauner
2025-09-15 12:04 ` Jan Kara
2025-09-12 11:52 ` [PATCH v2 23/33] time: " Christian Brauner
2025-09-15 12:06 ` Jan Kara
2025-09-12 11:52 ` [PATCH v2 24/33] user: " Christian Brauner
2025-09-15 12:11 ` Jan Kara
2025-09-15 13:54 ` Christian Brauner
2025-09-15 14:14 ` Jan Kara
2025-09-12 11:52 ` [PATCH v2 25/33] uts: " Christian Brauner
2025-09-15 12:59 ` Jan Kara
2025-09-12 11:52 ` [PATCH v2 26/33] ns: add to_<type>_ns() to respective headers Christian Brauner
2025-09-15 12:06 ` Jan Kara
2025-09-12 11:52 ` [PATCH v2 27/33] nsfs: add current_in_namespace() Christian Brauner
2025-09-15 13:08 ` Jan Kara
2025-09-12 11:52 ` Christian Brauner [this message]
2025-09-15 13:25 ` [PATCH v2 28/33] nsfs: support file handles Jan Kara
2025-09-15 13:55 ` Christian Brauner
2025-09-12 11:52 ` [PATCH v2 29/33] nsfs: support exhaustive " Christian Brauner
2025-09-15 13:26 ` Jan Kara
2025-09-12 11:52 ` [PATCH v2 30/33] nsfs: add missing id retrieval support Christian Brauner
2025-09-15 13:28 ` Jan Kara
2025-09-12 11:52 ` [PATCH v2 31/33] tools: update nsfs.h uapi header Christian Brauner
2025-09-12 11:52 ` [PATCH v2 32/33] selftests/namespaces: add identifier selftests Christian Brauner
2025-09-12 11:52 ` [PATCH v2 33/33] selftests/namespaces: add file handle selftests Christian Brauner
2025-09-16 4:55 ` [PATCH v2 00/33] ns: support file handles Al Viro
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250912-work-namespace-v2-28-1a247645cef5@kernel.org \
--to=brauner@kernel.org \
--cc=amir73il@gmail.com \
--cc=axboe@kernel.dk \
--cc=cgroups@vger.kernel.org \
--cc=chuck.lever@oracle.com \
--cc=cyphar@cyphar.com \
--cc=daan.j.demeyer@gmail.com \
--cc=edumazet@google.com \
--cc=hannes@cmpxchg.org \
--cc=horms@kernel.org \
--cc=jack@suse.cz \
--cc=jlayton@kernel.org \
--cc=josef@toxicpanda.com \
--cc=kuba@kernel.org \
--cc=linux-block@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-kselftest@vger.kernel.org \
--cc=linux-nfs@vger.kernel.org \
--cc=me@yhndnzj.com \
--cc=mkoutny@suse.com \
--cc=mzxreary@0pointer.de \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=tj@kernel.org \
--cc=viro@zeniv.linux.org.uk \
--cc=zbyszek@in.waw.pl \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).