From: Christian Brauner <brauner@kernel.org>
To: Jan Kara <jack@suse.cz>, Amir Goldstein <amir73il@gmail.com>,
linux-fsdevel@vger.kernel.org
Cc: "Josef Bacik" <josef@toxicpanda.com>,
"Jeff Layton" <jlayton@kernel.org>, "Mike Yuan" <me@yhndnzj.com>,
"Zbigniew Jędrzejewski-Szmek" <zbyszek@in.waw.pl>,
"Lennart Poettering" <mzxreary@0pointer.de>,
"Daan De Meyer" <daan.j.demeyer@gmail.com>,
"Aleksa Sarai" <cyphar@cyphar.com>,
"Alexander Viro" <viro@zeniv.linux.org.uk>,
"Jens Axboe" <axboe@kernel.dk>, "Tejun Heo" <tj@kernel.org>,
"Johannes Weiner" <hannes@cmpxchg.org>,
"Michal Koutný" <mkoutny@suse.com>,
"Eric Dumazet" <edumazet@google.com>,
"Jakub Kicinski" <kuba@kernel.org>,
"Paolo Abeni" <pabeni@redhat.com>,
"Simon Horman" <horms@kernel.org>,
"Chuck Lever" <chuck.lever@oracle.com>,
linux-nfs@vger.kernel.org, linux-kselftest@vger.kernel.org,
linux-block@vger.kernel.org, linux-kernel@vger.kernel.org,
cgroups@vger.kernel.org, netdev@vger.kernel.org,
"Christian Brauner" <brauner@kernel.org>
Subject: [PATCH 17/32] mnt: support iterator
Date: Wed, 10 Sep 2025 16:37:02 +0200 [thread overview]
Message-ID: <20250910-work-namespace-v1-17-4dd56e7359d8@kernel.org> (raw)
In-Reply-To: <20250910-work-namespace-v1-0-4dd56e7359d8@kernel.org>
Move the mount namespace to the generic iterator.
This allows us to drop a bunch of members from struct mnt_namespace.
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
fs/mount.h | 10 +---
fs/namespace.c | 141 +++++++++++++--------------------------------------------
fs/nsfs.c | 4 +-
3 files changed, 35 insertions(+), 120 deletions(-)
diff --git a/fs/mount.h b/fs/mount.h
index 97737051a8b9..76bf863c9ae2 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -17,11 +17,7 @@ struct mnt_namespace {
};
struct user_namespace *user_ns;
struct ucounts *ucounts;
- u64 seq; /* Sequence number to prevent loops */
- union {
- wait_queue_head_t poll;
- struct rcu_head mnt_ns_rcu;
- };
+ wait_queue_head_t poll;
u64 seq_origin; /* Sequence number of origin mount namespace */
u64 event;
#ifdef CONFIG_FSNOTIFY
@@ -30,8 +26,6 @@ struct mnt_namespace {
#endif
unsigned int nr_mounts; /* # of mounts in the namespace */
unsigned int pending_mounts;
- struct rb_node mnt_ns_tree_node; /* node in the mnt_ns_tree */
- struct list_head mnt_ns_list; /* entry in the sequential list of mounts namespace */
refcount_t passive; /* number references not pinning @mounts */
} __randomize_layout;
@@ -173,7 +167,7 @@ static inline bool is_local_mountpoint(const struct dentry *dentry)
static inline bool is_anon_ns(struct mnt_namespace *ns)
{
- return ns->seq == 0;
+ return ns->ns.ns_id == 0;
}
static inline bool anon_ns_root(const struct mount *m)
diff --git a/fs/namespace.c b/fs/namespace.c
index 14c5cdbdd6e1..40a8d75f6b16 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -33,6 +33,7 @@
#include <linux/shmem_fs.h>
#include <linux/mnt_idmapping.h>
#include <linux/pidfs.h>
+#include <linux/nstree.h>
#include "pnode.h"
#include "internal.h"
@@ -80,13 +81,10 @@ static DECLARE_RWSEM(namespace_sem);
static HLIST_HEAD(unmounted); /* protected by namespace_sem */
static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */
static struct mnt_namespace *emptied_ns; /* protected by namespace_sem */
-static DEFINE_SEQLOCK(mnt_ns_tree_lock);
#ifdef CONFIG_FSNOTIFY
LIST_HEAD(notify_list); /* protected by namespace_sem */
#endif
-static struct rb_root mnt_ns_tree = RB_ROOT; /* protected by mnt_ns_tree_lock */
-static LIST_HEAD(mnt_ns_list); /* protected by mnt_ns_tree_lock */
enum mount_kattr_flags_t {
MOUNT_KATTR_RECURSE = (1 << 0),
@@ -119,53 +117,12 @@ __cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);
static inline struct mnt_namespace *node_to_mnt_ns(const struct rb_node *node)
{
+ struct ns_common *ns;
+
if (!node)
return NULL;
- return rb_entry(node, struct mnt_namespace, mnt_ns_tree_node);
-}
-
-static int mnt_ns_cmp(struct rb_node *a, const struct rb_node *b)
-{
- struct mnt_namespace *ns_a = node_to_mnt_ns(a);
- struct mnt_namespace *ns_b = node_to_mnt_ns(b);
- u64 seq_a = ns_a->seq;
- u64 seq_b = ns_b->seq;
-
- if (seq_a < seq_b)
- return -1;
- if (seq_a > seq_b)
- return 1;
- return 0;
-}
-
-static inline void mnt_ns_tree_write_lock(void)
-{
- write_seqlock(&mnt_ns_tree_lock);
-}
-
-static inline void mnt_ns_tree_write_unlock(void)
-{
- write_sequnlock(&mnt_ns_tree_lock);
-}
-
-static void mnt_ns_tree_add(struct mnt_namespace *ns)
-{
- struct rb_node *node, *prev;
-
- mnt_ns_tree_write_lock();
- node = rb_find_add_rcu(&ns->mnt_ns_tree_node, &mnt_ns_tree, mnt_ns_cmp);
- /*
- * If there's no previous entry simply add it after the
- * head and if there is add it after the previous entry.
- */
- prev = rb_prev(&ns->mnt_ns_tree_node);
- if (!prev)
- list_add_rcu(&ns->mnt_ns_list, &mnt_ns_list);
- else
- list_add_rcu(&ns->mnt_ns_list, &node_to_mnt_ns(prev)->mnt_ns_list);
- mnt_ns_tree_write_unlock();
-
- WARN_ON_ONCE(node);
+ ns = rb_entry(node, struct ns_common, ns_tree_node);
+ return container_of(ns, struct mnt_namespace, ns);
}
static void mnt_ns_release(struct mnt_namespace *ns)
@@ -181,32 +138,16 @@ DEFINE_FREE(mnt_ns_release, struct mnt_namespace *, if (_T) mnt_ns_release(_T))
static void mnt_ns_release_rcu(struct rcu_head *rcu)
{
- mnt_ns_release(container_of(rcu, struct mnt_namespace, mnt_ns_rcu));
+ mnt_ns_release(container_of(rcu, struct mnt_namespace, ns.ns_rcu));
}
static void mnt_ns_tree_remove(struct mnt_namespace *ns)
{
/* remove from global mount namespace list */
- if (!is_anon_ns(ns)) {
- mnt_ns_tree_write_lock();
- rb_erase(&ns->mnt_ns_tree_node, &mnt_ns_tree);
- list_bidir_del_rcu(&ns->mnt_ns_list);
- mnt_ns_tree_write_unlock();
- }
-
- call_rcu(&ns->mnt_ns_rcu, mnt_ns_release_rcu);
-}
-
-static int mnt_ns_find(const void *key, const struct rb_node *node)
-{
- const u64 mnt_ns_id = *(u64 *)key;
- const struct mnt_namespace *ns = node_to_mnt_ns(node);
+ if (!is_anon_ns(ns))
+ ns_tree_remove(ns);
- if (mnt_ns_id < ns->seq)
- return -1;
- if (mnt_ns_id > ns->seq)
- return 1;
- return 0;
+ call_rcu(&ns->ns.ns_rcu, mnt_ns_release_rcu);
}
/*
@@ -225,28 +166,21 @@ static int mnt_ns_find(const void *key, const struct rb_node *node)
*/
static struct mnt_namespace *lookup_mnt_ns(u64 mnt_ns_id)
{
- struct mnt_namespace *ns;
- struct rb_node *node;
- unsigned int seq;
+ struct mnt_namespace *mnt_ns;
+ struct ns_common *ns;
guard(rcu)();
- do {
- seq = read_seqbegin(&mnt_ns_tree_lock);
- node = rb_find_rcu(&mnt_ns_id, &mnt_ns_tree, mnt_ns_find);
- if (node)
- break;
- } while (read_seqretry(&mnt_ns_tree_lock, seq));
-
- if (!node)
+ ns = ns_tree_lookup_rcu(mnt_ns_id, CLONE_NEWNS);
+ if (!ns)
return NULL;
/*
* The last reference count is put with RCU delay so we can
* unconditonally acquire a reference here.
*/
- ns = node_to_mnt_ns(node);
- refcount_inc(&ns->passive);
- return ns;
+ mnt_ns = container_of(ns, struct mnt_namespace, ns);
+ refcount_inc(&mnt_ns->passive);
+ return mnt_ns;
}
static inline void lock_mount_hash(void)
@@ -1017,7 +951,7 @@ static inline bool check_anonymous_mnt(struct mount *mnt)
return false;
seq = mnt->mnt_ns->seq_origin;
- return !seq || (seq == current->nsproxy->mnt_ns->seq);
+ return !seq || (seq == current->nsproxy->mnt_ns->ns.ns_id);
}
/*
@@ -2155,19 +2089,16 @@ struct ns_common *from_mnt_ns(struct mnt_namespace *mnt)
struct mnt_namespace *get_sequential_mnt_ns(struct mnt_namespace *mntns, bool previous)
{
+ struct ns_common *ns;
+
guard(rcu)();
for (;;) {
- struct list_head *list;
-
- if (previous)
- list = rcu_dereference(list_bidir_prev_rcu(&mntns->mnt_ns_list));
- else
- list = rcu_dereference(list_next_rcu(&mntns->mnt_ns_list));
- if (list_is_head(list, &mnt_ns_list))
- return ERR_PTR(-ENOENT);
+ ns = ns_tree_adjoined_rcu(mntns, previous);
+ if (IS_ERR(ns))
+ return ERR_CAST(ns);
- mntns = list_entry_rcu(list, struct mnt_namespace, mnt_ns_list);
+ mntns = to_mnt_ns(ns);
/*
* The last passive reference count is put with RCU
@@ -2207,7 +2138,7 @@ static bool mnt_ns_loop(struct dentry *dentry)
if (!mnt_ns)
return false;
- return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
+ return current->nsproxy->mnt_ns->ns.ns_id >= mnt_ns->ns.ns_id;
}
struct mount *copy_tree(struct mount *src_root, struct dentry *dentry,
@@ -3070,7 +3001,7 @@ static struct file *open_detached_copy(struct path *path, bool recursive)
if (is_anon_ns(src_mnt_ns))
ns->seq_origin = src_mnt_ns->seq_origin;
else
- ns->seq_origin = src_mnt_ns->seq;
+ ns->seq_origin = src_mnt_ns->ns.ns_id;
}
mnt = __do_loopback(path, recursive);
@@ -4153,15 +4084,6 @@ static void free_mnt_ns(struct mnt_namespace *ns)
mnt_ns_tree_remove(ns);
}
-/*
- * Assign a sequence number so we can detect when we attempt to bind
- * mount a reference to an older mount namespace into the current
- * mount namespace, preventing reference counting loops. A 64bit
- * number incrementing at 10Ghz will take 12,427 years to wrap which
- * is effectively never, so we can ignore the possibility.
- */
-static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
-
static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool anon)
{
struct mnt_namespace *new_ns;
@@ -4185,11 +4107,11 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a
return ERR_PTR(ret);
}
if (!anon)
- new_ns->seq = atomic64_inc_return(&mnt_ns_seq);
+ ns_tree_gen_id(&new_ns->ns);
+ RB_CLEAR_NODE(&new_ns->ns.ns_tree_node);
+ INIT_LIST_HEAD(&new_ns->ns.ns_list_node);
refcount_set(&new_ns->passive, 1);
new_ns->mounts = RB_ROOT;
- INIT_LIST_HEAD(&new_ns->mnt_ns_list);
- RB_CLEAR_NODE(&new_ns->mnt_ns_tree_node);
init_waitqueue_head(&new_ns->poll);
new_ns->user_ns = get_user_ns(user_ns);
new_ns->ucounts = ucounts;
@@ -4275,7 +4197,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
if (pwdmnt)
mntput(pwdmnt);
- mnt_ns_tree_add(new_ns);
+ ns_tree_add_raw(new_ns);
return new_ns;
}
@@ -5385,7 +5307,7 @@ static int statmount_sb_source(struct kstatmount *s, struct seq_file *seq)
static void statmount_mnt_ns_id(struct kstatmount *s, struct mnt_namespace *ns)
{
s->sm.mask |= STATMOUNT_MNT_NS_ID;
- s->sm.mnt_ns_id = ns->seq;
+ s->sm.mnt_ns_id = ns->ns.ns_id;
}
static int statmount_mnt_opts(struct kstatmount *s, struct seq_file *seq)
@@ -6090,7 +6012,6 @@ static void __init init_mount_tree(void)
ns = alloc_mnt_ns(&init_user_ns, true);
if (IS_ERR(ns))
panic("Can't allocate initial namespace");
- ns->seq = atomic64_inc_return(&mnt_ns_seq);
ns->ns.inum = PROC_MNT_INIT_INO;
m = real_mount(mnt);
ns->root = m;
@@ -6105,7 +6026,7 @@ static void __init init_mount_tree(void)
set_fs_pwd(current->fs, &root);
set_fs_root(current->fs, &root);
- mnt_ns_tree_add(ns);
+ ns_tree_add(ns);
}
void __init mnt_init(void)
diff --git a/fs/nsfs.c b/fs/nsfs.c
index 34f0b35d3ead..6f8008177133 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -139,7 +139,7 @@ static int copy_ns_info_to_user(const struct mnt_namespace *mnt_ns,
* the size value will be set to the size the kernel knows about.
*/
kinfo->size = min(usize, sizeof(*kinfo));
- kinfo->mnt_ns_id = mnt_ns->seq;
+ kinfo->mnt_ns_id = mnt_ns->ns.ns_id;
kinfo->nr_mounts = READ_ONCE(mnt_ns->nr_mounts);
/* Subtract the root mount of the mount namespace. */
if (kinfo->nr_mounts)
@@ -221,7 +221,7 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl,
mnt_ns = container_of(ns, struct mnt_namespace, ns);
idp = (__u64 __user *)arg;
- id = mnt_ns->seq;
+ id = mnt_ns->ns.ns_id;
return put_user(id, idp);
}
case NS_GET_PID_FROM_PIDNS:
--
2.47.3
next prev parent reply other threads:[~2025-09-10 14:38 UTC|newest]
Thread overview: 78+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-09-10 14:36 [PATCH 00/32] ns: support file handles Christian Brauner
2025-09-10 14:36 ` [PATCH 01/32] pidfs: validate extensible ioctls Christian Brauner
2025-09-10 15:33 ` Jan Kara
2025-09-10 16:33 ` Aleksa Sarai
2025-10-23 10:46 ` Jiri Slaby
2025-10-24 22:31 ` Jan Kara
2025-09-10 14:36 ` [PATCH 02/32] nsfs: " Christian Brauner
2025-09-10 15:34 ` Jan Kara
2025-09-10 14:36 ` [PATCH 03/32] block: use extensible_ioctl_valid() Christian Brauner
2025-09-10 15:34 ` Jan Kara
2025-09-10 16:39 ` Jens Axboe
2025-09-10 14:36 ` [PATCH 04/32] ns: move to_ns_common() to ns_common.h Christian Brauner
2025-09-10 15:36 ` Jan Kara
2025-09-10 14:36 ` [PATCH 05/32] nsfs: add nsfs.h header Christian Brauner
2025-09-10 15:37 ` Jan Kara
2025-09-10 14:36 ` [PATCH 06/32] ns: uniformly initialize ns_common Christian Brauner
2025-09-10 15:40 ` Jan Kara
2025-09-10 14:36 ` [PATCH 07/32] mnt: use ns_common_init() Christian Brauner
2025-09-10 15:40 ` Jan Kara
2025-09-10 14:36 ` [PATCH 08/32] ipc: " Christian Brauner
2025-09-10 15:40 ` Jan Kara
2025-09-10 14:36 ` [PATCH 09/32] cgroup: " Christian Brauner
2025-09-10 15:42 ` Jan Kara
2025-09-10 14:36 ` [PATCH 10/32] pid: " Christian Brauner
2025-09-10 15:42 ` Jan Kara
2025-09-10 14:36 ` [PATCH 11/32] time: " Christian Brauner
2025-09-10 15:18 ` Thomas Gleixner
2025-09-10 15:44 ` Jan Kara
2025-09-10 14:36 ` [PATCH 12/32] uts: " Christian Brauner
2025-09-10 15:46 ` Jan Kara
2025-09-10 14:36 ` [PATCH 13/32] user: " Christian Brauner
2025-09-10 15:46 ` Jan Kara
2025-09-10 14:36 ` [PATCH 14/32] net: " Christian Brauner
2025-09-10 15:57 ` Jan Kara
2025-09-11 8:46 ` Christian Brauner
2025-09-11 9:19 ` Jan Kara
2025-09-10 21:07 ` Sasha Levin
2025-09-10 14:37 ` [PATCH 15/32] ns: remove ns_alloc_inum() Christian Brauner
2025-09-10 15:48 ` Jan Kara
2025-09-10 14:37 ` [PATCH 16/32] nstree: make iterator generic Christian Brauner
2025-09-10 14:37 ` Christian Brauner [this message]
2025-09-18 0:46 ` [PATCH 17/32] mnt: support iterator Askar Safin
2025-09-10 14:37 ` [PATCH 18/32] cgroup: " Christian Brauner
2025-09-10 16:48 ` Tejun Heo
2025-09-10 14:37 ` [PATCH 19/32] ipc: " Christian Brauner
2025-09-10 14:37 ` [PATCH 20/32] net: " Christian Brauner
2025-09-10 14:37 ` [PATCH 21/32] pid: " Christian Brauner
2025-09-10 14:37 ` [PATCH 22/32] time: " Christian Brauner
2025-09-10 15:19 ` Thomas Gleixner
2025-09-10 14:37 ` [PATCH 23/32] userns: " Christian Brauner
2025-09-10 14:37 ` [PATCH 24/32] uts: " Christian Brauner
2025-09-10 14:37 ` [PATCH 25/32] ns: add to_<type>_ns() to respective headers Christian Brauner
2025-09-10 16:35 ` Aleksa Sarai
2025-09-21 7:35 ` Thomas Gleixner
2025-09-10 14:37 ` [PATCH 26/32] nsfs: add current_in_namespace() Christian Brauner
2025-09-10 16:38 ` Aleksa Sarai
2025-09-10 14:37 ` [PATCH 27/32] nsfs: support file handles Christian Brauner
2025-09-10 17:21 ` Amir Goldstein
2025-09-11 9:31 ` Christian Brauner
2025-09-11 11:36 ` Amir Goldstein
2025-09-12 8:19 ` Christian Brauner
2025-09-12 9:12 ` Amir Goldstein
2025-09-18 3:40 ` Aleksa Sarai
2025-09-10 14:37 ` [PATCH 28/32] nsfs: support exhaustive " Christian Brauner
2025-09-10 17:07 ` Amir Goldstein
2025-09-10 14:37 ` [PATCH 29/32] nsfs: add missing id retrieval support Christian Brauner
2025-09-10 16:49 ` Aleksa Sarai
2025-09-11 7:52 ` Christian Brauner
2025-09-11 12:56 ` Aleksa Sarai
2025-09-10 14:37 ` [PATCH 30/32] tools: update nsfs.h uapi header Christian Brauner
2025-09-10 14:37 ` [PATCH 31/32] selftests/namespaces: add identifier selftests Christian Brauner
2025-09-10 14:37 ` [PATCH 32/32] selftests/namespaces: add file handle selftests Christian Brauner
2025-09-10 17:30 ` Amir Goldstein
2025-09-11 9:15 ` Christian Brauner
2025-09-11 11:48 ` Amir Goldstein
2025-09-10 21:46 ` Bart Van Assche
2025-09-11 8:59 ` Christian Brauner
2025-09-10 20:53 ` [syzbot ci] Re: ns: support file handles syzbot ci
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250910-work-namespace-v1-17-4dd56e7359d8@kernel.org \
--to=brauner@kernel.org \
--cc=amir73il@gmail.com \
--cc=axboe@kernel.dk \
--cc=cgroups@vger.kernel.org \
--cc=chuck.lever@oracle.com \
--cc=cyphar@cyphar.com \
--cc=daan.j.demeyer@gmail.com \
--cc=edumazet@google.com \
--cc=hannes@cmpxchg.org \
--cc=horms@kernel.org \
--cc=jack@suse.cz \
--cc=jlayton@kernel.org \
--cc=josef@toxicpanda.com \
--cc=kuba@kernel.org \
--cc=linux-block@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-kselftest@vger.kernel.org \
--cc=linux-nfs@vger.kernel.org \
--cc=me@yhndnzj.com \
--cc=mkoutny@suse.com \
--cc=mzxreary@0pointer.de \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=tj@kernel.org \
--cc=viro@zeniv.linux.org.uk \
--cc=zbyszek@in.waw.pl \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).