* Re: [git pull] pile 1: mount stuff
2025-10-02 5:54 [git pull] pile 1: mount stuff Al Viro
@ 2025-10-03 17:45 ` Linus Torvalds
2025-10-03 21:13 ` Al Viro
2025-10-03 18:41 ` pr-tracker-bot
1 sibling, 1 reply; 5+ messages in thread
From: Linus Torvalds @ 2025-10-03 17:45 UTC (permalink / raw)
To: Al Viro; +Cc: linux-fsdevel, Christian Brauner
[-- Attachment #1: Type: text/plain, Size: 1361 bytes --]
On Wed, 1 Oct 2025 at 22:54, Al Viro <viro@zeniv.linux.org.uk> wrote:
>
> Several piles this cycle, this one being the largest and trickiest.
> There are several trivial conflicts in fs/namespace.c; I've pushed a conflict
> resolution variant into #proposed.merge.
Thanks. The merge wasn't _that_ complicated, but this was definitely
one of those times when I wanted to check that I hadn't missed
something, and it was good to just compare my merge against your
proposed one.
That new path through 'emptied_ns' made me go 'brr'. But if I got
something wrong, at least I got it wrong the same way you did..
And a small note wrt that new 'emptied_ns' thing: I do wish those
static variables that are protected by namespace_sem would be actually
*grouped* with that lock. Yes, they have comments, and yes, they are
defined next to it, but in the actual *users* you don't see those
comments and that grouping.
IOW, I would actually like to see the grouping very explicit in the
users too. Putting them all into an anonymous union like the attached
patch would do that.
What do you think? This patch is ENTIRELY UNTESTED. It compiled for
me, that's all I can say. It *looks* fine, and it reads nicely and
groups those variables with the lock that protects them. But it's a
quick throw-away patch for "maybe something like this?"
Linus
[-- Attachment #2: patch.diff --]
[-- Type: text/x-patch, Size: 13452 bytes --]
fs/namespace.c | 106 ++++++++++++++++++++++++++++++---------------------------
1 file changed, 56 insertions(+), 50 deletions(-)
diff --git a/fs/namespace.c b/fs/namespace.c
index d39499ab5cb5..a4077f9723dc 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -86,23 +86,29 @@ static u64 mnt_id_ctr = MNT_UNIQUE_ID_OFFSET;
static struct hlist_head *mount_hashtable __ro_after_init;
static struct hlist_head *mountpoint_hashtable __ro_after_init;
static struct kmem_cache *mnt_cache __ro_after_init;
-static DECLARE_RWSEM(namespace_sem);
-static HLIST_HEAD(unmounted); /* protected by namespace_sem */
-static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */
-static struct mnt_namespace *emptied_ns; /* protected by namespace_sem */
+
+static struct {
+ struct rw_semaphore sem;
+ struct hlist_head unmounted;
+ struct list_head ex_mountpoints;
+ struct mnt_namespace *emptied_ns;
+ struct list_head notify_list;
+} fs_namespace = {
+ .sem = __RWSEM_INITIALIZER(fs_namespace.sem),
+ .unmounted = HLIST_HEAD_INIT,
+ .ex_mountpoints = LIST_HEAD_INIT(fs_namespace.ex_mountpoints),
+ .emptied_ns = NULL,
+ .notify_list = LIST_HEAD_INIT(fs_namespace.notify_list),
+};
static inline void namespace_lock(void);
static void namespace_unlock(void);
DEFINE_LOCK_GUARD_0(namespace_excl, namespace_lock(), namespace_unlock())
-DEFINE_LOCK_GUARD_0(namespace_shared, down_read(&namespace_sem),
- up_read(&namespace_sem))
+DEFINE_LOCK_GUARD_0(namespace_shared, down_read(&fs_namespace.sem),
+ up_read(&fs_namespace.sem))
DEFINE_FREE(mntput, struct vfsmount *, if (!IS_ERR(_T)) mntput(_T))
-#ifdef CONFIG_FSNOTIFY
-LIST_HEAD(notify_list); /* protected by namespace_sem */
-#endif
-
enum mount_kattr_flags_t {
MOUNT_KATTR_RECURSE = (1 << 0),
MOUNT_KATTR_IDMAP_REPLACE = (1 << 1),
@@ -171,8 +177,8 @@ static void mnt_ns_tree_remove(struct mnt_namespace *ns)
* Lookup a mount namespace by id and take a passive reference count. Taking a
* passive reference means the mount namespace can be emptied if e.g., the last
* task holding an active reference exits. To access the mounts of the
- * namespace the @namespace_sem must first be acquired. If the namespace has
- * already shut down before acquiring @namespace_sem, {list,stat}mount() will
+ * namespace the @fs_namespace.sem must first be acquired. If the namespace has
+ * already shut down before acquiring @fs_namespace.sem, {list,stat}mount() will
* see that the mount rbtree of the namespace is empty.
*
* Note the lookup is lockless protected by a sequence counter. We only
@@ -948,13 +954,13 @@ static void maybe_free_mountpoint(struct mountpoint *mp, struct list_head *list)
}
/*
- * locks: mount_lock [read_seqlock_excl], namespace_sem [excl]
+ * locks: mount_lock [read_seqlock_excl], fs_namespace.sem [excl]
*/
static void unpin_mountpoint(struct pinned_mountpoint *m)
{
if (m->mp) {
hlist_del(&m->node);
- maybe_free_mountpoint(m->mp, &ex_mountpoints);
+ maybe_free_mountpoint(m->mp, &fs_namespace.ex_mountpoints);
}
}
@@ -1016,11 +1022,11 @@ static void __umount_mnt(struct mount *mnt, struct list_head *shrink_list)
}
/*
- * locks: mount_lock[write_seqlock], namespace_sem[excl] (for ex_mountpoints)
+ * locks: mount_lock[write_seqlock], fs_namespace.sem[excl] (for fs_namespace.ex_mountpoints)
*/
static void umount_mnt(struct mount *mnt)
{
- __umount_mnt(mnt, &ex_mountpoints);
+ __umount_mnt(mnt, &fs_namespace.ex_mountpoints);
}
/*
@@ -1079,7 +1085,7 @@ void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct m
attach_mnt(mnt, parent, mp);
- maybe_free_mountpoint(old_mp, &ex_mountpoints);
+ maybe_free_mountpoint(old_mp, &fs_namespace.ex_mountpoints);
}
static inline struct mount *node_to_mount(struct rb_node *node)
@@ -1537,12 +1543,12 @@ static struct mount *mnt_find_id_at_reverse(struct mnt_namespace *ns, u64 mnt_id
#ifdef CONFIG_PROC_FS
-/* iterator; we want it to have access to namespace_sem, thus here... */
+/* iterator; we want it to have access to fs_namespace.sem, thus here... */
static void *m_start(struct seq_file *m, loff_t *pos)
{
struct proc_mounts *p = m->private;
- down_read(&namespace_sem);
+ down_read(&fs_namespace.sem);
return mnt_find_id_at(p->ns, *pos);
}
@@ -1562,7 +1568,7 @@ static void *m_next(struct seq_file *m, void *v, loff_t *pos)
static void m_stop(struct seq_file *m, void *v)
{
- up_read(&namespace_sem);
+ up_read(&fs_namespace.sem);
}
static int m_show(struct seq_file *m, void *v)
@@ -1625,12 +1631,12 @@ EXPORT_SYMBOL(may_umount_tree);
int may_umount(struct vfsmount *mnt)
{
int ret = 1;
- down_read(&namespace_sem);
+ down_read(&fs_namespace.sem);
lock_mount_hash();
if (propagate_mount_busy(real_mount(mnt), 2))
ret = 0;
unlock_mount_hash();
- up_read(&namespace_sem);
+ up_read(&fs_namespace.sem);
return ret;
}
@@ -1659,7 +1665,7 @@ static void notify_mnt_list(void)
* Notify about mounts that were added/reparented/detached/remain
* connected after unmount.
*/
- list_for_each_entry_safe(m, tmp, &notify_list, to_notify) {
+ list_for_each_entry_safe(m, tmp, &fs_namespace.notify_list, to_notify) {
mnt_notify(m);
list_del_init(&m->to_notify);
}
@@ -1667,7 +1673,7 @@ static void notify_mnt_list(void)
static bool need_notify_mnt_list(void)
{
- return !list_empty(&notify_list);
+ return !list_empty(&fs_namespace.notify_list);
}
#else
static void notify_mnt_list(void)
@@ -1686,12 +1692,12 @@ static void namespace_unlock(void)
struct hlist_head head;
struct hlist_node *p;
struct mount *m;
- struct mnt_namespace *ns = emptied_ns;
+ struct mnt_namespace *ns = fs_namespace.emptied_ns;
LIST_HEAD(list);
- hlist_move_list(&unmounted, &head);
- list_splice_init(&ex_mountpoints, &list);
- emptied_ns = NULL;
+ hlist_move_list(&fs_namespace.unmounted, &head);
+ list_splice_init(&fs_namespace.ex_mountpoints, &list);
+ fs_namespace.emptied_ns = NULL;
if (need_notify_mnt_list()) {
/*
@@ -1699,11 +1705,11 @@ static void namespace_unlock(void)
* are sent. This will also allow statmount()/listmount() to run
* concurrently.
*/
- downgrade_write(&namespace_sem);
+ downgrade_write(&fs_namespace.sem);
notify_mnt_list();
- up_read(&namespace_sem);
+ up_read(&fs_namespace.sem);
} else {
- up_write(&namespace_sem);
+ up_write(&fs_namespace.sem);
}
if (unlikely(ns)) {
/* Make sure we notice when we leak mounts. */
@@ -1726,7 +1732,7 @@ static void namespace_unlock(void)
static inline void namespace_lock(void)
{
- down_write(&namespace_sem);
+ down_write(&fs_namespace.sem);
}
enum umount_tree_flags {
@@ -1766,7 +1772,7 @@ static bool disconnect_mount(struct mount *mnt, enum umount_tree_flags how)
/*
* mount_lock must be held
- * namespace_sem must be held for write
+ * fs_namespace.sem must be held for write
*/
static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
{
@@ -1820,7 +1826,7 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
}
}
if (disconnect)
- hlist_add_head(&p->mnt_umount, &unmounted);
+ hlist_add_head(&p->mnt_umount, &fs_namespace.unmounted);
/*
* At this point p->mnt_ns is NULL, notification will be queued
@@ -1989,7 +1995,7 @@ void __detach_mounts(struct dentry *dentry)
mnt = hlist_entry(mp.node.next, struct mount, mnt_mp_list);
if (mnt->mnt.mnt_flags & MNT_UMOUNT) {
umount_mnt(mnt);
- hlist_add_head(&mnt->mnt_umount, &unmounted);
+ hlist_add_head(&mnt->mnt_umount, &fs_namespace.unmounted);
}
else umount_tree(mnt, UMOUNT_CONNECTED);
}
@@ -2296,7 +2302,7 @@ void dissolve_on_fput(struct vfsmount *mnt)
/*
* m used to be the root of anon namespace; if it still is one,
* we need to dissolve the mount tree and free that namespace.
- * Let's try to avoid taking namespace_sem if we can determine
+ * Let's try to avoid taking fs_namespace.sem if we can determine
* that there's nothing to do without it - rcu_read_lock() is
* enough to make anon_ns_root() memory-safe and once m has
* left its namespace, it's no longer our concern, since it will
@@ -2312,7 +2318,7 @@ void dissolve_on_fput(struct vfsmount *mnt)
if (!anon_ns_root(m))
return;
- emptied_ns = m->mnt_ns;
+ fs_namespace.emptied_ns = m->mnt_ns;
lock_mount_hash();
umount_tree(m, UMOUNT_CONNECTED);
unlock_mount_hash();
@@ -2615,7 +2621,7 @@ static int attach_recursive_mnt(struct mount *source_mnt,
} else {
if (source_mnt->mnt_ns) {
/* move from anon - the caller will destroy */
- emptied_ns = source_mnt->mnt_ns;
+ fs_namespace.emptied_ns = source_mnt->mnt_ns;
for (p = source_mnt; p; p = next_mnt(p, source_mnt))
move_from_ns(p);
}
@@ -2700,7 +2706,7 @@ static inline struct mount *where_to_mount(const struct path *path,
* @beneath: whether the intention is to mount beneath @path
*
* To mount something at given location, we need
- * namespace_sem locked exclusive
+ * fs_namespace.sem locked exclusive
* inode of dentry we are mounting on locked exclusive
* struct mountpoint for that dentry
* struct mount we are mounting on
@@ -2712,7 +2718,7 @@ static inline struct mount *where_to_mount(const struct path *path,
* On failure we have res->parent set to ERR_PTR(-E...), res->mp
* left NULL, res->node - empty.
* In case of success do_lock_mount returns with locks acquired (in
- * proper order - inode lock nests outside of namespace_sem).
+ * proper order - inode lock nests outside of fs_namespace.sem).
*
* Request to mount on overmounted location is treated as "mount on
* top of whatever's overmounting it"; request to mount beneath
@@ -2772,7 +2778,7 @@ static void do_lock_mount(const struct path *path,
}
/*
* Drop the temporary references. This is subtle - on success
- * we are doing that under namespace_sem, which would normally
+ * we are doing that under fs_namespace.sem, which would normally
* be forbidden. However, in that case we are guaranteed that
* refcounts won't reach zero, since we know that path->mnt
* is mounted and thus all mounts reachable from it are pinned
@@ -3022,7 +3028,7 @@ static struct mnt_namespace *get_detached_copy(const struct path *path, bool rec
/*
* Record the sequence number of the source mount namespace.
- * This needs to hold namespace_sem to ensure that the mount
+ * This needs to hold fs_namespace.sem to ensure that the mount
* doesn't get attached.
*/
if (is_mounted(path->mnt)) {
@@ -3035,7 +3041,7 @@ static struct mnt_namespace *get_detached_copy(const struct path *path, bool rec
mnt = __do_loopback(path, recursive);
if (IS_ERR(mnt)) {
- emptied_ns = ns;
+ fs_namespace.emptied_ns = ns;
return ERR_CAST(mnt);
}
@@ -3364,7 +3370,7 @@ static int do_set_group(const struct path *from_path, const struct path *to_path
* Check if path is overmounted, i.e., if there's a mount on top of
* @path->mnt with @path->dentry as mountpoint.
*
- * Context: namespace_sem must be held at least shared.
+ * Context: fs_namespace.sem must be held at least shared.
* MUST NOT be called under lock_mount_hash() (there one should just
* call __lookup_mnt() and check if it returns NULL).
* Return: If path is overmounted true is returned, false if not.
@@ -3387,7 +3393,7 @@ static inline bool path_overmounted(const struct path *path)
/*
* Check if there is a possibly empty chain of descent from p1 to p2.
- * Locks: namespace_sem (shared) or mount_lock (read_seqlock_excl).
+ * Locks: fs_namespace.sem (shared) or mount_lock (read_seqlock_excl).
*/
static bool mount_is_ancestor(const struct mount *p1, const struct mount *p2)
{
@@ -3407,7 +3413,7 @@ static bool mount_is_ancestor(const struct mount *p1, const struct mount *p2)
* - Make sure that the caller can unmount the topmost mount ensuring
* that the caller could reveal the underlying mountpoint.
* - Ensure that nothing has been mounted on top of @mnt_from before we
- * grabbed @namespace_sem to avoid creating pointless shadow mounts.
+ * grabbed @fs_namespace.sem to avoid creating pointless shadow mounts.
* - Prevent mounting beneath a mount if the propagation relationship
* between the source mount, parent mount, and top mount would lead to
* nonsensical mount trees.
@@ -4137,7 +4143,7 @@ struct mnt_namespace *copy_mnt_ns(u64 flags, struct mnt_namespace *ns,
copy_flags |= CL_SLAVE;
new = copy_tree(old, old->mnt.mnt_root, copy_flags);
if (IS_ERR(new)) {
- emptied_ns = new_ns;
+ fs_namespace.emptied_ns = new_ns;
return ERR_CAST(new);
}
if (user_ns != ns->user_ns) {
@@ -5526,7 +5532,7 @@ static int grab_requested_root(struct mnt_namespace *ns, struct path *root)
{
struct mount *first, *child;
- rwsem_assert_held(&namespace_sem);
+ rwsem_assert_held(&fs_namespace.sem);
/* We're looking at our own ns, just use get_fs_root. */
if (ns == current->nsproxy->mnt_ns) {
@@ -5852,7 +5858,7 @@ static ssize_t do_listmount(struct klistmount *kls, bool reverse)
struct mount *r, *first;
ssize_t ret;
- rwsem_assert_held(&namespace_sem);
+ rwsem_assert_held(&fs_namespace.sem);
ret = grab_requested_root(ns, &kls->root);
if (ret)
@@ -6063,7 +6069,7 @@ void put_mnt_ns(struct mnt_namespace *ns)
if (!ns_ref_put(ns))
return;
guard(namespace_excl)();
- emptied_ns = ns;
+ fs_namespace.emptied_ns = ns;
guard(mount_writer)();
umount_tree(ns->root, 0);
}
^ permalink raw reply related [flat|nested] 5+ messages in thread