From: Jan Kara <jack@suse.cz>
To: Christian Brauner <brauner@kernel.org>
Cc: Jan Kara <jack@suse.cz>, Christoph Hellwig <hch@lst.de>,
linux-fsdevel@vger.kernel.org
Subject: Re: [RFC PATCH] fs: make s_count atomic_t
Date: Thu, 2 Nov 2023 14:48:42 +0100 [thread overview]
Message-ID: <20231102134842.tu26pykjpzgi4veo@quack3> (raw)
In-Reply-To: <20231027-neurologie-miterleben-a8c52a745463@brauner>
On Fri 27-10-23 11:35:20, Christian Brauner wrote:
> So, I believe we can drop the sb_lock in bdev_super_lock() for all
> holder operations if we turn s_count into an atomic. It will slightly
> change semantics for list walks like iterate_supers() but imho that's
> fine. It'll mean that list walkes need a acquire sb_lock, then try to
> get reference via atomic_inc_not_zero().
>
> The logic there is simply that if you still found the superblock on the
> list then yes, someone could now concurrently drop s_count to zero
> behind your back. But because you hold sb_lock they can't remove it from
> the list behind your back.
>
> So if you now fail atomic_inc_not_zero() then you know that someone
> concurrently managed to drop the ref to zero and wants to remove that sb
> from the list. So you just ignore that super block and go on walking the
> list. If however, you manage to get an active reference things are fine
> and you can try to trade that temporary reference for an active
> reference. So my theory at least...
>
> Yes, ofc we add atomics but for superblocks we shouldn't care especially
> we have less and less list walkers. Both get_super() and
> get_active_super() are gone after all.
>
> I'm running xfstests as I'm sending this and I need to start finishing
> PRs so in RFC mode. Thoughts?
>
> Signed-off-by: Christian Brauner <brauner@kernel.org>
So in principle I agree we can get rid of some sb_lock use if we convert
sb->s_count to an atomic type. However does this bring any significant
benefit? I would not expect sb_lock to be contended and as you say you need
to be more careful about the races then so that is a reason against such
change.
Honza
> ---
> fs/super.c | 93 ++++++++++++++++++++++++++--------------------
> include/linux/fs.h | 2 +-
> 2 files changed, 53 insertions(+), 42 deletions(-)
>
> diff --git a/fs/super.c b/fs/super.c
> index 71e5e61cfc9e..c58de6bb5633 100644
> --- a/fs/super.c
> +++ b/fs/super.c
> @@ -375,7 +375,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
> INIT_LIST_HEAD(&s->s_inodes_wb);
> spin_lock_init(&s->s_inode_wblist_lock);
>
> - s->s_count = 1;
> + atomic_set(&s->s_count, 1);
> atomic_set(&s->s_active, 1);
> mutex_init(&s->s_vfs_rename_mutex);
> lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key);
> @@ -409,19 +409,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
> /*
> * Drop a superblock's refcount. The caller must hold sb_lock.
> */
> -static void __put_super(struct super_block *s)
> -{
> - if (!--s->s_count) {
> - list_del_init(&s->s_list);
> - WARN_ON(s->s_dentry_lru.node);
> - WARN_ON(s->s_inode_lru.node);
> - WARN_ON(!list_empty(&s->s_mounts));
> - security_sb_free(s);
> - put_user_ns(s->s_user_ns);
> - kfree(s->s_subtype);
> - call_rcu(&s->rcu, destroy_super_rcu);
> - }
> -}
>
> /**
> * put_super - drop a temporary reference to superblock
> @@ -430,10 +417,20 @@ static void __put_super(struct super_block *s)
> * Drops a temporary reference, frees superblock if there's no
> * references left.
> */
> -void put_super(struct super_block *sb)
> +void put_super(struct super_block *s)
> {
> + if (!atomic_dec_and_test(&s->s_count))
> + return;
> +
> spin_lock(&sb_lock);
> - __put_super(sb);
> + list_del_init(&s->s_list);
> + WARN_ON(s->s_dentry_lru.node);
> + WARN_ON(s->s_inode_lru.node);
> + WARN_ON(!list_empty(&s->s_mounts));
> + security_sb_free(s);
> + put_user_ns(s->s_user_ns);
> + kfree(s->s_subtype);
> + call_rcu(&s->rcu, destroy_super_rcu);
> spin_unlock(&sb_lock);
> }
>
> @@ -548,8 +545,11 @@ static bool grab_super(struct super_block *sb)
> {
> bool locked;
>
> - sb->s_count++;
> + locked = atomic_inc_not_zero(&sb->s_count);
> spin_unlock(&sb_lock);
> + if (!locked)
> + return false;
> +
> locked = super_lock_excl(sb);
> if (locked) {
> if (atomic_inc_not_zero(&sb->s_active)) {
> @@ -908,19 +908,20 @@ static void __iterate_supers(void (*f)(struct super_block *))
> /* Pairs with memory marrier in super_wake(). */
> if (smp_load_acquire(&sb->s_flags) & SB_DYING)
> continue;
> - sb->s_count++;
> + if (!atomic_inc_not_zero(&sb->s_count))
> + continue;
> spin_unlock(&sb_lock);
>
> f(sb);
>
> - spin_lock(&sb_lock);
> if (p)
> - __put_super(p);
> + put_super(p);
> p = sb;
> + spin_lock(&sb_lock);
> }
> - if (p)
> - __put_super(p);
> spin_unlock(&sb_lock);
> + if (p)
> + put_super(p);
> }
> /**
> * iterate_supers - call function for all active superblocks
> @@ -938,7 +939,8 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
> list_for_each_entry(sb, &super_blocks, s_list) {
> bool locked;
>
> - sb->s_count++;
> + if (!atomic_inc_not_zero(&sb->s_count))
> + continue;
> spin_unlock(&sb_lock);
>
> locked = super_lock_shared(sb);
> @@ -948,14 +950,14 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
> super_unlock_shared(sb);
> }
>
> - spin_lock(&sb_lock);
> if (p)
> - __put_super(p);
> + put_super(p);
> p = sb;
> + spin_lock(&sb_lock);
> }
> - if (p)
> - __put_super(p);
> spin_unlock(&sb_lock);
> + if (p)
> + put_super(p);
> }
>
> /**
> @@ -976,7 +978,8 @@ void iterate_supers_type(struct file_system_type *type,
> hlist_for_each_entry(sb, &type->fs_supers, s_instances) {
> bool locked;
>
> - sb->s_count++;
> + if (!atomic_inc_not_zero(&sb->s_count))
> + continue;
> spin_unlock(&sb_lock);
>
> locked = super_lock_shared(sb);
> @@ -986,14 +989,14 @@ void iterate_supers_type(struct file_system_type *type,
> super_unlock_shared(sb);
> }
>
> - spin_lock(&sb_lock);
> if (p)
> - __put_super(p);
> + put_super(p);
> p = sb;
> + spin_lock(&sb_lock);
> }
> - if (p)
> - __put_super(p);
> spin_unlock(&sb_lock);
> + if (p)
> + put_super(p);
> }
>
> EXPORT_SYMBOL(iterate_supers_type);
> @@ -1007,7 +1010,8 @@ struct super_block *user_get_super(dev_t dev, bool excl)
> if (sb->s_dev == dev) {
> bool locked;
>
> - sb->s_count++;
> + if (!atomic_inc_not_zero(&sb->s_count))
> + continue;
> spin_unlock(&sb_lock);
> /* still alive? */
> locked = super_lock(sb, excl);
> @@ -1017,8 +1021,8 @@ struct super_block *user_get_super(dev_t dev, bool excl)
> super_unlock(sb, excl);
> }
> /* nope, got unmounted */
> + put_super(sb);
> spin_lock(&sb_lock);
> - __put_super(sb);
> break;
> }
> }
> @@ -1387,20 +1391,27 @@ static struct super_block *bdev_super_lock(struct block_device *bdev, bool excl)
> __releases(&bdev->bd_holder_lock)
> {
> struct super_block *sb = bdev->bd_holder;
> - bool locked;
> + bool active;
>
> lockdep_assert_held(&bdev->bd_holder_lock);
> lockdep_assert_not_held(&sb->s_umount);
> lockdep_assert_not_held(&bdev->bd_disk->open_mutex);
>
> - /* Make sure sb doesn't go away from under us */
> - spin_lock(&sb_lock);
> - sb->s_count++;
> - spin_unlock(&sb_lock);
> + active = atomic_inc_not_zero(&sb->s_count);
>
> mutex_unlock(&bdev->bd_holder_lock);
>
> - locked = super_lock(sb, excl);
> + /*
> + * The bd_holder_lock guarantees that @sb is still valid.
> + * sb->s_count can't be zero. If it were it would mean that we
> + * found a block device that has bdev->bd_holder set to a
> + * superblock that's about to be freed. IOW, there's a UAF
> + * somewhere...
> + */
> + if (WARN_ON_ONCE(!active))
> + return NULL;
> +
> + active = super_lock(sb, excl);
>
> /*
> * If the superblock wasn't already SB_DYING then we hold
> @@ -1408,7 +1419,7 @@ static struct super_block *bdev_super_lock(struct block_device *bdev, bool excl)
> */
> put_super(sb);
>
> - if (!locked)
> + if (!active)
> return NULL;
>
> if (!sb->s_root || !(sb->s_flags & SB_ACTIVE)) {
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 5174e821d451..68e453c155af 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1201,7 +1201,7 @@ struct super_block {
> unsigned long s_magic;
> struct dentry *s_root;
> struct rw_semaphore s_umount;
> - int s_count;
> + atomic_t s_count;
> atomic_t s_active;
> #ifdef CONFIG_SECURITY
> void *s_security;
> --
> 2.34.1
>
--
Jan Kara <jack@suse.com>
SUSE Labs, CR
next prev parent reply other threads:[~2023-11-02 13:48 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-10-27 9:35 [RFC PATCH] fs: make s_count atomic_t Christian Brauner
2023-11-02 13:48 ` Jan Kara [this message]
2023-11-02 16:05 ` Christian Brauner
2023-11-03 8:19 ` Christoph Hellwig
2023-11-06 18:08 ` Al Viro
2023-11-07 15:01 ` Christian Brauner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20231102134842.tu26pykjpzgi4veo@quack3 \
--to=jack@suse.cz \
--cc=brauner@kernel.org \
--cc=hch@lst.de \
--cc=linux-fsdevel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).