From: Christian Brauner <brauner@kernel.org>
To: linux-fsdevel@vger.kernel.org
Cc: Theodore Ts'o <tytso@mit.edu>,
Andreas Dilger <adilger.kernel@dilger.ca>,
Jan Kara <jack@suse.cz>,
"Ritesh Harjani (IBM)" <ritesh.list@gmail.com>,
linux-ext4@vger.kernel.org, linux-cifs@vger.kernel.org,
Alexander Viro <viro@zeniv.linux.org.uk>,
"Christian Brauner (Amutable)" <brauner@kernel.org>
Subject: [PATCH 8/8] super: convert iterators to RCU readers + refcount_inc_not_zero
Date: Tue, 26 May 2026 17:09:10 +0200 [thread overview]
Message-ID: <20260526-work-sget-v1-8-263f7025cedd@kernel.org> (raw)
In-Reply-To: <20260526-work-sget-v1-0-263f7025cedd@kernel.org>
Walk @super_blocks and @fs_supers under rcu_read_lock() and pin the
current entry with refcount_inc_not_zero() instead of holding sb_lock
across the cursor advance. sb_lock was only there to keep the
cursor's ->next / ->prev pointer from being mutated by concurrent
list_del / list_add. RCU semantics give us that guarantee directly:
list_bidir_del_rcu() preserves both ->next and ->prev on the
unlinked entry and list_add_tail_rcu() publishes new entries with
the release barrier set up by the previous patch.
The pattern at each iterator is:
    rcu_read_lock();
    list_for_each_entry_rcu(sb, ...) {
        if (SB_DYING) continue;
        if (!refcount_inc_not_zero(&sb->s_count)) continue;
        rcu_read_unlock();
        ... /* may sleep on s_umount */
        if (prev)
            put_super(prev);
        prev = sb;
        rcu_read_lock(); /* prev pinned: prev->{next,prev} valid */
    }
    rcu_read_unlock();
    if (prev)
        put_super(prev);
While we hold a pin on @prev, __put_super() cannot reach the
refcount_dec_and_test() transition that drives list_bidir_del_rcu().
So @prev stays on the list and concurrent list_bidir_del_rcu() of
other entries keeps @prev->s_list.{next,prev} pointing at the still-
live neighbour (or the head sentinel). The cursor advance after
re-acquiring rcu_read_lock() is therefore always against a live
chain in whichever direction we're walking.
put_super() now appears in the middle of the loop where __put_super()
used to be called with sb_lock held. It briefly takes sb_lock for
the trailing-ref drop; in the common case dec_and_test() returns
false and the lock is held for only a handful of cycles.
first_super() and next_super() switch the forward arm to READ_ONCE()
on the head and cursor ->next pointers and the reverse arm to
rcu_dereference(list_bidir_prev_rcu(...)). The forward arm matches
the semantics of list_entry_rcu() used internally by
list_for_each_entry_rcu(); the reverse arm is the canonical
bidirectional-RCU traversal pattern (see kernel/nstree.c) and is
needed because filesystems_freeze() and do_emergency_remount() pass
SUPER_ITER_REVERSE.
iterate_supers_type() and user_get_super() get the same treatment.
user_get_super() simplifies further: on a lookup hit we pin the
superblock and call super_lock(). If that succeeds we return with the
pin held; if it fails because the superblock turned out to be SB_DYING
we put_super() and return NULL.
sget_fc() and grab_super() are not touched here.
Signed-off-by: Christian Brauner (Amutable) <brauner@kernel.org>
---
fs/super.c | 71 +++++++++++++++++++++++++++++++++-----------------------------
1 file changed, 38 insertions(+), 33 deletions(-)
diff --git a/fs/super.c b/fs/super.c
index 8c01b95be717..d9b1148f7030 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -831,17 +831,25 @@ enum super_iter_flags_t {
static inline struct super_block *first_super(enum super_iter_flags_t flags)
{
+ struct list_head *next;
+
if (flags & SUPER_ITER_REVERSE)
- return list_last_entry(&super_blocks, struct super_block, s_list);
- return list_first_entry(&super_blocks, struct super_block, s_list);
+ next = rcu_dereference(list_bidir_prev_rcu(&super_blocks));
+ else
+ next = READ_ONCE(super_blocks.next);
+ return list_entry(next, struct super_block, s_list);
}
static inline struct super_block *next_super(struct super_block *sb,
enum super_iter_flags_t flags)
{
+ struct list_head *next;
+
if (flags & SUPER_ITER_REVERSE)
- return list_prev_entry(sb, s_list);
- return list_next_entry(sb, s_list);
+ next = rcu_dereference(list_bidir_prev_rcu(&sb->s_list));
+ else
+ next = READ_ONCE(sb->s_list.next);
+ return list_entry(next, struct super_block, s_list);
}
static void __iterate_supers(void (*f)(struct super_block *, void *), void *arg,
@@ -850,15 +858,15 @@ static void __iterate_supers(void (*f)(struct super_block *, void *), void *arg,
struct super_block *sb, *p = NULL;
bool excl = flags & SUPER_ITER_EXCL;
- guard(spinlock)(&sb_lock);
-
+ rcu_read_lock();
for (sb = first_super(flags);
!list_entry_is_head(sb, &super_blocks, s_list);
sb = next_super(sb, flags)) {
if (super_flags(sb, SB_DYING))
continue;
- refcount_inc(&sb->s_count);
- spin_unlock(&sb_lock);
+ if (!refcount_inc_not_zero(&sb->s_count))
+ continue;
+ rcu_read_unlock();
if (flags & SUPER_ITER_UNLOCKED) {
f(sb, arg);
@@ -867,13 +875,14 @@ static void __iterate_supers(void (*f)(struct super_block *, void *), void *arg,
super_unlock(sb, excl);
}
- spin_lock(&sb_lock);
if (p)
- __put_super(p);
+ put_super(p);
p = sb;
+ rcu_read_lock();
}
+ rcu_read_unlock();
if (p)
- __put_super(p);
+ put_super(p);
}
void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
@@ -895,15 +904,15 @@ void iterate_supers_type(struct file_system_type *type,
{
struct super_block *sb, *p = NULL;
- spin_lock(&sb_lock);
- hlist_for_each_entry(sb, &type->fs_supers, s_instances) {
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(sb, &type->fs_supers, s_instances) {
bool locked;
if (super_flags(sb, SB_DYING))
continue;
-
- refcount_inc(&sb->s_count);
- spin_unlock(&sb_lock);
+ if (!refcount_inc_not_zero(&sb->s_count))
+ continue;
+ rcu_read_unlock();
locked = super_lock_shared(sb);
if (locked) {
@@ -911,14 +920,14 @@ void iterate_supers_type(struct file_system_type *type,
super_unlock_shared(sb);
}
- spin_lock(&sb_lock);
if (p)
- __put_super(p);
+ put_super(p);
p = sb;
+ rcu_read_lock();
}
+ rcu_read_unlock();
if (p)
- __put_super(p);
- spin_unlock(&sb_lock);
+ put_super(p);
}
EXPORT_SYMBOL(iterate_supers_type);
@@ -927,25 +936,21 @@ struct super_block *user_get_super(dev_t dev, bool excl)
{
struct super_block *sb;
- spin_lock(&sb_lock);
- list_for_each_entry(sb, &super_blocks, s_list) {
- bool locked;
-
+ rcu_read_lock();
+ list_for_each_entry_rcu(sb, &super_blocks, s_list) {
if (sb->s_dev != dev)
continue;
+ if (!refcount_inc_not_zero(&sb->s_count))
+ continue;
+ rcu_read_unlock();
- refcount_inc(&sb->s_count);
- spin_unlock(&sb_lock);
-
- locked = super_lock(sb, excl);
- if (locked)
+ if (super_lock(sb, excl))
return sb;
- spin_lock(&sb_lock);
- __put_super(sb);
- break;
+ put_super(sb);
+ return NULL;
}
- spin_unlock(&sb_lock);
+ rcu_read_unlock();
return NULL;
}
--
2.47.3
next prev parent reply other threads:[~2026-05-26 15:09 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-26 15:09 [PATCH 0/8] super: retire sget(), convert iterators to RCU Christian Brauner
2026-05-26 15:09 ` [PATCH 1/8] ext4: convert extents KUnit test to sget_fc() Christian Brauner
2026-05-26 15:09 ` [PATCH 2/8] ext4: convert mballoc " Christian Brauner
2026-05-27 0:47 ` Theodore Tso
2026-05-28 12:02 ` Christian Brauner
2026-06-03 13:52 ` Theodore Tso
2026-05-26 15:09 ` [PATCH 3/8] smb: client: convert cifs_smb3_do_mount() " Christian Brauner
2026-05-26 15:09 ` [PATCH 4/8] fs: retire sget() Christian Brauner
2026-05-26 15:09 ` [PATCH 5/8] super: drop sb_lock from setup_bdev_super() tuple publication Christian Brauner
2026-05-27 11:53 ` Christian Brauner
2026-05-26 15:09 ` [PATCH 6/8] super: convert sb->s_count to refcount_t Christian Brauner
2026-05-26 15:09 ` [PATCH 7/8] super: switch list manipulation to _rcu primitives Christian Brauner
2026-05-26 15:09 ` Christian Brauner [this message]
2026-05-27 11:54 ` [PATCH 0/8] super: retire sget(), convert iterators to RCU Christian Brauner
2026-05-28 11:18 ` Jan Kara
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260526-work-sget-v1-8-263f7025cedd@kernel.org \
--to=brauner@kernel.org \
--cc=adilger.kernel@dilger.ca \
--cc=jack@suse.cz \
--cc=linux-cifs@vger.kernel.org \
--cc=linux-ext4@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=ritesh.list@gmail.com \
--cc=tytso@mit.edu \
--cc=viro@zeniv.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox