Linux EXT4 FS development
 help / color / mirror / Atom feed
From: Christian Brauner <brauner@kernel.org>
To: linux-fsdevel@vger.kernel.org
Cc: Theodore Ts'o <tytso@mit.edu>,
	 Andreas Dilger <adilger.kernel@dilger.ca>,
	Jan Kara <jack@suse.cz>,
	 "Ritesh Harjani (IBM)" <ritesh.list@gmail.com>,
	linux-ext4@vger.kernel.org,  linux-cifs@vger.kernel.org,
	Alexander Viro <viro@zeniv.linux.org.uk>,
	 "Christian Brauner (Amutable)" <brauner@kernel.org>
Subject: [PATCH 8/8] super: convert iterators to RCU readers + refcount_inc_not_zero
Date: Tue, 26 May 2026 17:09:10 +0200	[thread overview]
Message-ID: <20260526-work-sget-v1-8-263f7025cedd@kernel.org> (raw)
In-Reply-To: <20260526-work-sget-v1-0-263f7025cedd@kernel.org>

Walk @super_blocks and @fs_supers under rcu_read_lock() and pin the
current entry with refcount_inc_not_zero() instead of holding sb_lock
across the cursor advance. sb_lock was only there to keep the
cursor's ->next / ->prev pointer from being mutated by concurrent
list_del / list_add. RCU semantics give us that guarantee directly:
list_bidir_del_rcu() preserves both ->next and ->prev on the
unlinked entry and list_add_tail_rcu() publishes new entries with
the release barrier set up by the previous patch.

The pattern at each iterator is:

    rcu_read_lock();
    list_for_each_entry_rcu(sb, ...) {
            if (SB_DYING)                             continue;
            if (!refcount_inc_not_zero(&sb->s_count)) continue;
            rcu_read_unlock();

            ...                       /* may sleep on s_umount */

            if (prev)
                    put_super(prev);
            prev = sb;
            rcu_read_lock();          /* prev pinned: prev->{next,prev} valid */
    }
    rcu_read_unlock();
    if (prev)
            put_super(prev);

While we hold a pin on @prev, __put_super() cannot reach the
refcount_dec_and_test() transition that drives list_bidir_del_rcu().
So @prev stays on the list and concurrent list_bidir_del_rcu() of
other entries keeps @prev->s_list.{next,prev} pointing at the still-
live neighbour (or the head sentinel). The cursor advance after
re-acquiring rcu_read_lock() is therefore always against a live
chain in whichever direction we're walking.

put_super() now appears in the middle of the loop where __put_super()
used to be called with sb_lock held. It briefly takes sb_lock for
the trailing-ref drop; in the common case refcount_dec_and_test()
returns false and the lock is held for only a handful of cycles.

first_super() and next_super() switch the forward arm to READ_ONCE()
on the head and cursor ->next pointers and the reverse arm to
rcu_dereference(list_bidir_prev_rcu(...)). The forward arm matches
the semantics of list_entry_rcu() used internally by
list_for_each_entry_rcu(); the reverse arm is the canonical
bidirectional-RCU traversal pattern (see kernel/nstree.c) and is
needed because filesystems_freeze() and do_emergency_remount() pass
SUPER_ITER_REVERSE.

iterate_supers_type() and user_get_super() get the same treatment.
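user_get_super() simplifies further: on a device match where
super_lock() succeeds we return with the pin; if super_lock() fails
(the superblock turned out to be SB_DYING) we put_super() and return
NULL. A lookup miss takes no pin and simply returns NULL.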

sget_fc() and grab_super() are not touched here.

Signed-off-by: Christian Brauner (Amutable) <brauner@kernel.org>
---
 fs/super.c | 71 +++++++++++++++++++++++++++++++++-----------------------------
 1 file changed, 38 insertions(+), 33 deletions(-)

diff --git a/fs/super.c b/fs/super.c
index 8c01b95be717..d9b1148f7030 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -831,17 +831,25 @@ enum super_iter_flags_t {
 
 static inline struct super_block *first_super(enum super_iter_flags_t flags)
 {
+	struct list_head *next;
+
 	if (flags & SUPER_ITER_REVERSE)
-		return list_last_entry(&super_blocks, struct super_block, s_list);
-	return list_first_entry(&super_blocks, struct super_block, s_list);
+		next = rcu_dereference(list_bidir_prev_rcu(&super_blocks));
+	else
+		next = READ_ONCE(super_blocks.next);
+	return list_entry(next, struct super_block, s_list);
 }
 
 static inline struct super_block *next_super(struct super_block *sb,
 					     enum super_iter_flags_t flags)
 {
+	struct list_head *next;
+
 	if (flags & SUPER_ITER_REVERSE)
-		return list_prev_entry(sb, s_list);
-	return list_next_entry(sb, s_list);
+		next = rcu_dereference(list_bidir_prev_rcu(&sb->s_list));
+	else
+		next = READ_ONCE(sb->s_list.next);
+	return list_entry(next, struct super_block, s_list);
 }
 
 static void __iterate_supers(void (*f)(struct super_block *, void *), void *arg,
@@ -850,15 +858,15 @@ static void __iterate_supers(void (*f)(struct super_block *, void *), void *arg,
 	struct super_block *sb, *p = NULL;
 	bool excl = flags & SUPER_ITER_EXCL;
 
-	guard(spinlock)(&sb_lock);
-
+	rcu_read_lock();
 	for (sb = first_super(flags);
 	     !list_entry_is_head(sb, &super_blocks, s_list);
 	     sb = next_super(sb, flags)) {
 		if (super_flags(sb, SB_DYING))
 			continue;
-		refcount_inc(&sb->s_count);
-		spin_unlock(&sb_lock);
+		if (!refcount_inc_not_zero(&sb->s_count))
+			continue;
+		rcu_read_unlock();
 
 		if (flags & SUPER_ITER_UNLOCKED) {
 			f(sb, arg);
@@ -867,13 +875,14 @@ static void __iterate_supers(void (*f)(struct super_block *, void *), void *arg,
 			super_unlock(sb, excl);
 		}
 
-		spin_lock(&sb_lock);
 		if (p)
-			__put_super(p);
+			put_super(p);
 		p = sb;
+		rcu_read_lock();
 	}
+	rcu_read_unlock();
 	if (p)
-		__put_super(p);
+		put_super(p);
 }
 
 void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
@@ -895,15 +904,15 @@ void iterate_supers_type(struct file_system_type *type,
 {
 	struct super_block *sb, *p = NULL;
 
-	spin_lock(&sb_lock);
-	hlist_for_each_entry(sb, &type->fs_supers, s_instances) {
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(sb, &type->fs_supers, s_instances) {
 		bool locked;
 
 		if (super_flags(sb, SB_DYING))
 			continue;
-
-		refcount_inc(&sb->s_count);
-		spin_unlock(&sb_lock);
+		if (!refcount_inc_not_zero(&sb->s_count))
+			continue;
+		rcu_read_unlock();
 
 		locked = super_lock_shared(sb);
 		if (locked) {
@@ -911,14 +920,14 @@ void iterate_supers_type(struct file_system_type *type,
 			super_unlock_shared(sb);
 		}
 
-		spin_lock(&sb_lock);
 		if (p)
-			__put_super(p);
+			put_super(p);
 		p = sb;
+		rcu_read_lock();
 	}
+	rcu_read_unlock();
 	if (p)
-		__put_super(p);
-	spin_unlock(&sb_lock);
+		put_super(p);
 }
 
 EXPORT_SYMBOL(iterate_supers_type);
@@ -927,25 +936,21 @@ struct super_block *user_get_super(dev_t dev, bool excl)
 {
 	struct super_block *sb;
 
-	spin_lock(&sb_lock);
-	list_for_each_entry(sb, &super_blocks, s_list) {
-		bool locked;
-
+	rcu_read_lock();
+	list_for_each_entry_rcu(sb, &super_blocks, s_list) {
 		if (sb->s_dev != dev)
 			continue;
+		if (!refcount_inc_not_zero(&sb->s_count))
+			continue;
+		rcu_read_unlock();
 
-		refcount_inc(&sb->s_count);
-		spin_unlock(&sb_lock);
-
-		locked = super_lock(sb, excl);
-		if (locked)
+		if (super_lock(sb, excl))
 			return sb;
 
-		spin_lock(&sb_lock);
-		__put_super(sb);
-		break;
+		put_super(sb);
+		return NULL;
 	}
-	spin_unlock(&sb_lock);
+	rcu_read_unlock();
 	return NULL;
 }
 

-- 
2.47.3


  parent reply	other threads:[~2026-05-26 15:09 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-26 15:09 [PATCH 0/8] super: retire sget(), convert iterators to RCU Christian Brauner
2026-05-26 15:09 ` [PATCH 1/8] ext4: convert extents KUnit test to sget_fc() Christian Brauner
2026-05-26 15:09 ` [PATCH 2/8] ext4: convert mballoc " Christian Brauner
2026-05-27  0:47   ` Theodore Tso
2026-05-28 12:02     ` Christian Brauner
2026-06-03 13:52       ` Theodore Tso
2026-05-26 15:09 ` [PATCH 3/8] smb: client: convert cifs_smb3_do_mount() " Christian Brauner
2026-05-26 15:09 ` [PATCH 4/8] fs: retire sget() Christian Brauner
2026-05-26 15:09 ` [PATCH 5/8] super: drop sb_lock from setup_bdev_super() tuple publication Christian Brauner
2026-05-27 11:53   ` Christian Brauner
2026-05-26 15:09 ` [PATCH 6/8] super: convert sb->s_count to refcount_t Christian Brauner
2026-05-26 15:09 ` [PATCH 7/8] super: switch list manipulation to _rcu primitives Christian Brauner
2026-05-26 15:09 ` Christian Brauner [this message]
2026-05-27 11:54 ` [PATCH 0/8] super: retire sget(), convert iterators to RCU Christian Brauner
2026-05-28 11:18 ` Jan Kara

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260526-work-sget-v1-8-263f7025cedd@kernel.org \
    --to=brauner@kernel.org \
    --cc=adilger.kernel@dilger.ca \
    --cc=jack@suse.cz \
    --cc=linux-cifs@vger.kernel.org \
    --cc=linux-ext4@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=ritesh.list@gmail.com \
    --cc=tytso@mit.edu \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox