From: Christian Brauner <brauner@kernel.org>
To: Jan Kara <jack@suse.cz>, Christoph Hellwig <hch@lst.de>
Cc: linux-fsdevel@vger.kernel.org, Christian Brauner <brauner@kernel.org>
Subject: [PATCH RFC 6/6] fs: add ->yield_devices()
Date: Tue, 24 Oct 2023 16:53:44 +0200 [thread overview]
Message-ID: <20231024-vfs-super-rework-v1-6-37a8aa697148@kernel.org> (raw)
In-Reply-To: <20231024-vfs-super-rework-v1-0-37a8aa697148@kernel.org>
Add a ->yield_devices() super operation and allow filesystems to mark
devices as about to be released, allowing concurrent openers to wait
until the device is reclaimable.
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
fs/ext4/super.c | 12 ++++++++++++
fs/super.c | 51 ++++++++++++++++++++-------------------------------
fs/xfs/xfs_super.c | 27 +++++++++++++++++++++++++++
include/linux/fs.h | 1 +
4 files changed, 60 insertions(+), 31 deletions(-)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index e94df97ea440..45f550801329 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1477,6 +1477,17 @@ static void ext4_shutdown(struct super_block *sb)
ext4_force_shutdown(sb, EXT4_GOING_FLAGS_NOLOGFLUSH);
}
+static void ext4_yield_devices(struct super_block *sb)
+{
+ struct ext4_sb_info *sbi = sb->s_fs_info;
+ struct bdev_handle *journal_bdev_handle =
+ sbi ? sbi->s_journal_bdev_handle : NULL;
+
+ if (journal_bdev_handle)
+ bdev_yield(journal_bdev_handle);
+ bdev_yield(sb->s_bdev_handle);
+}
+
static void init_once(void *foo)
{
struct ext4_inode_info *ei = foo;
@@ -1638,6 +1649,7 @@ static const struct super_operations ext4_sops = {
.statfs = ext4_statfs,
.show_options = ext4_show_options,
.shutdown = ext4_shutdown,
+ .yield_devices = ext4_yield_devices,
#ifdef CONFIG_QUOTA
.quota_read = ext4_quota_read,
.quota_write = ext4_quota_write,
diff --git a/fs/super.c b/fs/super.c
index 4edde92d5e8f..7e24bbd65be2 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -87,10 +87,10 @@ static inline bool wait_born(struct super_block *sb)
/*
* Pairs with smp_store_release() in super_wake() and ensures
- * that we see SB_BORN or SB_DYING after we're woken.
+ * that we see SB_BORN or SB_DEAD after we're woken.
*/
flags = smp_load_acquire(&sb->s_flags);
- return flags & (SB_BORN | SB_DYING);
+ return flags & (SB_BORN | SB_DEAD);
}
/**
@@ -101,12 +101,12 @@ static inline bool wait_born(struct super_block *sb)
* If the superblock has neither passed through vfs_get_tree() or
* generic_shutdown_super() yet wait for it to happen. Either superblock
* creation will succeed and SB_BORN is set by vfs_get_tree() or we're
- * woken and we'll see SB_DYING.
+ * woken and we'll see SB_DEAD.
*
* The caller must have acquired a temporary reference on @sb->s_count.
*
* Return: The function returns true if SB_BORN was set and with
- * s_umount held. The function returns false if SB_DYING was
+ * s_umount held. The function returns false if SB_DEAD was
* set and without s_umount held.
*/
static __must_check bool super_lock(struct super_block *sb, bool excl)
@@ -122,7 +122,7 @@ static __must_check bool super_lock(struct super_block *sb, bool excl)
* @sb->s_root is NULL and @sb->s_active is 0. No one needs to
* grab a reference to this. Tell them so.
*/
- if (sb->s_flags & SB_DYING) {
+ if (sb->s_flags & SB_DEAD) {
super_unlock(sb, excl);
return false;
}
@@ -137,7 +137,7 @@ static __must_check bool super_lock(struct super_block *sb, bool excl)
wait_var_event(&sb->s_flags, wait_born(sb));
/*
- * Neither SB_BORN nor SB_DYING are ever unset so we never loop.
+ * Neither SB_BORN nor SB_DEAD are ever unset so we never loop.
* Just reacquire @sb->s_umount for the caller.
*/
goto relock;
@@ -439,18 +439,17 @@ void put_super(struct super_block *sb)
static void kill_super_notify(struct super_block *sb)
{
- lockdep_assert_not_held(&sb->s_umount);
+ const struct super_operations *sop = sb->s_op;
- /* already notified earlier */
- if (sb->s_flags & SB_DEAD)
- return;
+ lockdep_assert_held(&sb->s_umount);
+
+ /* Allow openers to wait for the devices to be cleaned up. */
+ if (sop->yield_devices)
+ sop->yield_devices(sb);
/*
* Remove it from @fs_supers so it isn't found by new
- * sget{_fc}() walkers anymore. Any concurrent mounter still
- * managing to grab a temporary reference is guaranteed to
- * already see SB_DYING and will wait until we notify them about
- * SB_DEAD.
+ * sget{_fc}() walkers anymore.
*/
spin_lock(&sb_lock);
hlist_del_init(&sb->s_instances);
@@ -459,7 +458,7 @@ static void kill_super_notify(struct super_block *sb)
/*
* Let concurrent mounts know that this thing is really dead.
* We don't need @sb->s_umount here as every concurrent caller
- * will see SB_DYING and either discard the superblock or wait
+ * will see SB_DEAD and either discard the superblock or wait
* for SB_DEAD.
*/
super_wake(sb, SB_DEAD);
@@ -483,8 +482,6 @@ void deactivate_locked_super(struct super_block *s)
unregister_shrinker(&s->s_shrink);
fs->kill_sb(s);
- kill_super_notify(s);
-
/*
* Since list_lru_destroy() may sleep, we cannot call it from
* put_super(), where we hold the sb_lock. Therefore we destroy
@@ -583,7 +580,7 @@ static bool grab_super(struct super_block *sb)
bool super_trylock_shared(struct super_block *sb)
{
if (down_read_trylock(&sb->s_umount)) {
- if (!(sb->s_flags & SB_DYING) && sb->s_root &&
+ if (!(sb->s_flags & SB_DEAD) && sb->s_root &&
(sb->s_flags & SB_BORN))
return true;
super_unlock_shared(sb);
@@ -689,16 +686,9 @@ void generic_shutdown_super(struct super_block *sb)
spin_unlock(&sb->s_inode_list_lock);
}
}
- /*
- * Broadcast to everyone that grabbed a temporary reference to this
- * superblock before we removed it from @fs_supers that the superblock
- * is dying. Every walker of @fs_supers outside of sget{_fc}() will now
- * discard this superblock and treat it as dead.
- *
- * We leave the superblock on @fs_supers so it can be found by
- * sget{_fc}() until we passed sb->kill_sb().
- */
- super_wake(sb, SB_DYING);
+
+ kill_super_notify(sb);
+
super_unlock_excl(sb);
if (sb->s_bdi != &noop_backing_dev_info) {
if (sb->s_iflags & SB_I_PERSB_BDI)
@@ -790,7 +780,7 @@ struct super_block *sget_fc(struct fs_context *fc,
/*
* Make the superblock visible on @super_blocks and @fs_supers.
* It's in a nascent state and users should wait on SB_BORN or
- * SB_DYING to be set.
+ * SB_DEAD to be set.
*/
list_add_tail(&s->s_list, &super_blocks);
hlist_add_head(&s->s_instances, &s->s_type->fs_supers);
@@ -906,7 +896,7 @@ static void __iterate_supers(void (*f)(struct super_block *))
spin_lock(&sb_lock);
list_for_each_entry(sb, &super_blocks, s_list) {
/* Pairs with memory marrier in super_wake(). */
- if (smp_load_acquire(&sb->s_flags) & SB_DYING)
+ if (smp_load_acquire(&sb->s_flags) & SB_DEAD)
continue;
sb->s_count++;
spin_unlock(&sb_lock);
@@ -1248,7 +1238,6 @@ void kill_anon_super(struct super_block *sb)
{
dev_t dev = sb->s_dev;
generic_shutdown_super(sb);
- kill_super_notify(sb);
free_anon_bdev(dev);
}
EXPORT_SYMBOL(kill_anon_super);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 84107d162e41..f7a0cb92c7c0 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1170,6 +1170,32 @@ xfs_fs_shutdown(
xfs_force_shutdown(XFS_M(sb), SHUTDOWN_DEVICE_REMOVED);
}
+static void xfs_fs_bdev_yield(struct bdev_handle *handle,
+ struct super_block *sb)
+{
+ if (handle != sb->s_bdev_handle)
+ bdev_yield(handle);
+}
+
+static void
+xfs_fs_yield_devices(
+ struct super_block *sb)
+{
+ struct xfs_mount *mp = XFS_M(sb);
+
+ if (mp) {
+ if (mp->m_logdev_targp &&
+ mp->m_logdev_targp != mp->m_ddev_targp)
+ xfs_fs_bdev_yield(mp->m_logdev_targp->bt_bdev_handle, sb);
+ if (mp->m_rtdev_targp)
+ xfs_fs_bdev_yield(mp->m_rtdev_targp->bt_bdev_handle, sb);
+ if (mp->m_ddev_targp)
+ xfs_fs_bdev_yield(mp->m_ddev_targp->bt_bdev_handle, sb);
+ }
+
+ bdev_yield(sb->s_bdev_handle);
+}
+
static const struct super_operations xfs_super_operations = {
.alloc_inode = xfs_fs_alloc_inode,
.destroy_inode = xfs_fs_destroy_inode,
@@ -1184,6 +1210,7 @@ static const struct super_operations xfs_super_operations = {
.nr_cached_objects = xfs_fs_nr_cached_objects,
.free_cached_objects = xfs_fs_free_cached_objects,
.shutdown = xfs_fs_shutdown,
+ .yield_devices = xfs_fs_yield_devices,
};
static int
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5174e821d451..f0278bf4ca03 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2026,6 +2026,7 @@ struct super_operations {
long (*free_cached_objects)(struct super_block *,
struct shrink_control *);
void (*shutdown)(struct super_block *sb);
+ void (*yield_devices)(struct super_block *sb);
};
/*
--
2.34.1
next prev parent reply other threads:[~2023-10-24 14:54 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-10-24 14:53 [PATCH RFC 0/6] fs,block: yield devices Christian Brauner
2023-10-24 14:53 ` [PATCH RFC 1/6] fs: simplify setup_bdev_super() calls Christian Brauner
2023-10-25 15:29 ` Jan Kara
2023-10-27 6:42 ` Christoph Hellwig
2023-10-24 14:53 ` [PATCH RFC 2/6] xfs: simplify device handling Christian Brauner
2023-10-25 15:30 ` Jan Kara
2023-10-27 6:42 ` Christoph Hellwig
2023-10-24 14:53 ` [PATCH RFC 3/6] ext4: " Christian Brauner
2023-10-25 15:30 ` Jan Kara
2023-10-27 6:42 ` Christoph Hellwig
2023-10-24 14:53 ` [PATCH RFC 4/6] bdev: simplify waiting for concurrent claimers Christian Brauner
2023-10-25 15:54 ` Jan Kara
2023-10-27 7:21 ` Christoph Hellwig
2023-10-24 14:53 ` [PATCH RFC 5/6] block: mark device as about to be released Christian Brauner
2023-10-24 14:53 ` Christian Brauner [this message]
2023-10-25 17:20 ` [PATCH RFC 0/6] fs,block: yield devices Jan Kara
2023-10-25 20:46 ` Christian Brauner
2023-10-26 10:35 ` Jan Kara
2023-10-26 12:07 ` Christian Brauner
2023-10-26 13:04 ` Jan Kara
2023-10-26 15:08 ` Christian Brauner
2023-10-26 15:58 ` Jan Kara
2023-10-27 7:24 ` Christoph Hellwig
2023-10-26 11:50 ` (subset) " Christian Brauner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20231024-vfs-super-rework-v1-6-37a8aa697148@kernel.org \
--to=brauner@kernel.org \
--cc=hch@lst.de \
--cc=jack@suse.cz \
--cc=linux-fsdevel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).