linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] fs: Avoid grabbing sb->s_umount under bdev->bd_holder_lock
@ 2023-10-18 15:29 Jan Kara
  2023-10-18 15:46 ` Christoph Hellwig
                   ` (2 more replies)
  0 siblings, 3 replies; 16+ messages in thread
From: Jan Kara @ 2023-10-18 15:29 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Christian Brauner, linux-fsdevel, Jan Kara

The implementation of bdev holder operations such as fs_bdev_mark_dead()
and fs_bdev_sync() grab sb->s_umount semaphore under
bdev->bd_holder_lock. This is problematic because it leads to
disk->open_mutex -> sb->s_umount lock ordering which is counterintuitive
(usually we grab higher level (e.g. filesystem) locks first and lower
level (e.g. block layer) locks later) and indeed makes lockdep complain
about possible locking cycles whenever we open a block device while
holding sb->s_umount semaphore. Implement a function
bdev_super_lock_shared() which safely transitions from holding
bdev->bd_holder_lock to holding sb->s_umount on a live superblock without
introducing the problematic lock dependency. We use this function in
fs_bdev_sync() and fs_bdev_mark_dead().

Signed-off-by: Jan Kara <jack@suse.cz>
---
 block/bdev.c  |  5 +++--
 block/ioctl.c |  5 +++--
 fs/super.c    | 48 ++++++++++++++++++++++++++++++------------------
 3 files changed, 36 insertions(+), 22 deletions(-)

diff --git a/block/bdev.c b/block/bdev.c
index f3b13aa1b7d4..a9a485aae6b0 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -961,9 +961,10 @@ void bdev_mark_dead(struct block_device *bdev, bool surprise)
 	mutex_lock(&bdev->bd_holder_lock);
 	if (bdev->bd_holder_ops && bdev->bd_holder_ops->mark_dead)
 		bdev->bd_holder_ops->mark_dead(bdev, surprise);
-	else
+	else {
+		mutex_unlock(&bdev->bd_holder_lock);
 		sync_blockdev(bdev);
-	mutex_unlock(&bdev->bd_holder_lock);
+	}
 
 	invalidate_bdev(bdev);
 }
diff --git a/block/ioctl.c b/block/ioctl.c
index d5f5cd61efd7..fc492f9d34ae 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -370,9 +370,10 @@ static int blkdev_flushbuf(struct block_device *bdev, unsigned cmd,
 	mutex_lock(&bdev->bd_holder_lock);
 	if (bdev->bd_holder_ops && bdev->bd_holder_ops->sync)
 		bdev->bd_holder_ops->sync(bdev);
-	else
+	else {
+		mutex_unlock(&bdev->bd_holder_lock);
 		sync_blockdev(bdev);
-	mutex_unlock(&bdev->bd_holder_lock);
+	}
 
 	invalidate_bdev(bdev);
 	return 0;
diff --git a/fs/super.c b/fs/super.c
index 2d762ce67f6e..8b80d03e7cb4 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1419,32 +1419,45 @@ EXPORT_SYMBOL(sget_dev);
 
 #ifdef CONFIG_BLOCK
 /*
- * Lock a super block that the callers holds a reference to.
+ * Lock the superblock that is holder of the bdev. Returns the superblock
+ * pointer if we successfully locked the superblock and it is alive. Otherwise
+ * we return NULL and just unlock bdev->bd_holder_lock.
  *
- * The caller needs to ensure that the super_block isn't being freed while
- * calling this function, e.g. by holding a lock over the call to this function
- * and the place that clears the pointer to the superblock used by this function
- * before freeing the superblock.
+ * The function must be called with bdev->bd_holder_lock held and releases it.
  */
-static bool super_lock_shared_active(struct super_block *sb)
+static struct super_block *bdev_super_lock_shared(struct block_device *bdev)
+	__releases(&bdev->bd_holder_lock)
 {
-	bool born = super_lock_shared(sb);
+	struct super_block *sb = bdev->bd_holder;
+	bool born;
 
+	lockdep_assert_held(&bdev->bd_holder_lock);
+	/* Make sure sb doesn't go away from under us */
+	spin_lock(&sb_lock);
+	sb->s_count++;
+	spin_unlock(&sb_lock);
+	mutex_unlock(&bdev->bd_holder_lock);
+
+	born = super_lock_shared(sb);
 	if (!born || !sb->s_root || !(sb->s_flags & SB_ACTIVE)) {
 		super_unlock_shared(sb);
-		return false;
+		put_super(sb);
+		return NULL;
 	}
-	return true;
+	/*
+	 * The superblock is active and we hold s_umount, we can drop our
+	 * temporary reference now.
+	 */
+	put_super(sb);
+	return sb;
 }
 
 static void fs_bdev_mark_dead(struct block_device *bdev, bool surprise)
 {
-	struct super_block *sb = bdev->bd_holder;
-
-	/* bd_holder_lock ensures that the sb isn't freed */
-	lockdep_assert_held(&bdev->bd_holder_lock);
+	struct super_block *sb;
 
-	if (!super_lock_shared_active(sb))
+	sb = bdev_super_lock_shared(bdev);
+	if (!sb)
 		return;
 
 	if (!surprise)
@@ -1459,11 +1472,10 @@ static void fs_bdev_mark_dead(struct block_device *bdev, bool surprise)
 
 static void fs_bdev_sync(struct block_device *bdev)
 {
-	struct super_block *sb = bdev->bd_holder;
-
-	lockdep_assert_held(&bdev->bd_holder_lock);
+	struct super_block *sb;
 
-	if (!super_lock_shared_active(sb))
+	sb = bdev_super_lock_shared(bdev);
+	if (!sb)
 		return;
 	sync_filesystem(sb);
 	super_unlock_shared(sb);
-- 
2.35.3


^ permalink raw reply related	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2023-10-24  8:44 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-10-18 15:29 [PATCH] fs: Avoid grabbing sb->s_umount under bdev->bd_holder_lock Jan Kara
2023-10-18 15:46 ` Christoph Hellwig
2023-10-19  8:16 ` Christian Brauner
2023-10-19  8:33 ` Christian Brauner
2023-10-19 10:57   ` Jan Kara
2023-10-20 11:18     ` Christian Brauner
2023-10-19 13:40   ` Christoph Hellwig
2023-10-20 11:31     ` Christian Brauner
2023-10-20 12:04       ` Jan Kara
2023-10-23  7:40         ` Christian Brauner
2023-10-23 15:35           ` loop change deprecation bdev->bd_holder_lock Christian Brauner
2023-10-24  7:03             ` Christoph Hellwig
2023-10-24  8:44               ` loop change deprecation Christian Brauner
2023-10-23 14:08         ` LOOP_CONFIGURE uevents Christian Brauner
2023-10-24  7:06           ` Christoph Hellwig
2023-10-24  8:42             ` Christian Brauner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).