From: Anand Jain <anand.jain@oracle.com>
To: linux-btrfs@vger.kernel.org
Cc: nborisov@suse.com
Subject: [PATCH v7] btrfs: consolidate device_list_mutex in prepare_sprout to its parent
Date: Thu, 30 Sep 2021 20:16:27 +0800 [thread overview]
Message-ID: <6585e7d938e6600189c1bc7b61a7c76badef18dd.1633003671.git.anand.jain@oracle.com> (raw)
btrfs_prepare_sprout() splices seed devices into its own struct fs_devices,
so that its parent function btrfs_init_new_device() can add the new sprout
device to fs_info->fs_devices.
Both btrfs_prepare_sprout() and btrfs_init_new_device() needs
device_list_mutex. But they are holding it sequentially, thus creates a
small window to an opportunity to race. Close this opportunity and hold
device_list_mutex common to both btrfs_init_new_device() and
btrfs_prepare_sprout().
This patch splits btrfs_prepare_sprout() into btrfs_alloc_sprout() and
btrfs_splice_sprout(). This split is essential because device_list_mutex
shouldn't be held for btrfs_alloc_sprout() but must be held for
btrfs_splice_sprout(). So now a common device_list_mutex can be used
between btrfs_init_new_device() and btrfs_splice_sprout().
This patch also moves the lockdep_assert_held(&uuid_mutex) from the
starting of the function to just above the line where we need this lock.
Signed-off-by: Anand Jain <anand.jain@oracle.com>
---
v7:
. Not part of the patchset "btrfs: cleanup prepare_sprout" anymore as
1/3 is merged and 2/3 is dropped.
. Rename btrfs_alloc_sprout() to btrfs_init_sprout() as it does more
than just alloc and change return to btrfs_device *.
. Rename btrfs_splice_sprout() to btrfs_setup_sprout() as it does more
than just the splice.
. Add lockdep_assert_held(&uuid_mutex) and
lockdep_assert_held(&fs_devices->device_list_mutex) in btrfs_setup_sprout().
v6:
Remove RFC.
Split btrfs_prepare_sprout so that the allocation part can be outside
of the device_list_mutex in the parent function btrfs_init_new_device().
fs/btrfs/volumes.c | 73 +++++++++++++++++++++++++++++++++-------------
1 file changed, 53 insertions(+), 20 deletions(-)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 8e2b76b5fd14..10227b13a1a6 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2378,21 +2378,14 @@ struct btrfs_device *btrfs_find_device_by_devspec(
return btrfs_find_device_by_path(fs_info, device_path);
}
-/*
- * does all the dirty work required for changing file system's UUID.
- */
-static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
+static struct btrfs_fs_devices *btrfs_init_sprout(struct btrfs_fs_info *fs_info)
{
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
struct btrfs_fs_devices *old_devices;
struct btrfs_fs_devices *seed_devices;
- struct btrfs_super_block *disk_super = fs_info->super_copy;
- struct btrfs_device *device;
- u64 super_flags;
- lockdep_assert_held(&uuid_mutex);
if (!fs_devices->seeding)
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
/*
* Private copy of the seed devices, anchored at
@@ -2400,7 +2393,7 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
*/
seed_devices = alloc_fs_devices(NULL, NULL);
if (IS_ERR(seed_devices))
- return PTR_ERR(seed_devices);
+ return seed_devices;
/*
* It's necessary to retain a copy of the original seed fs_devices in
@@ -2411,9 +2404,10 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
old_devices = clone_fs_devices(fs_devices);
if (IS_ERR(old_devices)) {
kfree(seed_devices);
- return PTR_ERR(old_devices);
+ return old_devices;
}
+ lockdep_assert_held(&uuid_mutex);
list_add(&old_devices->fs_list, &fs_uuids);
memcpy(seed_devices, fs_devices, sizeof(*seed_devices));
@@ -2422,7 +2416,41 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
INIT_LIST_HEAD(&seed_devices->alloc_list);
mutex_init(&seed_devices->device_list_mutex);
- mutex_lock(&fs_devices->device_list_mutex);
+ return seed_devices;
+}
+
+/*
+ * Splice seed devices into the sprout fs_devices.
+ * Generate a new fsid for the sprouted readwrite btrfs.
+ */
+static void btrfs_setup_sprout(struct btrfs_fs_info *fs_info,
+ struct btrfs_fs_devices *seed_devices)
+{
+ struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+ struct btrfs_super_block *disk_super = fs_info->super_copy;
+ struct btrfs_device *device;
+ u64 super_flags;
+
+ /*
+ * We are updating the fsid, the thread leading to device_list_add()
+ * could race, so uuid_mutex is needed.
+ */
+ lockdep_assert_held(&uuid_mutex);
+
+ /*
+ * Below threads though they parse dev_list they are fine without
+ * device_list_mutex:
+ * All device ops and balance - as we are in btrfs_exclop_start.
+ * Various dev_list read parser - are using rcu.
+ * btrfs_ioctl_fitrim() - is using rcu.
+ *
+ * For-read threads as below are using device_list_mutex:
+ * Readonly scrub btrfs_scrub_dev()
+ * Readonly scrub btrfs_scrub_progress()
+ * btrfs_get_dev_stats()
+ */
+ lockdep_assert_held(&fs_devices->device_list_mutex);
+
list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices,
synchronize_rcu);
list_for_each_entry(device, &seed_devices->devices, dev_list)
@@ -2438,13 +2466,10 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
generate_random_uuid(fs_devices->fsid);
memcpy(fs_devices->metadata_uuid, fs_devices->fsid, BTRFS_FSID_SIZE);
memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
- mutex_unlock(&fs_devices->device_list_mutex);
super_flags = btrfs_super_flags(disk_super) &
~BTRFS_SUPER_FLAG_SEEDING;
btrfs_set_super_flags(disk_super, super_flags);
-
- return 0;
}
/*
@@ -2532,6 +2557,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
struct super_block *sb = fs_info->sb;
struct rcu_string *name;
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+ struct btrfs_fs_devices *seed_devices;
u64 orig_super_total_bytes;
u64 orig_super_num_devices;
int ret = 0;
@@ -2615,18 +2641,25 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
if (seeding_dev) {
btrfs_clear_sb_rdonly(sb);
- ret = btrfs_prepare_sprout(fs_info);
- if (ret) {
- btrfs_abort_transaction(trans, ret);
+
+ /* GFP_KERNEL alloc should not be under device_list_mutex */
+ seed_devices = btrfs_init_sprout(fs_info);
+ if (IS_ERR(seed_devices)) {
+ btrfs_abort_transaction(trans, (int)PTR_ERR(seed_devices));
goto error_trans;
}
+ }
+
+ mutex_lock(&fs_devices->device_list_mutex);
+ if (seeding_dev) {
+ btrfs_setup_sprout(fs_info, seed_devices);
+
btrfs_assign_next_active_device(fs_info->fs_devices->latest_dev,
device);
}
device->fs_devices = fs_devices;
- mutex_lock(&fs_devices->device_list_mutex);
mutex_lock(&fs_info->chunk_mutex);
list_add_rcu(&device->dev_list, &fs_devices->devices);
list_add(&device->dev_alloc_list, &fs_devices->alloc_list);
@@ -2688,7 +2721,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
/*
* fs_devices now represents the newly sprouted filesystem and
- * its fsid has been changed by btrfs_prepare_sprout
+ * its fsid has been changed by btrfs_sprout_splice().
*/
btrfs_sysfs_update_sprout_fsid(fs_devices);
}
--
2.31.1
WARNING: multiple messages have this Message-ID (diff)
From: Anand Jain <anand.jain@oracle.com>
To: linux-btrfs@vger.kernel.org, dsterba@suse.com
Subject: [PATCH v8] btrfs: consolidate device_list_mutex in prepare_sprout to its parent
Date: Wed, 13 Oct 2021 16:01:37 +0800 [thread overview]
Message-ID: <6585e7d938e6600189c1bc7b61a7c76badef18dd.1633003671.git.anand.jain@oracle.com> (raw)
Message-ID: <20211013080137.Bbt1omPCnM-ICZCnqrgjTq-2Rj4YbsM6OCm1MMBtG4o@z> (raw)
btrfs_prepare_sprout() splices seed devices into its own struct fs_devices,
so that its parent function btrfs_init_new_device() can add the new sprout
device to fs_info->fs_devices.
Both btrfs_prepare_sprout() and btrfs_init_new_device() needs
device_list_mutex. But they are holding it sequentially, thus creates a
small window to an opportunity to race. Close this opportunity and hold
device_list_mutex common to both btrfs_init_new_device() and
btrfs_prepare_sprout().
This patch splits btrfs_prepare_sprout() into btrfs_init_sprout() and
btrfs_setup_sprout(). This split is essential because device_list_mutex
shouldn't be held for allocs in btrfs_init_sprout() but must be held for
btrfs_setup_sprout(). So now a common device_list_mutex can be used
between btrfs_init_new_device() and btrfs_setup_sprout().
This patch also moves the lockdep_assert_held(&uuid_mutex) from the
starting of the function to just above the line where we need this lock.
Signed-off-by: Anand Jain <anand.jain@oracle.com>
---
v8:
Change log update:
s/btrfs_alloc_sprout/btrfs_init_sprout/g
s/btrfs_splice_sprout/btrfs_setup_sprout/g
No code change.
v7:
. Not part of the patchset "btrfs: cleanup prepare_sprout" anymore as
1/3 is merged and 2/3 is dropped.
. Rename btrfs_alloc_sprout() to btrfs_init_sprout() as it does more
than just alloc and change return to btrfs_device *.
. Rename btrfs_splice_sprout() to btrfs_setup_sprout() as it does more
than just the splice.
. Add lockdep_assert_held(&uuid_mutex) and
lockdep_assert_held(&fs_devices->device_list_mutex) in btrfs_setup_sprout().
v6:
Remove RFC.
Split btrfs_prepare_sprout so that the allocation part can be outside
of the device_list_mutex in the parent function btrfs_init_new_device().
fs/btrfs/volumes.c | 73 +++++++++++++++++++++++++++++++++-------------
1 file changed, 53 insertions(+), 20 deletions(-)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 8e2b76b5fd14..10227b13a1a6 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2378,21 +2378,14 @@ struct btrfs_device *btrfs_find_device_by_devspec(
return btrfs_find_device_by_path(fs_info, device_path);
}
-/*
- * does all the dirty work required for changing file system's UUID.
- */
-static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
+static struct btrfs_fs_devices *btrfs_init_sprout(struct btrfs_fs_info *fs_info)
{
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
struct btrfs_fs_devices *old_devices;
struct btrfs_fs_devices *seed_devices;
- struct btrfs_super_block *disk_super = fs_info->super_copy;
- struct btrfs_device *device;
- u64 super_flags;
- lockdep_assert_held(&uuid_mutex);
if (!fs_devices->seeding)
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
/*
* Private copy of the seed devices, anchored at
@@ -2400,7 +2393,7 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
*/
seed_devices = alloc_fs_devices(NULL, NULL);
if (IS_ERR(seed_devices))
- return PTR_ERR(seed_devices);
+ return seed_devices;
/*
* It's necessary to retain a copy of the original seed fs_devices in
@@ -2411,9 +2404,10 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
old_devices = clone_fs_devices(fs_devices);
if (IS_ERR(old_devices)) {
kfree(seed_devices);
- return PTR_ERR(old_devices);
+ return old_devices;
}
+ lockdep_assert_held(&uuid_mutex);
list_add(&old_devices->fs_list, &fs_uuids);
memcpy(seed_devices, fs_devices, sizeof(*seed_devices));
@@ -2422,7 +2416,41 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
INIT_LIST_HEAD(&seed_devices->alloc_list);
mutex_init(&seed_devices->device_list_mutex);
- mutex_lock(&fs_devices->device_list_mutex);
+ return seed_devices;
+}
+
+/*
+ * Splice seed devices into the sprout fs_devices.
+ * Generate a new fsid for the sprouted readwrite btrfs.
+ */
+static void btrfs_setup_sprout(struct btrfs_fs_info *fs_info,
+ struct btrfs_fs_devices *seed_devices)
+{
+ struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+ struct btrfs_super_block *disk_super = fs_info->super_copy;
+ struct btrfs_device *device;
+ u64 super_flags;
+
+ /*
+ * We are updating the fsid, the thread leading to device_list_add()
+ * could race, so uuid_mutex is needed.
+ */
+ lockdep_assert_held(&uuid_mutex);
+
+ /*
+ * Below threads though they parse dev_list they are fine without
+ * device_list_mutex:
+ * All device ops and balance - as we are in btrfs_exclop_start.
+ * Various dev_list read parser - are using rcu.
+ * btrfs_ioctl_fitrim() - is using rcu.
+ *
+ * For-read threads as below are using device_list_mutex:
+ * Readonly scrub btrfs_scrub_dev()
+ * Readonly scrub btrfs_scrub_progress()
+ * btrfs_get_dev_stats()
+ */
+ lockdep_assert_held(&fs_devices->device_list_mutex);
+
list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices,
synchronize_rcu);
list_for_each_entry(device, &seed_devices->devices, dev_list)
@@ -2438,13 +2466,10 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
generate_random_uuid(fs_devices->fsid);
memcpy(fs_devices->metadata_uuid, fs_devices->fsid, BTRFS_FSID_SIZE);
memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
- mutex_unlock(&fs_devices->device_list_mutex);
super_flags = btrfs_super_flags(disk_super) &
~BTRFS_SUPER_FLAG_SEEDING;
btrfs_set_super_flags(disk_super, super_flags);
-
- return 0;
}
/*
@@ -2532,6 +2557,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
struct super_block *sb = fs_info->sb;
struct rcu_string *name;
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+ struct btrfs_fs_devices *seed_devices;
u64 orig_super_total_bytes;
u64 orig_super_num_devices;
int ret = 0;
@@ -2615,18 +2641,25 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
if (seeding_dev) {
btrfs_clear_sb_rdonly(sb);
- ret = btrfs_prepare_sprout(fs_info);
- if (ret) {
- btrfs_abort_transaction(trans, ret);
+
+ /* GFP_KERNEL alloc should not be under device_list_mutex */
+ seed_devices = btrfs_init_sprout(fs_info);
+ if (IS_ERR(seed_devices)) {
+ btrfs_abort_transaction(trans, (int)PTR_ERR(seed_devices));
goto error_trans;
}
+ }
+
+ mutex_lock(&fs_devices->device_list_mutex);
+ if (seeding_dev) {
+ btrfs_setup_sprout(fs_info, seed_devices);
+
btrfs_assign_next_active_device(fs_info->fs_devices->latest_dev,
device);
}
device->fs_devices = fs_devices;
- mutex_lock(&fs_devices->device_list_mutex);
mutex_lock(&fs_info->chunk_mutex);
list_add_rcu(&device->dev_list, &fs_devices->devices);
list_add(&device->dev_alloc_list, &fs_devices->alloc_list);
@@ -2688,7 +2721,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
/*
* fs_devices now represents the newly sprouted filesystem and
- * its fsid has been changed by btrfs_prepare_sprout
+ * its fsid has been changed by btrfs_sprout_splice().
*/
btrfs_sysfs_update_sprout_fsid(fs_devices);
}
--
2.31.1
next reply other threads:[~2021-09-30 12:16 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-09-30 12:16 Anand Jain [this message]
2021-09-30 13:42 ` [PATCH v7] btrfs: consolidate device_list_mutex in prepare_sprout to its parent Anand Jain
2021-10-13 8:01 ` [PATCH v8] " Anand Jain
2021-10-14 15:30 ` Josef Bacik
2021-10-14 22:34 ` Anand Jain
2021-11-08 20:02 ` David Sterba
2021-11-09 8:05 ` Anand Jain
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=6585e7d938e6600189c1bc7b61a7c76badef18dd.1633003671.git.anand.jain@oracle.com \
--to=anand.jain@oracle.com \
--cc=linux-btrfs@vger.kernel.org \
--cc=nborisov@suse.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).