* [PATCH 0/3] btrfs: zoned: misc fixes for problems uncovered by fstests
@ 2022-11-04 14:12 Johannes Thumshirn
2022-11-04 14:12 ` [PATCH 1/3] btrfs: zoned: clone zoned device info when cloning a device Johannes Thumshirn
` (3 more replies)
0 siblings, 4 replies; 7+ messages in thread
From: Johannes Thumshirn @ 2022-11-04 14:12 UTC (permalink / raw)
To: David Sterba; +Cc: Johannes Thumshirn, linux-btrfs, Naohiro Aota
I've started running fstests with an increased number of zoned devices in
SCRATCH_DEV_POOL and here's the fixes for the fallout of these.
All three patches are unrelated to each other (i.e. no dependencies).
Johannes Thumshirn (3):
btrfs: zoned: clone zoned device info when cloning a device
btrfs: zoned: init device's zone info for seeding
btrfs: zoned: fix locking imbalance on scrub
fs/btrfs/disk-io.c | 4 +++-
fs/btrfs/scrub.c | 1 -
fs/btrfs/volumes.c | 23 +++++++++++++++++++++--
fs/btrfs/volumes.h | 2 +-
fs/btrfs/zoned.c | 42 ++++++++++++++++++++++++++++++++++++++++++
fs/btrfs/zoned.h | 10 ++++++++++
6 files changed, 77 insertions(+), 5 deletions(-)
--
2.37.3
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH 1/3] btrfs: zoned: clone zoned device info when cloning a device
2022-11-04 14:12 [PATCH 0/3] btrfs: zoned: misc fixes for problems uncovered by fstests Johannes Thumshirn
@ 2022-11-04 14:12 ` Johannes Thumshirn
2022-11-07 15:17 ` Naohiro Aota
2022-11-04 14:12 ` [PATCH 2/3] btrfs: zoned: init device's zone info for seeding Johannes Thumshirn
` (2 subsequent siblings)
3 siblings, 1 reply; 7+ messages in thread
From: Johannes Thumshirn @ 2022-11-04 14:12 UTC (permalink / raw)
To: David Sterba; +Cc: Johannes Thumshirn, linux-btrfs, Naohiro Aota
When cloning a btrfs_device, we're not cloning the associated
btrfs_zoned_device_info structure of the device in case of a zoned
filesystem.
This then later leads to a nullptr dereference when accessing the device's
zone_info for instance when setting a zone as active.
This was uncovered by fstests' testcase btrfs/161.
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
fs/btrfs/volumes.c | 12 ++++++++++++
fs/btrfs/zoned.c | 42 ++++++++++++++++++++++++++++++++++++++++++
fs/btrfs/zoned.h | 10 ++++++++++
3 files changed, 64 insertions(+)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index f81aa7f9ef19..f72ce52c67ce 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1021,6 +1021,18 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
rcu_assign_pointer(device->name, name);
}
+ if (orig_dev->zone_info) {
+ struct btrfs_zoned_device_info *zone_info;
+
+ zone_info = btrfs_clone_dev_zone_info(orig_dev);
+ if (!zone_info) {
+ btrfs_free_device(device);
+ ret = -ENOMEM;
+ goto error;
+ }
+ device->zone_info = zone_info;
+ }
+
list_add(&device->dev_list, &fs_devices->devices);
device->fs_devices = fs_devices;
fs_devices->num_devices++;
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 9d12a23e1a59..f830f70fc214 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -641,6 +641,48 @@ void btrfs_destroy_dev_zone_info(struct btrfs_device *device)
device->zone_info = NULL;
}
+struct btrfs_zoned_device_info *btrfs_clone_dev_zone_info(struct btrfs_device *orig_dev)
+{
+ struct btrfs_zoned_device_info *zone_info;
+
+ zone_info = kmemdup(orig_dev->zone_info,
+ sizeof(*zone_info), GFP_KERNEL);
+ if (!zone_info)
+ return NULL;
+
+
+ zone_info->seq_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL);
+ if (!zone_info->seq_zones)
+ goto out;
+
+ bitmap_copy(zone_info->seq_zones, orig_dev->zone_info->seq_zones,
+ zone_info->nr_zones);
+
+ zone_info->empty_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL);
+ if (!zone_info->empty_zones)
+ goto out;
+
+ bitmap_copy(zone_info->empty_zones, orig_dev->zone_info->empty_zones,
+ zone_info->nr_zones);
+
+ zone_info->active_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL);
+ if (!zone_info->active_zones)
+ goto out;
+
+ bitmap_copy(zone_info->active_zones, orig_dev->zone_info->active_zones,
+ zone_info->nr_zones);
+ zone_info->zone_cache = NULL;
+
+ return zone_info;
+
+out:
+ bitmap_free(zone_info->seq_zones);
+ bitmap_free(zone_info->empty_zones);
+ bitmap_free(zone_info->active_zones);
+ kfree(zone_info);
+ return NULL;
+}
+
int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
struct blk_zone *zone)
{
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index 8d7e6be853c6..69fb1af89a53 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -37,6 +37,7 @@ int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
int btrfs_get_dev_zone_info_all_devices(struct btrfs_fs_info *fs_info);
int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache);
void btrfs_destroy_dev_zone_info(struct btrfs_device *device);
+struct btrfs_zoned_device_info *btrfs_clone_dev_zone_info(struct btrfs_device *orig_dev);
int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info);
int btrfs_check_mountopts_zoned(struct btrfs_fs_info *info);
int btrfs_sb_log_location_bdev(struct block_device *bdev, int mirror, int rw,
@@ -104,6 +105,15 @@ static inline int btrfs_get_dev_zone_info(struct btrfs_device *device,
static inline void btrfs_destroy_dev_zone_info(struct btrfs_device *device) { }
+/* In case the kernel is compiled without CONFIG_BLK_DEV_ZONED we'll never
+ * call into btrfs_clone_dev_zone_info() so it's save to return NULL here.
+ */
+static inline struct btrfs_zoned_device_info *btrfs_clone_dev_zone_info(
+ struct btrfs_device *orig_dev)
+{
+ return NULL;
+}
+
static inline int btrfs_check_zoned_mode(const struct btrfs_fs_info *fs_info)
{
if (!btrfs_is_zoned(fs_info))
--
2.37.3
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 2/3] btrfs: zoned: init device's zone info for seeding
2022-11-04 14:12 [PATCH 0/3] btrfs: zoned: misc fixes for problems uncovered by fstests Johannes Thumshirn
2022-11-04 14:12 ` [PATCH 1/3] btrfs: zoned: clone zoned device info when cloning a device Johannes Thumshirn
@ 2022-11-04 14:12 ` Johannes Thumshirn
2022-11-04 14:12 ` [PATCH 3/3] btrfs: zoned: fix locking imbalance on scrub Johannes Thumshirn
2022-11-07 13:28 ` [PATCH 0/3] btrfs: zoned: misc fixes for problems uncovered by fstests David Sterba
3 siblings, 0 replies; 7+ messages in thread
From: Johannes Thumshirn @ 2022-11-04 14:12 UTC (permalink / raw)
To: David Sterba; +Cc: Johannes Thumshirn, linux-btrfs, Naohiro Aota
When performing seeding on a zoned filesystem it is necessary to
initialize each zoned device's btrfs_zoned_device_info structure,
otherwise mounting the filesystem will cause a nullptr dereference.
This was uncovered by fstests' testcase btrfs/163.
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
fs/btrfs/disk-io.c | 4 +++-
fs/btrfs/volumes.c | 11 +++++++++--
fs/btrfs/volumes.h | 2 +-
3 files changed, 13 insertions(+), 4 deletions(-)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 5c099d046170..f5f793af12a0 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2564,7 +2564,9 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
fs_info->dev_root = root;
}
/* Initialize fs_info for all devices in any case */
- btrfs_init_devices_late(fs_info);
+ ret = btrfs_init_devices_late(fs_info);
+ if (ret)
+ goto out;
/*
* This tree can share blocks with some other fs tree during relocation
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index f72ce52c67ce..2ee4050bf6c3 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -7769,10 +7769,11 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
return ret;
}
-void btrfs_init_devices_late(struct btrfs_fs_info *fs_info)
+int btrfs_init_devices_late(struct btrfs_fs_info *fs_info)
{
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices, *seed_devs;
struct btrfs_device *device;
+ int ret = 0;
fs_devices->fs_info = fs_info;
@@ -7781,12 +7782,18 @@ void btrfs_init_devices_late(struct btrfs_fs_info *fs_info)
device->fs_info = fs_info;
list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list) {
- list_for_each_entry(device, &seed_devs->devices, dev_list)
+ list_for_each_entry(device, &seed_devs->devices, dev_list) {
device->fs_info = fs_info;
+ ret = btrfs_get_dev_zone_info(device, false);
+ if (ret)
+ break;
+ }
seed_devs->fs_info = fs_info;
}
mutex_unlock(&fs_devices->device_list_mutex);
+
+ return ret;
}
static u64 btrfs_dev_stats_value(const struct extent_buffer *eb,
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 6c7b5cf2de79..b05124e62412 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -681,7 +681,7 @@ int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);
int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_get_dev_stats *stats);
-void btrfs_init_devices_late(struct btrfs_fs_info *fs_info);
+int btrfs_init_devices_late(struct btrfs_fs_info *fs_info);
int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info);
int btrfs_run_dev_stats(struct btrfs_trans_handle *trans);
void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev);
--
2.37.3
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 3/3] btrfs: zoned: fix locking imbalance on scrub
2022-11-04 14:12 [PATCH 0/3] btrfs: zoned: misc fixes for problems uncovered by fstests Johannes Thumshirn
2022-11-04 14:12 ` [PATCH 1/3] btrfs: zoned: clone zoned device info when cloning a device Johannes Thumshirn
2022-11-04 14:12 ` [PATCH 2/3] btrfs: zoned: init device's zone info for seeding Johannes Thumshirn
@ 2022-11-04 14:12 ` Johannes Thumshirn
2022-11-07 13:28 ` [PATCH 0/3] btrfs: zoned: misc fixes for problems uncovered by fstests David Sterba
3 siblings, 0 replies; 7+ messages in thread
From: Johannes Thumshirn @ 2022-11-04 14:12 UTC (permalink / raw)
To: David Sterba; +Cc: Johannes Thumshirn, linux-btrfs, Naohiro Aota
If we're doing device replace on a zoned filesystem and discover in
scrub_enumerate_chunks() that we don't have to copy the block-group it is
unlocked before it gets skipped.
But as the block-group hasn't yet been locked before it leads to a locking
imbalance. To fix this simply remove the unlock.
This was uncovered by fstests' testcase btrfs/163.
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
fs/btrfs/scrub.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 2c7053d20c14..65f6f04afdbe 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3932,7 +3932,6 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
if (sctx->is_dev_replace && btrfs_is_zoned(fs_info)) {
if (!test_bit(BLOCK_GROUP_FLAG_TO_COPY, &cache->runtime_flags)) {
- spin_unlock(&cache->lock);
btrfs_put_block_group(cache);
goto skip;
}
--
2.37.3
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH 0/3] btrfs: zoned: misc fixes for problems uncovered by fstests
2022-11-04 14:12 [PATCH 0/3] btrfs: zoned: misc fixes for problems uncovered by fstests Johannes Thumshirn
` (2 preceding siblings ...)
2022-11-04 14:12 ` [PATCH 3/3] btrfs: zoned: fix locking imbalance on scrub Johannes Thumshirn
@ 2022-11-07 13:28 ` David Sterba
3 siblings, 0 replies; 7+ messages in thread
From: David Sterba @ 2022-11-07 13:28 UTC (permalink / raw)
To: Johannes Thumshirn; +Cc: David Sterba, linux-btrfs, Naohiro Aota
On Fri, Nov 04, 2022 at 07:12:32AM -0700, Johannes Thumshirn wrote:
> I've started running fstests with an increased number of zoned devices in
> SCRATCH_DEV_POOL and here's the fixes for the fallout of these.
>
> All three patches are unrelated to each other (i.e. no dependencies).
>
> Johannes Thumshirn (3):
> btrfs: zoned: clone zoned device info when cloning a device
> btrfs: zoned: init device's zone info for seeding
> btrfs: zoned: fix locking imbalance on scrub
Added to misc-next, thanks.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 1/3] btrfs: zoned: clone zoned device info when cloning a device
2022-11-04 14:12 ` [PATCH 1/3] btrfs: zoned: clone zoned device info when cloning a device Johannes Thumshirn
@ 2022-11-07 15:17 ` Naohiro Aota
2022-11-07 15:23 ` Johannes Thumshirn
0 siblings, 1 reply; 7+ messages in thread
From: Naohiro Aota @ 2022-11-07 15:17 UTC (permalink / raw)
To: Johannes Thumshirn; +Cc: David Sterba, linux-btrfs@vger.kernel.org
On Fri, Nov 04, 2022 at 07:12:33AM -0700, Johannes Thumshirn wrote:
> When cloning a btrfs_device, we're not cloning the associated
> btrfs_zoned_device_info structure of the device in case of a zoned
> filesystem.
>
> This then later leads to a nullptr dereference when accessing the device's
> zone_info for instance when setting a zone as active.
>
> This was uncovered by fstests' testcase btrfs/161.
>
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> ---
> fs/btrfs/volumes.c | 12 ++++++++++++
> fs/btrfs/zoned.c | 42 ++++++++++++++++++++++++++++++++++++++++++
> fs/btrfs/zoned.h | 10 ++++++++++
> 3 files changed, 64 insertions(+)
>
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index f81aa7f9ef19..f72ce52c67ce 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -1021,6 +1021,18 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
> rcu_assign_pointer(device->name, name);
> }
>
> + if (orig_dev->zone_info) {
> + struct btrfs_zoned_device_info *zone_info;
> +
> + zone_info = btrfs_clone_dev_zone_info(orig_dev);
> + if (!zone_info) {
> + btrfs_free_device(device);
> + ret = -ENOMEM;
> + goto error;
> + }
> + device->zone_info = zone_info;
> + }
> +
> list_add(&device->dev_list, &fs_devices->devices);
> device->fs_devices = fs_devices;
> fs_devices->num_devices++;
This part looks good.
> diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
> index 9d12a23e1a59..f830f70fc214 100644
> --- a/fs/btrfs/zoned.c
> +++ b/fs/btrfs/zoned.c
> @@ -641,6 +641,48 @@ void btrfs_destroy_dev_zone_info(struct btrfs_device *device)
> device->zone_info = NULL;
> }
>
> +struct btrfs_zoned_device_info *btrfs_clone_dev_zone_info(struct btrfs_device *orig_dev)
I have a concern about this function. Since this function duplicates the
zone_info, it might cause a split-brain state of zone_info. Actually, that
won't happen because the current callers are the seeding device
functions. And, another solution, reference counting the zone_info, is not
an option as it is only used for the seeding case.
So, this function is safe only when a caller ensures read-only access to
the copied zone_info. I'd like to have a comment here so future developer
notices the proper usage.
> +{
> + struct btrfs_zoned_device_info *zone_info;
> +
> + zone_info = kmemdup(orig_dev->zone_info,
> + sizeof(*zone_info), GFP_KERNEL);
> + if (!zone_info)
> + return NULL;
> +
> +
> + zone_info->seq_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL);
> + if (!zone_info->seq_zones)
> + goto out;
> +
> + bitmap_copy(zone_info->seq_zones, orig_dev->zone_info->seq_zones,
> + zone_info->nr_zones);
> +
> + zone_info->empty_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL);
> + if (!zone_info->empty_zones)
> + goto out;
> +
> + bitmap_copy(zone_info->empty_zones, orig_dev->zone_info->empty_zones,
> + zone_info->nr_zones);
> +
> + zone_info->active_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL);
> + if (!zone_info->active_zones)
> + goto out;
> +
> + bitmap_copy(zone_info->active_zones, orig_dev->zone_info->active_zones,
> + zone_info->nr_zones);
> + zone_info->zone_cache = NULL;
> +
> + return zone_info;
> +
> +out:
> + bitmap_free(zone_info->seq_zones);
> + bitmap_free(zone_info->empty_zones);
> + bitmap_free(zone_info->active_zones);
> + kfree(zone_info);
> + return NULL;
> +}
> +
> int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
> struct blk_zone *zone)
> {
> diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
> index 8d7e6be853c6..69fb1af89a53 100644
> --- a/fs/btrfs/zoned.h
> +++ b/fs/btrfs/zoned.h
> @@ -37,6 +37,7 @@ int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
> int btrfs_get_dev_zone_info_all_devices(struct btrfs_fs_info *fs_info);
> int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache);
> void btrfs_destroy_dev_zone_info(struct btrfs_device *device);
> +struct btrfs_zoned_device_info *btrfs_clone_dev_zone_info(struct btrfs_device *orig_dev);
> int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info);
> int btrfs_check_mountopts_zoned(struct btrfs_fs_info *info);
> int btrfs_sb_log_location_bdev(struct block_device *bdev, int mirror, int rw,
> @@ -104,6 +105,15 @@ static inline int btrfs_get_dev_zone_info(struct btrfs_device *device,
>
> static inline void btrfs_destroy_dev_zone_info(struct btrfs_device *device) { }
>
> +/* In case the kernel is compiled without CONFIG_BLK_DEV_ZONED we'll never
> + * call into btrfs_clone_dev_zone_info() so it's save to return NULL here.
> + */
> +static inline struct btrfs_zoned_device_info *btrfs_clone_dev_zone_info(
> + struct btrfs_device *orig_dev)
> +{
> + return NULL;
> +}
> +
> static inline int btrfs_check_zoned_mode(const struct btrfs_fs_info *fs_info)
> {
> if (!btrfs_is_zoned(fs_info))
> --
> 2.37.3
>
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 1/3] btrfs: zoned: clone zoned device info when cloning a device
2022-11-07 15:17 ` Naohiro Aota
@ 2022-11-07 15:23 ` Johannes Thumshirn
0 siblings, 0 replies; 7+ messages in thread
From: Johannes Thumshirn @ 2022-11-07 15:23 UTC (permalink / raw)
To: Naohiro Aota; +Cc: David Sterba, linux-btrfs@vger.kernel.org
On 07.11.22 16:17, Naohiro Aota wrote:
>
> I have a concern about this function. Since this function duplicates the
> zone_info, it might cause a split-brain state of zone_info. Actually, that
> won't happen because the current callers are the seeding device
> functions. And, another solution, reference counting the zone_info, is not
> an option as it is only used for the seeding case.
>
> So, this function is safe only when a caller ensures read-only access to
> the copied zone_info. I'd like to have a comment here so future developer
> notices the proper usage.
But isn't this the same for all other fields of btrfs_device? Given it's
only called by clone_fs_devices() which in turn is only called by either
open_seed_devices() or btrfs_init_sprout() it's a save context.
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2022-11-07 15:23 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2022-11-04 14:12 [PATCH 0/3] btrfs: zoned: misc fixes for problems uncovered by fstests Johannes Thumshirn
2022-11-04 14:12 ` [PATCH 1/3] btrfs: zoned: clone zoned device info when cloning a device Johannes Thumshirn
2022-11-07 15:17 ` Naohiro Aota
2022-11-07 15:23 ` Johannes Thumshirn
2022-11-04 14:12 ` [PATCH 2/3] btrfs: zoned: init device's zone info for seeding Johannes Thumshirn
2022-11-04 14:12 ` [PATCH 3/3] btrfs: zoned: fix locking imbalance on scrub Johannes Thumshirn
2022-11-07 13:28 ` [PATCH 0/3] btrfs: zoned: misc fixes for problems uncovered by fstests David Sterba
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox