* [PATCH 1/2] btrfs: use bdev_rw_virt() to read and scratch the disk super block
2025-07-09 4:05 [PATCH 0/2] btrfs: do not poke into bdev's page cache Qu Wenruo
@ 2025-07-09 4:05 ` Qu Wenruo
2025-07-09 6:59 ` Johannes Thumshirn
2025-07-09 4:05 ` [PATCH 2/2] btrfs: do not poke into bdev's page cache for super block write Qu Wenruo
2025-07-09 7:43 ` [PATCH 0/2] btrfs: do not poke into bdev's page cache Johannes Thumshirn
2 siblings, 1 reply; 8+ messages in thread
From: Qu Wenruo @ 2025-07-09 4:05 UTC (permalink / raw)
To: linux-btrfs
Currently we're using the block device page cache to read and scratch
the super block.
This makes btrfs to poking into lower layer unnecessarily.
Modify the following routines by:
- Use kmalloc() + bdev_rw_virt() for btrfs_read_disk_super()
This means we can easily replace btrfs_release_disk_super() with a
simple kfree().
This also means there will no longer be any cached read for
btrfs_read_disk_super(), thus we can drop the @drop_cache parameter.
However this change brings a slightly behavior change for
btrfs_scan_one_device(), now every time the device is scanned, btrfs
will submit a read request, no more cached scan.
- Use bdev_rw_virt() for btrfs_scratch_superblock()
Just use the memory returned by btrfs_read_disk_super() and reset the
magic number.
Then use bdev_rw_virt() to do the write.
Since we're here, also make the error message cover the initial disk
super read error.
- Use kmalloc() and bdev_rw_virt() for sb_writer_pointer()
In zoned mode we have a corner case that both super block zones are
full, and we need to determine which zone to reuse.
In that case we need to read the last super block of both zones and
compare their generations.
Here we just use regular kmalloc() + bdev_rw_virt() to do the read.
And since we're here, simplify the error handling path by always
calling kfree() on both super blocks.
Since both super block pointers are initialized to NULL, we're safe to
call kfree() on them.
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
fs/btrfs/disk-io.c | 10 +++----
fs/btrfs/super.c | 4 +--
fs/btrfs/volumes.c | 70 ++++++++++++++++++----------------------------
fs/btrfs/volumes.h | 4 +--
fs/btrfs/zoned.c | 28 +++++++++++--------
5 files changed, 51 insertions(+), 65 deletions(-)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 44e7ae4a2e0b..c4b020187249 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3320,7 +3320,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
/*
* Read super block and check the signature bytes only
*/
- disk_super = btrfs_read_disk_super(fs_devices->latest_dev->bdev, 0, false);
+ disk_super = btrfs_read_disk_super(fs_devices->latest_dev->bdev, 0);
if (IS_ERR(disk_super)) {
ret = PTR_ERR(disk_super);
goto fail_alloc;
@@ -3336,7 +3336,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
btrfs_err(fs_info, "unsupported checksum algorithm: %u",
csum_type);
ret = -EINVAL;
- btrfs_release_disk_super(disk_super);
+ kfree(disk_super);
goto fail_alloc;
}
@@ -3344,7 +3344,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
ret = btrfs_init_csum_hash(fs_info, csum_type);
if (ret) {
- btrfs_release_disk_super(disk_super);
+ kfree(disk_super);
goto fail_alloc;
}
@@ -3355,7 +3355,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
if (btrfs_check_super_csum(fs_info, disk_super)) {
btrfs_err(fs_info, "superblock checksum mismatch");
ret = -EINVAL;
- btrfs_release_disk_super(disk_super);
+ kfree(disk_super);
goto fail_alloc;
}
@@ -3365,7 +3365,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
* the whole block of INFO_SIZE
*/
memcpy(fs_info->super_copy, disk_super, sizeof(*fs_info->super_copy));
- btrfs_release_disk_super(disk_super);
+ kfree(disk_super);
disk_super = fs_info->super_copy;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index e4ce2754cfde..b6feddcf4510 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -2309,7 +2309,7 @@ static int check_dev_super(struct btrfs_device *dev)
return 0;
/* Only need to check the primary super block. */
- sb = btrfs_read_disk_super(dev->bdev, 0, true);
+ sb = btrfs_read_disk_super(dev->bdev, 0);
if (IS_ERR(sb))
return PTR_ERR(sb);
@@ -2341,7 +2341,7 @@ static int check_dev_super(struct btrfs_device *dev)
goto out;
}
out:
- btrfs_release_disk_super(sb);
+ kfree(sb);
return ret;
}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 31aecd291d6c..03643c71addf 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -495,7 +495,7 @@ btrfs_get_bdev_and_sb(const char *device_path, blk_mode_t flags, void *holder,
}
}
invalidate_bdev(bdev);
- *disk_super = btrfs_read_disk_super(bdev, 0, false);
+ *disk_super = btrfs_read_disk_super(bdev, 0);
if (IS_ERR(*disk_super)) {
ret = PTR_ERR(*disk_super);
bdev_fput(*bdev_file);
@@ -716,12 +716,12 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
fs_devices->rw_devices++;
list_add_tail(&device->dev_alloc_list, &fs_devices->alloc_list);
}
- btrfs_release_disk_super(disk_super);
+ kfree(disk_super);
return 0;
error_free_page:
- btrfs_release_disk_super(disk_super);
+ kfree(disk_super);
bdev_fput(bdev_file);
return -EINVAL;
@@ -1326,20 +1326,11 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
return ret;
}
-void btrfs_release_disk_super(struct btrfs_super_block *super)
-{
- struct page *page = virt_to_page(super);
-
- put_page(page);
-}
-
struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev,
- int copy_num, bool drop_cache)
+ int copy_num)
{
struct btrfs_super_block *super;
- struct page *page;
u64 bytenr, bytenr_orig;
- struct address_space *mapping = bdev->bd_mapping;
int ret;
bytenr_orig = btrfs_sb_offset(copy_num);
@@ -1353,26 +1344,19 @@ struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev,
if (bytenr + BTRFS_SUPER_INFO_SIZE >= bdev_nr_bytes(bdev))
return ERR_PTR(-EINVAL);
- if (drop_cache) {
- /* This should only be called with the primary sb. */
- ASSERT(copy_num == 0);
-
- /*
- * Drop the page of the primary superblock, so later read will
- * always read from the device.
- */
- invalidate_inode_pages2_range(mapping, bytenr >> PAGE_SHIFT,
- (bytenr + BTRFS_SUPER_INFO_SIZE) >> PAGE_SHIFT);
+ super = kmalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS);
+ if (!super)
+ return ERR_PTR(-ENOMEM);
+ ret = bdev_rw_virt(bdev, bytenr >> SECTOR_SHIFT, super, BTRFS_SUPER_INFO_SIZE,
+ REQ_OP_READ);
+ if (ret < 0) {
+ kfree(super);
+ return ERR_PTR(ret);
}
- page = read_cache_page_gfp(mapping, bytenr >> PAGE_SHIFT, GFP_NOFS);
- if (IS_ERR(page))
- return ERR_CAST(page);
-
- super = page_address(page);
if (btrfs_super_magic(super) != BTRFS_MAGIC ||
btrfs_super_bytenr(super) != bytenr_orig) {
- btrfs_release_disk_super(super);
+ kfree(super);
return ERR_PTR(-EINVAL);
}
@@ -1473,7 +1457,7 @@ struct btrfs_device *btrfs_scan_one_device(const char *path,
if (IS_ERR(bdev_file))
return ERR_CAST(bdev_file);
- disk_super = btrfs_read_disk_super(file_bdev(bdev_file), 0, false);
+ disk_super = btrfs_read_disk_super(file_bdev(bdev_file), 0);
if (IS_ERR(disk_super)) {
device = ERR_CAST(disk_super);
goto error_bdev_put;
@@ -1495,7 +1479,7 @@ struct btrfs_device *btrfs_scan_one_device(const char *path,
btrfs_free_stale_devices(device->devt, device);
free_disk_super:
- btrfs_release_disk_super(disk_super);
+ kfree(disk_super);
error_bdev_put:
bdev_fput(bdev_file);
@@ -2135,20 +2119,20 @@ static void btrfs_scratch_superblock(struct btrfs_fs_info *fs_info,
struct block_device *bdev, int copy_num)
{
struct btrfs_super_block *disk_super;
- const size_t len = sizeof(disk_super->magic);
const u64 bytenr = btrfs_sb_offset(copy_num);
int ret;
- disk_super = btrfs_read_disk_super(bdev, copy_num, false);
- if (IS_ERR(disk_super))
- return;
-
- memset(&disk_super->magic, 0, len);
- folio_mark_dirty(virt_to_folio(disk_super));
- btrfs_release_disk_super(disk_super);
-
- ret = sync_blockdev_range(bdev, bytenr, bytenr + len - 1);
- if (ret)
+ disk_super = btrfs_read_disk_super(bdev, copy_num);
+ if (IS_ERR(disk_super)) {
+ ret = PTR_ERR(disk_super);
+ goto out;
+ }
+ btrfs_set_super_magic(disk_super, 0);
+ ret = bdev_rw_virt(bdev, bytenr >> SECTOR_SHIFT, disk_super,
+ BTRFS_SUPER_INFO_SIZE, REQ_OP_WRITE);
+ kfree(disk_super);
+out:
+ if (ret < 0)
btrfs_warn(fs_info, "error clearing superblock number %d (%d)",
copy_num, ret);
}
@@ -2480,7 +2464,7 @@ int btrfs_get_dev_args_from_path(struct btrfs_fs_info *fs_info,
memcpy(args->fsid, disk_super->metadata_uuid, BTRFS_FSID_SIZE);
else
memcpy(args->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
- btrfs_release_disk_super(disk_super);
+ kfree(disk_super);
bdev_fput(bdev_file);
return 0;
}
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 7395cb5e1238..bf2e9a5a63ea 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -798,9 +798,7 @@ struct btrfs_chunk_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info,
u64 logical, u64 length);
void btrfs_remove_chunk_map(struct btrfs_fs_info *fs_info, struct btrfs_chunk_map *map);
struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev,
- int copy_num, bool drop_cache);
-void btrfs_release_disk_super(struct btrfs_super_block *super);
-
+ int copy_num);
static inline void btrfs_dev_stat_inc(struct btrfs_device *dev,
int index)
{
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 245e813ecd78..ce67f68075d7 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -117,23 +117,27 @@ static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones,
return -ENOENT;
} else if (full[0] && full[1]) {
/* Compare two super blocks */
- struct address_space *mapping = bdev->bd_mapping;
- struct page *page[BTRFS_NR_SB_LOG_ZONES];
- struct btrfs_super_block *super[BTRFS_NR_SB_LOG_ZONES];
+ struct btrfs_super_block *super[BTRFS_NR_SB_LOG_ZONES] = { 0 };
for (int i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) {
u64 zone_end = (zones[i].start + zones[i].capacity) << SECTOR_SHIFT;
u64 bytenr = ALIGN_DOWN(zone_end, BTRFS_SUPER_INFO_SIZE) -
BTRFS_SUPER_INFO_SIZE;
+ int ret;
- page[i] = read_cache_page_gfp(mapping,
- bytenr >> PAGE_SHIFT, GFP_NOFS);
- if (IS_ERR(page[i])) {
- if (i == 1)
- btrfs_release_disk_super(super[0]);
- return PTR_ERR(page[i]);
+ super[i] = kmalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS);
+ if (!super[i]) {
+ kfree(super[0]);
+ kfree(super[1]);
+ return -ENOMEM;
+ }
+ ret = bdev_rw_virt(bdev, bytenr >> SECTOR_SHIFT, super[i],
+ BTRFS_SUPER_INFO_SIZE, REQ_OP_READ);
+ if (ret < 0) {
+ kfree(super[0]);
+ kfree(super[1]);
+ return ret;
}
- super[i] = page_address(page[i]);
}
if (btrfs_super_generation(super[0]) >
@@ -142,8 +146,8 @@ static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones,
else
sector = zones[0].start;
- for (int i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++)
- btrfs_release_disk_super(super[i]);
+ kfree(super[0]);
+ kfree(super[1]);
} else if (!full[0] && (empty[1] || full[1])) {
sector = zones[0].wp;
} else if (full[0]) {
--
2.50.0
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH 2/2] btrfs: do not poke into bdev's page cache for super block write
2025-07-09 4:05 [PATCH 0/2] btrfs: do not poke into bdev's page cache Qu Wenruo
2025-07-09 4:05 ` [PATCH 1/2] btrfs: use bdev_rw_virt() to read and scratch the disk super block Qu Wenruo
@ 2025-07-09 4:05 ` Qu Wenruo
2025-07-09 7:43 ` [PATCH 0/2] btrfs: do not poke into bdev's page cache Johannes Thumshirn
2 siblings, 0 replies; 8+ messages in thread
From: Qu Wenruo @ 2025-07-09 4:05 UTC (permalink / raw)
To: linux-btrfs
[EXISTING BEHAVIOR]
Currently btrfs is utilizing block device's page cache to write super
blocks.
This has a long history, dating back to early days when we relies on the
page's flags to determine if the IO is done or failed.
But later commit bc00965dbff7 ("btrfs: count super block write errors in
device instead of tracking folio error state") uses extra atomic to
track how many errors are hit, and we no longer rely on page flags to
determine if the IO is done or failed.
[PROBLEMS]
But such direct usage of block devices' page cache is putting btrfs
responsible to the implementation details of the block device.
In fact this has already caused problem when the block device's page
cache has large folio support enabled, fixed by commit 65f2a3b2323e
("btrfs: remove folio order ASSERT()s in super block writeback path").
This should give us a warning that poking into other layer's
implementation is not a good idea.
[ENHANCEMENT]
Instead of directly using block device's page cache, use the regular bio
interfaces, this involves:
- Introduce extra buffer for all super blocks of a device
This is needed because we want to submit bios for all super blocks in
one go, and wait for them all.
This is to increase concurrency, thus we can not reuse a single super
block copy, and that's part of the reason we want to use page cache.
Unfortunately this means we will have extra 12K memory usage for each
btrfs device.
- Introduce ways to wait for super block writeback
Previously we rely on the page cache to wait for the IO to finish.
Now we have an atomic, @sb_write_running, to record how many running
super block writes are pending.
And also a wait queue head for waiting.
This greatly simplify wait_dev_supers(), now we only need to wait for
the pending ios to finish.
- Simplify btrfs_end_super_write() function
Now we don't need to bother releasing the folio, we can get rid of the
bio iteration code.
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
fs/btrfs/disk-io.c | 76 ++++++++++++++--------------------------------
fs/btrfs/volumes.c | 13 ++++++++
fs/btrfs/volumes.h | 6 ++++
3 files changed, 42 insertions(+), 53 deletions(-)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index c4b020187249..c93d7b908f66 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3692,27 +3692,23 @@ ALLOW_ERROR_INJECTION(open_ctree, ERRNO);
static void btrfs_end_super_write(struct bio *bio)
{
struct btrfs_device *device = bio->bi_private;
- struct folio_iter fi;
- bio_for_each_folio_all(fi, bio) {
- if (bio->bi_status) {
- btrfs_warn_rl(device->fs_info,
- "lost super block write due to IO error on %s (%d)",
- btrfs_dev_name(device),
- blk_status_to_errno(bio->bi_status));
- btrfs_dev_stat_inc_and_print(device,
- BTRFS_DEV_STAT_WRITE_ERRS);
- /* Ensure failure if the primary sb fails. */
- if (bio->bi_opf & REQ_FUA)
- atomic_add(BTRFS_SUPER_PRIMARY_WRITE_ERROR,
- &device->sb_write_errors);
- else
- atomic_inc(&device->sb_write_errors);
- }
- folio_unlock(fi.folio);
- folio_put(fi.folio);
+ if (bio->bi_status) {
+ btrfs_warn_rl(device->fs_info,
+ "lost super block write due to IO error on %s (%d)",
+ btrfs_dev_name(device),
+ blk_status_to_errno(bio->bi_status));
+ btrfs_dev_stat_inc_and_print(device,
+ BTRFS_DEV_STAT_WRITE_ERRS);
+ /* Ensure failure if the primary sb fails. */
+ if (bio->bi_opf & REQ_FUA)
+ atomic_add(BTRFS_SUPER_PRIMARY_WRITE_ERROR,
+ &device->sb_write_errors);
+ else
+ atomic_inc(&device->sb_write_errors);
}
-
+ if (atomic_dec_and_test(&device->sb_write_running))
+ wake_up(&device->sb_write_wait);
bio_put(bio);
}
@@ -3730,24 +3726,21 @@ static int write_dev_supers(struct btrfs_device *device,
struct btrfs_super_block *sb, int max_mirrors)
{
struct btrfs_fs_info *fs_info = device->fs_info;
- struct address_space *mapping = device->bdev->bd_mapping;
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
int i;
int ret;
u64 bytenr, bytenr_orig;
atomic_set(&device->sb_write_errors, 0);
+ ASSERT(atomic_read(&device->sb_write_running) == 0);
if (max_mirrors == 0)
max_mirrors = BTRFS_SUPER_MIRROR_MAX;
-
+ ASSERT(max_mirrors <= BTRFS_SUPER_MIRROR_MAX);
shash->tfm = fs_info->csum_shash;
for (i = 0; i < max_mirrors; i++) {
- struct folio *folio;
struct bio *bio;
- struct btrfs_super_block *disk_super;
- size_t offset;
bytenr_orig = btrfs_sb_offset(i);
ret = btrfs_sb_log_location(device, i, WRITE, &bytenr);
@@ -3769,21 +3762,7 @@ static int write_dev_supers(struct btrfs_device *device,
crypto_shash_digest(shash, (const char *)sb + BTRFS_CSUM_SIZE,
BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE,
sb->csum);
-
- folio = __filemap_get_folio(mapping, bytenr >> PAGE_SHIFT,
- FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
- GFP_NOFS);
- if (IS_ERR(folio)) {
- btrfs_err(device->fs_info,
- "couldn't get super block page for bytenr %llu error %ld",
- bytenr, PTR_ERR(folio));
- atomic_inc(&device->sb_write_errors);
- continue;
- }
-
- offset = offset_in_folio(folio, bytenr);
- disk_super = folio_address(folio) + offset;
- memcpy(disk_super, sb, BTRFS_SUPER_INFO_SIZE);
+ memcpy(device->sb_write_buf[i], sb, BTRFS_SUPER_INFO_SIZE);
/*
* Directly use bios here instead of relying on the page cache
@@ -3796,7 +3775,7 @@ static int write_dev_supers(struct btrfs_device *device,
bio->bi_iter.bi_sector = bytenr >> SECTOR_SHIFT;
bio->bi_private = device;
bio->bi_end_io = btrfs_end_super_write;
- bio_add_folio_nofail(bio, folio, BTRFS_SUPER_INFO_SIZE, offset);
+ bio_add_virt_nofail(bio, device->sb_write_buf[i], BTRFS_SUPER_INFO_SIZE);
/*
* We FUA only the first super block. The others we allow to
@@ -3805,6 +3784,7 @@ static int write_dev_supers(struct btrfs_device *device,
*/
if (i == 0 && !btrfs_test_opt(device->fs_info, NOBARRIER))
bio->bi_opf |= REQ_FUA;
+ atomic_inc(&device->sb_write_running);
submit_bio(bio);
if (btrfs_advance_sb_log(device, i))
@@ -3830,10 +3810,10 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
if (max_mirrors == 0)
max_mirrors = BTRFS_SUPER_MIRROR_MAX;
+ ASSERT(max_mirrors <= BTRFS_SUPER_MIRROR_MAX);
+ /* Calculate how many super blocks we really have. */
for (i = 0; i < max_mirrors; i++) {
- struct folio *folio;
-
ret = btrfs_sb_log_location(device, i, READ, &bytenr);
if (ret == -ENOENT) {
break;
@@ -3846,17 +3826,8 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
if (bytenr + BTRFS_SUPER_INFO_SIZE >=
device->commit_total_bytes)
break;
-
- folio = filemap_get_folio(device->bdev->bd_mapping,
- bytenr >> PAGE_SHIFT);
- /* If the folio has been removed, then we know it completed. */
- if (IS_ERR(folio))
- continue;
-
- /* Folio will be unlocked once the write completes. */
- folio_wait_locked(folio);
- folio_put(folio);
}
+ wait_event(device->sb_write_wait, atomic_read(&device->sb_write_running) == 0);
errors += atomic_read(&device->sb_write_errors);
if (errors >= BTRFS_SUPER_PRIMARY_WRITE_ERROR)
@@ -3866,7 +3837,6 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
device->devid);
return -1;
}
-
return errors < i ? 0 : -1;
}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 03643c71addf..be638774b9eb 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -400,6 +400,7 @@ static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid)
static void btrfs_free_device(struct btrfs_device *device)
{
WARN_ON(!list_empty(&device->post_commit_list));
+ WARN_ON(atomic_read(&device->sb_write_running) != 0);
/*
* No need to call kfree_rcu() nor do RCU lock/unlock, nothing is
* reading the device name.
@@ -407,6 +408,8 @@ static void btrfs_free_device(struct btrfs_device *device)
kfree(rcu_dereference_raw(device->name));
btrfs_extent_io_tree_release(&device->alloc_state);
btrfs_destroy_dev_zone_info(device);
+ for (int i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++)
+ kfree(device->sb_write_buf[i]);
kfree(device);
}
@@ -6893,6 +6896,8 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
INIT_LIST_HEAD(&dev->post_commit_list);
atomic_set(&dev->dev_stats_ccnt, 0);
+ atomic_set(&dev->sb_write_running, 0);
+ init_waitqueue_head(&dev->sb_write_wait);
btrfs_device_data_ordered_init(dev);
btrfs_extent_io_tree_init(fs_info, &dev->alloc_state, IO_TREE_DEVICE_ALLOC_STATE);
@@ -6925,6 +6930,14 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
rcu_assign_pointer(dev->name, name);
}
+ for (int i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
+ dev->sb_write_buf[i] = kmalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
+ if (!dev->sb_write_buf[i]) {
+ btrfs_free_device(dev);
+ return ERR_PTR(-ENOMEM);
+ }
+ }
+
return dev;
}
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index bf2e9a5a63ea..830a8383b73b 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -20,6 +20,7 @@
#include <linux/rbtree.h>
#include <uapi/linux/btrfs.h>
#include <uapi/linux/btrfs_tree.h>
+#include "disk-io.h"
#include "messages.h"
#include "extent-io-tree.h"
@@ -182,6 +183,11 @@ struct btrfs_device {
struct bio flush_bio;
struct completion flush_wait;
+ /* Buffer for each super block. */
+ struct btrfs_super_block *sb_write_buf[BTRFS_SUPER_MIRROR_MAX];
+ atomic_t sb_write_running;
+ wait_queue_head_t sb_write_wait;
+
/* per-device scrub information */
struct scrub_ctx *scrub_ctx;
--
2.50.0
^ permalink raw reply related [flat|nested] 8+ messages in thread