From: David Sterba <dsterba@suse.com>
To: linux-btrfs@vger.kernel.org
Cc: willy@infradead.org, nborisov@suse.com, David Sterba <dsterba@suse.com>
Subject: [PATCH v2] btrfs: use preallocated pages for super block write
Date: Thu, 9 Jun 2022 18:46:29 +0200 [thread overview]
Message-ID: <20220609164629.30316-1-dsterba@suse.com> (raw)
Currently the super block page comes from the mapping of the block device;
this is a result of the direct conversion from the previous buffer_head API
to the bio API. We don't use the page cache or the mapping anywhere else; the
page is just temporary space for the associated bio.
Allocate pages for all super block copies at device allocation time,
which also avoids any later allocation problems when writing the super
block. This simplifies the page reference tracking, but the page lock is
still used as a waiting mechanism for the write, and a write error is
still tracked in the page.
As there is a separate page for each super block copy, all copies can be
submitted in parallel, as before.
This was inspired by Matthew's question:
https://lore.kernel.org/all/Yn%2FtxWbij5voeGOB@casper.infradead.org/
Signed-off-by: David Sterba <dsterba@suse.com>
---
v2:
- allocate 3 pages per device to keep parallelism; otherwise the
submission would be serialized on the page lock
fs/btrfs/disk-io.c | 42 +++++++++++-------------------------------
fs/btrfs/volumes.c | 12 ++++++++++++
fs/btrfs/volumes.h | 3 +++
3 files changed, 26 insertions(+), 31 deletions(-)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 800ad3a9c68e..8a9c7a868727 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3887,7 +3887,6 @@ static void btrfs_end_super_write(struct bio *bio)
SetPageUptodate(page);
}
- put_page(page);
unlock_page(page);
}
@@ -3974,7 +3973,6 @@ static int write_dev_supers(struct btrfs_device *device,
struct btrfs_super_block *sb, int max_mirrors)
{
struct btrfs_fs_info *fs_info = device->fs_info;
- struct address_space *mapping = device->bdev->bd_inode->i_mapping;
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
int i;
int errors = 0;
@@ -3989,7 +3987,6 @@ static int write_dev_supers(struct btrfs_device *device,
for (i = 0; i < max_mirrors; i++) {
struct page *page;
struct bio *bio;
- struct btrfs_super_block *disk_super;
bytenr_orig = btrfs_sb_offset(i);
ret = btrfs_sb_log_location(device, i, WRITE, &bytenr);
@@ -4012,21 +4009,17 @@ static int write_dev_supers(struct btrfs_device *device,
BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE,
sb->csum);
- page = find_or_create_page(mapping, bytenr >> PAGE_SHIFT,
- GFP_NOFS);
- if (!page) {
- btrfs_err(device->fs_info,
- "couldn't get super block page for bytenr %llu",
- bytenr);
- errors++;
- continue;
- }
-
- /* Bump the refcount for wait_dev_supers() */
- get_page(page);
+ /*
+ * Super block is copied to a temporary page, which is locked
+ * and submitted for write. Page is unlocked after IO finishes.
+ * No page references are needed, write error is returned as
+ * page Error bit.
+ */
+ page = device->sb_write_page[i];
+ ClearPageError(page);
+ lock_page(page);
- disk_super = page_address(page);
- memcpy(disk_super, sb, BTRFS_SUPER_INFO_SIZE);
+ memcpy(page_address(page), sb, BTRFS_SUPER_INFO_SIZE);
/*
* Directly use bios here instead of relying on the page cache
@@ -4093,14 +4086,7 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
device->commit_total_bytes)
break;
- page = find_get_page(device->bdev->bd_inode->i_mapping,
- bytenr >> PAGE_SHIFT);
- if (!page) {
- errors++;
- if (i == 0)
- primary_failed = true;
- continue;
- }
+ page = device->sb_write_page[i];
/* Page is submitted locked and unlocked once the IO completes */
wait_on_page_locked(page);
if (PageError(page)) {
@@ -4108,12 +4094,6 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
if (i == 0)
primary_failed = true;
}
-
- /* Drop our reference */
- put_page(page);
-
- /* Drop the reference from the writing run */
- put_page(page);
}
/* log error, force error return */
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 12a6150ee19d..a00546d2c7ea 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -394,6 +394,8 @@ void btrfs_free_device(struct btrfs_device *device)
rcu_string_free(device->name);
extent_io_tree_release(&device->alloc_state);
btrfs_destroy_dev_zone_info(device);
+ for (int i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++)
+ __free_page(device->sb_write_page[i]);
kfree(device);
}
@@ -6898,6 +6900,16 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
if (!dev)
return ERR_PTR(-ENOMEM);
+ for (int i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
+ dev->sb_write_page[i] = alloc_page(GFP_KERNEL);
+ if (!dev->sb_write_page[i]) {
+ while (--i >= 0)
+ __free_page(dev->sb_write_page[i]);
+ kfree(dev);
+ return ERR_PTR(-ENOMEM);
+ }
+ }
+
INIT_LIST_HEAD(&dev->dev_list);
INIT_LIST_HEAD(&dev->dev_alloc_list);
INIT_LIST_HEAD(&dev->post_commit_list);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 588367c76c46..516709e1d9f8 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -10,6 +10,7 @@
#include <linux/sort.h>
#include <linux/btrfs.h>
#include "async-thread.h"
+#include "disk-io.h"
#define BTRFS_MAX_DATA_CHUNK_SIZE (10ULL * SZ_1G)
@@ -158,6 +159,8 @@ struct btrfs_device {
/* Bio used for flushing device barriers */
struct bio flush_bio;
struct completion flush_wait;
+ /* Temporary pages for writing the super block copies */
+ struct page *sb_write_page[BTRFS_SUPER_MIRROR_MAX];
/* per-device scrub information */
struct scrub_ctx *scrub_ctx;
--
2.36.1
next reply other threads:[~2022-06-09 16:51 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-06-09 16:46 David Sterba [this message]
2022-06-09 21:00 ` [PATCH v2] btrfs: use preallocated pages for super block write Matthew Wilcox
2022-06-09 22:54 ` David Sterba
2022-06-09 22:58 ` Qu Wenruo
2022-06-09 22:59 ` David Sterba
2022-06-09 23:15 ` Qu Wenruo
2022-06-09 23:35 ` David Sterba
2022-06-10 1:40 ` Matthew Wilcox
2022-06-10 2:46 ` Qu Wenruo
2022-06-10 3:31 ` Matthew Wilcox
2022-06-10 4:53 ` Qu Wenruo
2022-06-10 0:07 ` Qu Wenruo
2022-06-10 7:23 ` Nikolay Borisov
2022-06-10 7:33 ` Qu Wenruo
2022-06-10 7:39 ` Nikolay Borisov
2022-06-10 7:55 ` Qu Wenruo
2022-06-10 8:39 ` Filipe Manana
2022-06-10 8:44 ` Qu Wenruo
2022-06-10 8:49 ` Qu Wenruo
2022-06-10 9:07 ` Qu Wenruo
2022-06-10 12:06 ` Nikolay Borisov
2022-06-11 13:30 ` Anand Jain
2022-06-13 6:37 ` Nikolay Borisov
2022-06-13 6:35 ` Nikolay Borisov
2022-06-21 13:24 ` David Sterba
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220609164629.30316-1-dsterba@suse.com \
--to=dsterba@suse.com \
--cc=linux-btrfs@vger.kernel.org \
--cc=nborisov@suse.com \
--cc=willy@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox