From: Qu Wenruo <quwenruo.btrfs@gmx.com>
To: Josef Bacik <josef@toxicpanda.com>,
linux-btrfs@vger.kernel.org, kernel-team@fb.com
Subject: Re: [PATCH] btrfs: introduce rescue=all
Date: Wed, 9 Sep 2020 07:10:49 +0800 [thread overview]
Message-ID: <b862077c-0903-b4f0-ccc9-ee1c815534bc@gmx.com> (raw)
In-Reply-To: <921662f28b90d7e5a67bb52a1e0b0b2e9584f946.1599579772.git.josef@toxicpanda.com>
[-- Attachment #1.1: Type: text/plain, Size: 12654 bytes --]
On 2020/9/8 下午11:43, Josef Bacik wrote:
> One of the things that came up consistently in talking with Fedora about
> switching to btrfs as default is that btrfs is particularly vulnerable
> to metadata corruption. If any of the core global roots are corrupted,
> the fs is unmountable and fsck can't usually do anything for you without
> some special options.
>
> What we really want is a simple mount option that we can tell all users
> to try if things are really wrong, one that gives them the highest
> chance of being able to mount their file system and copy off their
> data in the most direct way possible.
Then "all" doesn't look correct for such usage.
I'd prefer "salvage" then.
Thanks,
Qu
>
> Obviously if the fs roots are screwed then the user is in trouble, but
> otherwise this makes it much easier to pull stuff off the disk without
> needing our special rescue tools. I tested this with an xfstest that
> I've submitted alongside this patch that clears the roots and makes
> sure we're still able to read the data on the disk.
>
> Signed-off-by: Josef Bacik <josef@toxicpanda.com>
> ---
> fs/btrfs/block-group.c | 46 ++++++++++++++++++++++++++
> fs/btrfs/ctree.h | 1 +
> fs/btrfs/disk-io.c | 73 ++++++++++++++++++++++++++++--------------
> fs/btrfs/inode.c | 6 +++-
> fs/btrfs/super.c | 29 +++++++++++++++--
> fs/btrfs/volumes.c | 7 ++++
> 6 files changed, 135 insertions(+), 27 deletions(-)
>
> diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
> index 01e8ba1da1d3..dfff2b6e7708 100644
> --- a/fs/btrfs/block-group.c
> +++ b/fs/btrfs/block-group.c
> @@ -1991,6 +1991,49 @@ static int read_one_block_group(struct btrfs_fs_info *info,
> return ret;
> }
>
> +static int fill_dummy_bgs(struct btrfs_fs_info *fs_info)
> +{
> + struct extent_map_tree *em_tree = &fs_info->mapping_tree;
> + struct extent_map *em;
> + struct map_lookup *map;
> + struct btrfs_block_group *bg;
> + struct btrfs_space_info *space_info;
> + struct rb_node *node;
> + int ret = 0;
> +
> + for (node = rb_first_cached(&em_tree->map); node;
> + node = rb_next(node)) {
> + em = rb_entry(node, struct extent_map, rb_node);
> + map = em->map_lookup;
> + bg = btrfs_create_block_group_cache(fs_info, em->start);
> + if (!bg) {
> + ret = -ENOMEM;
> + break;
> + }
> +
> + /* Fill dummy cache as FULL */
> + bg->length = em->len;
> + bg->flags = map->type;
> + bg->last_byte_to_unpin = (u64)-1;
> + bg->cached = BTRFS_CACHE_FINISHED;
> + bg->used = em->len;
> + bg->flags = map->type;
> + ret = btrfs_add_block_group_cache(fs_info, bg);
> + if (ret) {
> + btrfs_remove_free_space_cache(bg);
> + btrfs_put_block_group(bg);
> + break;
> + }
> + btrfs_update_space_info(fs_info, bg->flags, em->len, em->len,
> + 0, &space_info);
> + bg->space_info = space_info;
> + link_block_group(bg);
> +
> + set_avail_alloc_bits(fs_info, bg->flags);
> + }
> + return ret;
> +}
> +
> int btrfs_read_block_groups(struct btrfs_fs_info *info)
> {
> struct btrfs_path *path;
> @@ -2001,6 +2044,9 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
> int need_clear = 0;
> u64 cache_gen;
>
> + if (btrfs_test_opt(info, RESCUE_ALL))
> + return fill_dummy_bgs(info);
> +
> key.objectid = 0;
> key.offset = 0;
> key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index 98c5f6178efc..f84b3f67faa4 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -1275,6 +1275,7 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info)
> #define BTRFS_MOUNT_NOLOGREPLAY (1 << 27)
> #define BTRFS_MOUNT_REF_VERIFY (1 << 28)
> #define BTRFS_MOUNT_DISCARD_ASYNC (1 << 29)
> +#define BTRFS_MOUNT_RESCUE_ALL (1 << 30)
>
> #define BTRFS_DEFAULT_COMMIT_INTERVAL (30)
> #define BTRFS_DEFAULT_MAX_INLINE (2048)
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 7147237d9bf0..f61366a57be4 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -2303,30 +2303,39 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
>
> root = btrfs_read_tree_root(tree_root, &location);
> if (IS_ERR(root)) {
> - ret = PTR_ERR(root);
> - goto out;
> + if (!btrfs_test_opt(fs_info, RESCUE_ALL)) {
> + ret = PTR_ERR(root);
> + goto out;
> + }
> + } else {
> + set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
> + fs_info->extent_root = root;
> }
> - set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
> - fs_info->extent_root = root;
>
> location.objectid = BTRFS_DEV_TREE_OBJECTID;
> root = btrfs_read_tree_root(tree_root, &location);
> if (IS_ERR(root)) {
> - ret = PTR_ERR(root);
> - goto out;
> + if (!btrfs_test_opt(fs_info, RESCUE_ALL)) {
> + ret = PTR_ERR(root);
> + goto out;
> + }
> + } else {
> + set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
> + fs_info->dev_root = root;
> + btrfs_init_devices_late(fs_info);
> }
> - set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
> - fs_info->dev_root = root;
> - btrfs_init_devices_late(fs_info);
>
> location.objectid = BTRFS_CSUM_TREE_OBJECTID;
> root = btrfs_read_tree_root(tree_root, &location);
> if (IS_ERR(root)) {
> - ret = PTR_ERR(root);
> - goto out;
> + if (!btrfs_test_opt(fs_info, RESCUE_ALL)) {
> + ret = PTR_ERR(root);
> + goto out;
> + }
> + } else {
> + set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
> + fs_info->csum_root = root;
> }
> - set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
> - fs_info->csum_root = root;
>
> /*
> * This tree can share blocks with some other fs tree during relocation
> @@ -2335,11 +2344,14 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
> root = btrfs_get_fs_root(tree_root->fs_info,
> BTRFS_DATA_RELOC_TREE_OBJECTID, true);
> if (IS_ERR(root)) {
> - ret = PTR_ERR(root);
> - goto out;
> + if (!btrfs_test_opt(fs_info, RESCUE_ALL)) {
> + ret = PTR_ERR(root);
> + goto out;
> + }
> + } else {
> + set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
> + fs_info->data_reloc_root = root;
> }
> - set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
> - fs_info->data_reloc_root = root;
>
> location.objectid = BTRFS_QUOTA_TREE_OBJECTID;
> root = btrfs_read_tree_root(tree_root, &location);
> @@ -2352,9 +2364,11 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
> location.objectid = BTRFS_UUID_TREE_OBJECTID;
> root = btrfs_read_tree_root(tree_root, &location);
> if (IS_ERR(root)) {
> - ret = PTR_ERR(root);
> - if (ret != -ENOENT)
> - goto out;
> + if (!btrfs_test_opt(fs_info, RESCUE_ALL)) {
> + ret = PTR_ERR(root);
> + if (ret != -ENOENT)
> + goto out;
> + }
> } else {
> set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
> fs_info->uuid_root = root;
> @@ -2364,11 +2378,14 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
> location.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID;
> root = btrfs_read_tree_root(tree_root, &location);
> if (IS_ERR(root)) {
> - ret = PTR_ERR(root);
> - goto out;
> + if (!btrfs_test_opt(fs_info, RESCUE_ALL)) {
> + ret = PTR_ERR(root);
> + goto out;
> + }
> + } else {
> + set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
> + fs_info->free_space_root = root;
> }
> - set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
> - fs_info->free_space_root = root;
> }
>
> return 0;
> @@ -3082,6 +3099,14 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
> goto fail_alloc;
> }
>
> + /* Skip bg needs RO and no tree-log to replay */
> + if (btrfs_test_opt(fs_info, RESCUE_ALL) && !sb_rdonly(sb)) {
> + btrfs_err(fs_info,
> + "rescue=all can only be used on read-only mount");
> + err = -EINVAL;
> + goto fail_alloc;
> + }
> +
> ret = btrfs_init_workqueues(fs_info, fs_devices);
> if (ret) {
> err = ret;
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index cce6f8789a4e..d90c86ac9ebe 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -2196,7 +2196,8 @@ static blk_status_t btrfs_submit_bio_hook(struct inode *inode, struct bio *bio,
> int skip_sum;
> int async = !atomic_read(&BTRFS_I(inode)->sync_writers);
>
> - skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
> + skip_sum = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) ||
> + !fs_info->csum_root;
>
> if (btrfs_is_free_space_inode(BTRFS_I(inode)))
> metadata = BTRFS_WQ_ENDIO_FREE_SPACE;
> @@ -2851,6 +2852,9 @@ static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
> if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
> return 0;
>
> + if (!root->fs_info->csum_root)
> + return 0;
> +
> if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
> test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) {
> clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM);
> diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
> index 3ebe7240c63d..89703c2ec1f3 100644
> --- a/fs/btrfs/super.c
> +++ b/fs/btrfs/super.c
> @@ -360,6 +360,7 @@ enum {
> Opt_rescue,
> Opt_usebackuproot,
> Opt_nologreplay,
> + Opt_rescue_all,
>
> /* Deprecated options */
> Opt_recovery,
> @@ -455,6 +456,7 @@ static const match_table_t tokens = {
> static const match_table_t rescue_tokens = {
> {Opt_usebackuproot, "usebackuproot"},
> {Opt_nologreplay, "nologreplay"},
> + {Opt_rescue_all, "all"},
> {Opt_err, NULL},
> };
>
> @@ -487,6 +489,11 @@ static int parse_rescue_options(struct btrfs_fs_info *info, const char *options)
> btrfs_set_and_info(info, NOLOGREPLAY,
> "disabling log replay at mount time");
> break;
> + case Opt_rescue_all:
> + btrfs_set_and_info(info, RESCUE_ALL,
> + "only reading fs roots, also setting nologreplay");
> + btrfs_set_opt(info->mount_opt, NOLOGREPLAY);
> + break;
> case Opt_err:
> btrfs_info(info, "unrecognized rescue option '%s'", p);
> ret = -EINVAL;
> @@ -1421,6 +1428,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
> seq_puts(seq, ",notreelog");
> if (btrfs_test_opt(info, NOLOGREPLAY))
> seq_puts(seq, ",rescue=nologreplay");
> + if (btrfs_test_opt(info, RESCUE_ALL))
> + seq_puts(seq, ",rescue=all");
> if (btrfs_test_opt(info, FLUSHONCOMMIT))
> seq_puts(seq, ",flushoncommit");
> if (btrfs_test_opt(info, DISCARD_SYNC))
> @@ -1858,6 +1867,14 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
> if (ret)
> goto restore;
>
> + if (btrfs_test_opt(fs_info, RESCUE_ALL) !=
> + (old_opts & BTRFS_MOUNT_RESCUE_ALL)) {
> + btrfs_err(fs_info,
> + "rescue=all mount option can't be changed during remount");
> + ret = -EINVAL;
> + goto restore;
> + }
> +
> btrfs_remount_begin(fs_info, old_opts, *flags);
> btrfs_resize_thread_pool(fs_info,
> fs_info->thread_pool_size, old_thread_pool_size);
> @@ -1924,6 +1941,13 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
> goto restore;
> }
>
> + if (btrfs_test_opt(fs_info, RESCUE_ALL)) {
> + btrfs_err(fs_info,
> + "remounting read-write with rescue=all is not allowed");
> + ret = -EINVAL;
> + goto restore;
> + }
> +
> ret = btrfs_cleanup_fs_roots(fs_info);
> if (ret)
> goto restore;
> @@ -2238,8 +2262,9 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
> * still can allocate chunks and thus are fine using the currently
> * calculated f_bavail.
> */
> - if (!mixed && block_rsv->space_info->full &&
> - total_free_meta - thresh < block_rsv->size)
> + if (btrfs_test_opt(fs_info, RESCUE_ALL) ||
> + (!mixed && block_rsv->space_info->full &&
> + total_free_meta - thresh < block_rsv->size))
> buf->f_bavail = 0;
>
> buf->f_type = BTRFS_SUPER_MAGIC;
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 2a55356ef4a2..614acd4b9988 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -7648,6 +7648,13 @@ int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info)
> u64 prev_dev_ext_end = 0;
> int ret = 0;
>
> + /*
> + * For rescue=all mount option, we're already RO and are salvaging
> + * data, no need for such strict check.
> + */
> + if (btrfs_test_opt(fs_info, RESCUE_ALL))
> + return 0;
> +
> key.objectid = 1;
> key.type = BTRFS_DEV_EXTENT_KEY;
> key.offset = 0;
>
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]
next prev parent reply other threads:[~2020-09-08 23:11 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-09-08 15:43 [PATCH] btrfs: introduce rescue=all Josef Bacik
2020-09-08 23:10 ` Qu Wenruo [this message]
2020-09-09 7:20 ` David Sterba
2020-09-09 7:25 ` Qu Wenruo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=b862077c-0903-b4f0-ccc9-ee1c815534bc@gmx.com \
--to=quwenruo.btrfs@gmx.com \
--cc=josef@toxicpanda.com \
--cc=kernel-team@fb.com \
--cc=linux-btrfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox