From: Liu Bo <bo.li.liu@oracle.com>
To: Qu Wenruo <quwenruo@cn.fujitsu.com>
Cc: linux-btrfs@vger.kernel.org
Subject: Re: [PATCH v4 1/5] btrfs: scrub: Introduce full stripe lock for RAID56
Date: Thu, 30 Mar 2017 09:49:21 -0700 [thread overview]
Message-ID: <20170330164920.GA8963@lim.localdomain> (raw)
In-Reply-To: <20170330063251.16872-2-quwenruo@cn.fujitsu.com>
On Thu, Mar 30, 2017 at 02:32:47PM +0800, Qu Wenruo wrote:
> Unlike mirror based profiles, RAID5/6 recovery needs to read out the
> whole full stripe.
>
> And if we don't do proper protect, it can easily cause race condition.
>
> Introduce 2 new functions: lock_full_stripe() and unlock_full_stripe()
> for RAID5/6.
> Which stores a rb_tree of mutex for full stripes, so scrub callers can
> use them to lock a full stripe to avoid race.
>
> Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
> Reviewed-by: Liu Bo <bo.li.liu@oracle.com>
> ---
> fs/btrfs/ctree.h | 17 ++++
> fs/btrfs/extent-tree.c | 11 +++
> fs/btrfs/scrub.c | 217 +++++++++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 245 insertions(+)
>
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index 29b7fc28c607..9fe56da21fed 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -538,6 +538,14 @@ struct btrfs_io_ctl {
> unsigned check_crcs:1;
> };
>
> +/*
> + * Tree to record all locked full stripes of a RAID5/6 block group
> + */
> +struct btrfs_full_stripe_locks_tree {
> + struct rb_root root;
> + struct mutex lock;
> +};
> +
> struct btrfs_block_group_cache {
> struct btrfs_key key;
> struct btrfs_block_group_item item;
> @@ -648,6 +656,9 @@ struct btrfs_block_group_cache {
> * Protected by free_space_lock.
> */
> int needs_free_space;
> +
> + /* Record locked full stripes for RAID5/6 block group */
> + struct btrfs_full_stripe_locks_tree full_stripe_locks_root;
> };
>
> /* delayed seq elem */
> @@ -3647,6 +3658,12 @@ int btrfs_scrub_cancel_dev(struct btrfs_fs_info *info,
> struct btrfs_device *dev);
> int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
> struct btrfs_scrub_progress *progress);
> +static inline void btrfs_init_full_stripe_locks_tree(
> + struct btrfs_full_stripe_locks_tree *locks_root)
> +{
> + locks_root->root = RB_ROOT;
> + mutex_init(&locks_root->lock);
> +}
>
> /* dev-replace.c */
> void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info);
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index be5477676cc8..5f51ce81f484 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -131,6 +131,16 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
> if (atomic_dec_and_test(&cache->count)) {
> WARN_ON(cache->pinned > 0);
> WARN_ON(cache->reserved > 0);
> +
> + /*
> + * If not empty, someone is still holding mutex of
> + * full_stripe_lock, which can only be released by caller.
> + * And it will definitely cause use-after-free when caller
> + * tries to release full stripe lock.
> + *
> + * No better way to resolve, but only to warn.
> + */
> + WARN_ON(!RB_EMPTY_ROOT(&cache->full_stripe_locks_root.root));
> kfree(cache->free_space_ctl);
> kfree(cache);
> }
> @@ -9917,6 +9927,7 @@ btrfs_create_block_group_cache(struct btrfs_fs_info *fs_info,
> btrfs_init_free_space_ctl(cache);
> atomic_set(&cache->trimming, 0);
> mutex_init(&cache->free_space_lock);
> + btrfs_init_full_stripe_locks_tree(&cache->full_stripe_locks_root);
>
> return cache;
> }
> diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
> index b0251eb1239f..5fc99a92b4ff 100644
> --- a/fs/btrfs/scrub.c
> +++ b/fs/btrfs/scrub.c
> @@ -240,6 +240,13 @@ struct scrub_warning {
> struct btrfs_device *dev;
> };
>
> +struct full_stripe_lock {
> + struct rb_node node;
> + u64 logical;
> + u64 refs;
> + struct mutex mutex;
> +};
> +
> static void scrub_pending_bio_inc(struct scrub_ctx *sctx);
> static void scrub_pending_bio_dec(struct scrub_ctx *sctx);
> static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx);
> @@ -349,6 +356,216 @@ static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
> }
>
> /*
> + * Insert new full stripe lock into full stripe locks tree
> + *
> + * Return pointer to existing or newly inserted full_stripe_lock structure if
> + * everything works well.
> + * Return ERR_PTR(-ENOMEM) if we failed to allocate memory
> + *
> + * NOTE: caller must hold full_stripe_locks_root->lock before calling this
> + * function
> + */
> +static struct full_stripe_lock *insert_full_stripe_lock(
> + struct btrfs_full_stripe_locks_tree *locks_root,
> + u64 fstripe_logical)
> +{
> + struct rb_node **p;
> + struct rb_node *parent = NULL;
> + struct full_stripe_lock *entry;
> + struct full_stripe_lock *ret;
> +
> + WARN_ON(!mutex_is_locked(&locks_root->lock));
> +
> + p = &locks_root->root.rb_node;
> + while (*p) {
> + parent = *p;
> + entry = rb_entry(parent, struct full_stripe_lock, node);
> + if (fstripe_logical < entry->logical) {
> + p = &(*p)->rb_left;
> + } else if (fstripe_logical > entry->logical) {
> + p = &(*p)->rb_right;
> + } else {
> + entry->refs++;
> + return entry;
> + }
> + }
> +
> + /* Insert new lock */
> + ret = kmalloc(sizeof(*ret), GFP_KERNEL);
> + if (!ret)
> + return ERR_PTR(-ENOMEM);
> + ret->logical = fstripe_logical;
> + ret->refs = 1;
> + mutex_init(&ret->mutex);
> +
> + rb_link_node(&ret->node, parent, p);
> + rb_insert_color(&ret->node, &locks_root->root);
> + return ret;
> +}
> +
> +/*
> + * Search for a full stripe lock of a block group
> + *
> + * Return pointer to existing full stripe lock if found
> + * Return NULL if not found
> + */
> +static struct full_stripe_lock *search_full_stripe_lock(
> + struct btrfs_full_stripe_locks_tree *locks_root,
> + u64 fstripe_logical)
> +{
> + struct rb_node *node;
> + struct full_stripe_lock *entry;
> +
> + WARN_ON(!mutex_is_locked(&locks_root->lock));
> +
> + node = locks_root->root.rb_node;
> + while (node) {
> + entry = rb_entry(node, struct full_stripe_lock, node);
> + if (fstripe_logical < entry->logical)
> + node = node->rb_left;
> + else if (fstripe_logical > entry->logical)
> + node = node->rb_right;
> + else
> + return entry;
> + }
> + return NULL;
> +}
> +
> +/*
> + * Helper to get full stripe logical from a normal bytenr.
> + *
> + * Caller must ensure @cache is a RAID56 block group.
> + */
> +static u64 get_full_stripe_logical(struct btrfs_block_group_cache *cache,
> + u64 bytenr)
> +{
> + u64 ret;
> +
> + /*
> + * round_down() can only handle power of 2, while RAID56 full
> + * stripe len can be 64KiB * n, so need manual round down.
> + */
> + ret = (bytenr - cache->key.objectid) / cache->full_stripe_len *
> + cache->full_stripe_len + cache->key.objectid;
Can you please use div_u64 instead? '/' would cause building errors.
Reviewed-by: Liu Bo <bo.li.liu@oracle.com>
Thanks,
-liubo
next prev parent reply other threads:[~2017-03-30 16:50 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-03-30 6:32 [PATCH v4 0/5] raid56: scrub related fixes Qu Wenruo
2017-03-30 6:32 ` [PATCH v4 1/5] btrfs: scrub: Introduce full stripe lock for RAID56 Qu Wenruo
2017-03-30 16:49 ` Liu Bo [this message]
2017-03-31 1:29 ` Qu Wenruo
2017-03-31 17:34 ` Liu Bo
2017-04-03 0:48 ` Qu Wenruo
2017-03-30 6:32 ` [PATCH v4 2/5] btrfs: scrub: Fix RAID56 recovery race condition Qu Wenruo
2017-03-30 17:05 ` Liu Bo
2017-03-31 0:25 ` Qu Wenruo
2017-03-31 1:40 ` Qu Wenruo
2017-03-30 6:32 ` [PATCH v4 3/5] btrfs: scrub: Don't append on-disk pages for raid56 scrub Qu Wenruo
2017-03-30 6:32 ` [PATCH v4 4/5] btrfs: Wait flighting bio before freeing target device for raid56 Qu Wenruo
2017-03-30 6:32 ` [PATCH v4 5/5] btrfs: Prevent scrub recheck from racing with dev replace Qu Wenruo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170330164920.GA8963@lim.localdomain \
--to=bo.li.liu@oracle.com \
--cc=linux-btrfs@vger.kernel.org \
--cc=quwenruo@cn.fujitsu.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).