From: Josef Bacik <josef@redhat.com>
To: "Yan, Zheng" <zheng.yan@oracle.com>
Cc: linux-btrfs@vger.kernel.org
Subject: Re: [PATCH 02/12] Btrfs: Kill allocate_wait in space_info
Date: Mon, 19 Apr 2010 09:57:14 -0400
Message-ID: <20100419135713.GA2352@localhost.localdomain>
In-Reply-To: <4BCC3458.5030600@oracle.com>
On Mon, Apr 19, 2010 at 06:45:44PM +0800, Yan, Zheng wrote:
> We already have fs_info->chunk_mutex to avoid concurrent
> chunk creation.
>
> Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
>
> ---
> diff -urp 2/fs/btrfs/ctree.h 3/fs/btrfs/ctree.h
> --- 2/fs/btrfs/ctree.h 2010-04-18 08:12:22.086699485 +0800
> +++ 3/fs/btrfs/ctree.h 2010-04-18 08:13:15.457699211 +0800
> @@ -700,9 +700,7 @@ struct btrfs_space_info {
> struct list_head list;
>
> /* for controlling how we free up space for allocations */
> - wait_queue_head_t allocate_wait;
> wait_queue_head_t flush_wait;
> - int allocating_chunk;
> int flushing;
>
> /* for block groups in our same type */
> diff -urp 2/fs/btrfs/extent-tree.c 3/fs/btrfs/extent-tree.c
> --- 2/fs/btrfs/extent-tree.c 2010-04-18 08:12:22.092698714 +0800
> +++ 3/fs/btrfs/extent-tree.c 2010-04-18 08:13:15.463699138 +0800
> @@ -70,6 +70,9 @@ static int find_next_key(struct btrfs_pa
> struct btrfs_key *key);
> static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
> int dump_block_groups);
> +static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
> + struct btrfs_root *root,
> + struct btrfs_space_info *sinfo, u64 num_bytes);
>
> static noinline int
> block_group_cache_done(struct btrfs_block_group_cache *cache)
> @@ -2687,7 +2690,6 @@ static int update_space_info(struct btrf
> INIT_LIST_HEAD(&found->block_groups[i]);
> init_rwsem(&found->groups_sem);
> init_waitqueue_head(&found->flush_wait);
> - init_waitqueue_head(&found->allocate_wait);
> spin_lock_init(&found->lock);
> found->flags = flags & (BTRFS_BLOCK_GROUP_DATA |
> BTRFS_BLOCK_GROUP_SYSTEM |
> @@ -3000,71 +3002,6 @@ flush:
> wake_up(&info->flush_wait);
> }
>
> -static int maybe_allocate_chunk(struct btrfs_root *root,
> - struct btrfs_space_info *info)
> -{
> - struct btrfs_super_block *disk_super = &root->fs_info->super_copy;
> - struct btrfs_trans_handle *trans;
> - bool wait = false;
> - int ret = 0;
> - u64 min_metadata;
> - u64 free_space;
> -
> - free_space = btrfs_super_total_bytes(disk_super);
> - /*
> - * we allow the metadata to grow to a max of either 10gb or 5% of the
> - * space in the volume.
> - */
> - min_metadata = min((u64)10 * 1024 * 1024 * 1024,
> - div64_u64(free_space * 5, 100));
> - if (info->total_bytes >= min_metadata) {
> - spin_unlock(&info->lock);
> - return 0;
> - }
> -
> - if (info->full) {
> - spin_unlock(&info->lock);
> - return 0;
> - }
> -
> - if (!info->allocating_chunk) {
> - info->force_alloc = 1;
> - info->allocating_chunk = 1;
> - } else {
> - wait = true;
> - }
> -
> - spin_unlock(&info->lock);
> -
> - if (wait) {
> - wait_event(info->allocate_wait,
> - !info->allocating_chunk);
> - return 1;
> - }
> -
> - trans = btrfs_start_transaction(root, 1);
> - if (!trans) {
> - ret = -ENOMEM;
> - goto out;
> - }
> -
> - ret = do_chunk_alloc(trans, root->fs_info->extent_root,
> - 4096 + 2 * 1024 * 1024,
> - info->flags, 0);
> - btrfs_end_transaction(trans, root);
> - if (ret)
> - goto out;
> -out:
> - spin_lock(&info->lock);
> - info->allocating_chunk = 0;
> - spin_unlock(&info->lock);
> - wake_up(&info->allocate_wait);
> -
> - if (ret)
> - return 0;
> - return 1;
> -}
> -
> /*
> * Reserve metadata space for delalloc.
> */
> @@ -3105,7 +3042,8 @@ again:
> flushed++;
>
> if (flushed == 1) {
> - if (maybe_allocate_chunk(root, meta_sinfo))
> + if (maybe_allocate_chunk(NULL, root, meta_sinfo,
> + num_bytes))
> goto again;
> flushed++;
> } else {
> @@ -3220,7 +3158,8 @@ again:
> if (used > meta_sinfo->total_bytes) {
> retries++;
> if (retries == 1) {
> - if (maybe_allocate_chunk(root, meta_sinfo))
> + if (maybe_allocate_chunk(NULL, root, meta_sinfo,
> + num_bytes))
> goto again;
> retries++;
> } else {
> @@ -3417,13 +3356,28 @@ static void force_metadata_allocation(st
> rcu_read_unlock();
> }
>
> +static int should_alloc_chunk(struct btrfs_space_info *sinfo,
> + u64 alloc_bytes)
> +{
> + u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
> +
> + if (sinfo->bytes_used + sinfo->bytes_reserved +
> + alloc_bytes + 256 * 1024 * 1024 < num_bytes)
> + return 0;
> +
> + if (sinfo->bytes_used + sinfo->bytes_reserved +
> + alloc_bytes < div_factor(num_bytes, 8))
> + return 0;
> +
> + return 1;
> +}
> +
> static int do_chunk_alloc(struct btrfs_trans_handle *trans,
> struct btrfs_root *extent_root, u64 alloc_bytes,
> u64 flags, int force)
> {
> struct btrfs_space_info *space_info;
> struct btrfs_fs_info *fs_info = extent_root->fs_info;
> - u64 thresh;
> int ret = 0;
>
> mutex_lock(&fs_info->chunk_mutex);
> @@ -3446,11 +3400,7 @@ static int do_chunk_alloc(struct btrfs_t
> goto out;
> }
>
> - thresh = space_info->total_bytes - space_info->bytes_readonly;
> - thresh = div_factor(thresh, 8);
> - if (!force &&
> - (space_info->bytes_used + space_info->bytes_pinned +
> - space_info->bytes_reserved + alloc_bytes) < thresh) {
> + if (!force && !should_alloc_chunk(space_info, alloc_bytes)) {
> spin_unlock(&space_info->lock);
> goto out;
> }
> @@ -3472,6 +3422,8 @@ static int do_chunk_alloc(struct btrfs_t
> spin_lock(&space_info->lock);
> if (ret)
> space_info->full = 1;
> + else
> + ret = 1;
> space_info->force_alloc = 0;
> spin_unlock(&space_info->lock);
> out:
> @@ -3479,6 +3431,38 @@ out:
> return ret;
> }
>
> +static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
> + struct btrfs_root *root,
> + struct btrfs_space_info *sinfo, u64 num_bytes)
> +{
> + int ret;
> + int end_trans = 0;
> +
> + if (sinfo->full)
> + return 0;
> +
maybe_allocate_chunk() is called with info->lock already held, so taking
sinfo->lock again here will deadlock.
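
To spell out the ordering I mean (rough sketch of the call sequence, not
something that compiles against the tree; the caller is the delalloc
reserve path quoted earlier in the patch):

	/* caller, which is still holding the space_info lock */
	spin_lock(&meta_sinfo->lock);
	...
	maybe_allocate_chunk(NULL, root, meta_sinfo, num_bytes)
		-> spin_lock(&sinfo->lock);	/* same spinlock, acquired a second time */

Spinlocks aren't recursive, so that second acquire never succeeds.
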
> + spin_lock(&sinfo->lock);
> + ret = should_alloc_chunk(sinfo, num_bytes + 2 * 1024 * 1024);
> + spin_unlock(&sinfo->lock);
> + if (!ret)
> + return 0;
> +
> + if (!trans) {
> + trans = btrfs_join_transaction(root, 1);
> + BUG_ON(IS_ERR(trans));
> + end_trans = 1;
> + }
> +
> + ret = do_chunk_alloc(trans, root->fs_info->extent_root,
> + num_bytes + 2 * 1024 * 1024,
> + get_alloc_profile(root, sinfo->flags), 0);
> +
> + if (end_trans)
> + btrfs_end_transaction(trans, root);
> +
> + return ret == 1 ? 1 : 0;
> +}
> +
> static int update_block_group(struct btrfs_trans_handle *trans,
> struct btrfs_root *root,
> u64 bytenr, u64 num_bytes, int alloc,
The purpose of maybe_allocate_chunk was that there is no way to know whether some
other CPU is currently trying to allocate a chunk for the given space info.  We
could have two CPUs come into do_chunk_alloc at roughly the same time and end up
allocating twice the amount of space we need, which is why I did the waitqueue
thing.  It seems like this is still a possibility with your patch.
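
For reference, this is the hand-off the removed code did (just the pattern from
the hunk deleted above, trimmed down):

	/* entered with info->lock held */
	if (!info->allocating_chunk) {
		info->force_alloc = 1;
		info->allocating_chunk = 1;	/* we will do the allocation */
	} else {
		wait = true;			/* somebody else already is */
	}
	spin_unlock(&info->lock);

	if (wait) {
		wait_event(info->allocate_wait, !info->allocating_chunk);
		return 1;
	}

	/* ... btrfs_start_transaction() + do_chunk_alloc() ... */

	spin_lock(&info->lock);
	info->allocating_chunk = 0;
	spin_unlock(&info->lock);
	wake_up(&info->allocate_wait);

so the second CPU just waits for the first CPU's chunk instead of going into
do_chunk_alloc and allocating another one.  Thanks,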
Josef
Thread overview: 5+ messages
2010-04-19 10:45 [PATCH 02/12] Btrfs: Kill allocate_wait in space_info Yan, Zheng
2010-04-19 13:57 ` Josef Bacik [this message]
2010-04-19 14:46 ` Yan, Zheng
2010-04-19 14:48 ` Josef Bacik
2010-04-19 15:34 ` Yan, Zheng