linux-btrfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Josef Bacik <josef@redhat.com>
To: "Yan, Zheng" <zheng.yan@oracle.com>
Cc: linux-btrfs@vger.kernel.org
Subject: Re: [PATCH 02/12] Btrfs: Kill allocate_wait in space_info
Date: Mon, 19 Apr 2010 09:57:14 -0400	[thread overview]
Message-ID: <20100419135713.GA2352@localhost.localdomain> (raw)
In-Reply-To: <4BCC3458.5030600@oracle.com>

On Mon, Apr 19, 2010 at 06:45:44PM +0800, Yan, Zheng wrote:
> We already have fs_info->chunk_mutex to avoid concurrent
> chunk creation.
> 
> Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
> 
> ---
> diff -urp 2/fs/btrfs/ctree.h 3/fs/btrfs/ctree.h
> --- 2/fs/btrfs/ctree.h	2010-04-18 08:12:22.086699485 +0800
> +++ 3/fs/btrfs/ctree.h	2010-04-18 08:13:15.457699211 +0800
> @@ -700,9 +700,7 @@ struct btrfs_space_info {
>  	struct list_head list;
>  
>  	/* for controlling how we free up space for allocations */
> -	wait_queue_head_t allocate_wait;
>  	wait_queue_head_t flush_wait;
> -	int allocating_chunk;
>  	int flushing;
>  
>  	/* for block groups in our same type */
> diff -urp 2/fs/btrfs/extent-tree.c 3/fs/btrfs/extent-tree.c
> --- 2/fs/btrfs/extent-tree.c	2010-04-18 08:12:22.092698714 +0800
> +++ 3/fs/btrfs/extent-tree.c	2010-04-18 08:13:15.463699138 +0800
> @@ -70,6 +70,9 @@ static int find_next_key(struct btrfs_pa
>  			 struct btrfs_key *key);
>  static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
>  			    int dump_block_groups);
> +static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
> +				struct btrfs_root *root,
> +				struct btrfs_space_info *sinfo, u64 num_bytes);
>  
>  static noinline int
>  block_group_cache_done(struct btrfs_block_group_cache *cache)
> @@ -2687,7 +2690,6 @@ static int update_space_info(struct btrf
>  		INIT_LIST_HEAD(&found->block_groups[i]);
>  	init_rwsem(&found->groups_sem);
>  	init_waitqueue_head(&found->flush_wait);
> -	init_waitqueue_head(&found->allocate_wait);
>  	spin_lock_init(&found->lock);
>  	found->flags = flags & (BTRFS_BLOCK_GROUP_DATA |
>  				BTRFS_BLOCK_GROUP_SYSTEM |
> @@ -3000,71 +3002,6 @@ flush:
>  	wake_up(&info->flush_wait);
>  }
>  
> -static int maybe_allocate_chunk(struct btrfs_root *root,
> -				 struct btrfs_space_info *info)
> -{
> -	struct btrfs_super_block *disk_super = &root->fs_info->super_copy;
> -	struct btrfs_trans_handle *trans;
> -	bool wait = false;
> -	int ret = 0;
> -	u64 min_metadata;
> -	u64 free_space;
> -
> -	free_space = btrfs_super_total_bytes(disk_super);
> -	/*
> -	 * we allow the metadata to grow to a max of either 10gb or 5% of the
> -	 * space in the volume.
> -	 */
> -	min_metadata = min((u64)10 * 1024 * 1024 * 1024,
> -			     div64_u64(free_space * 5, 100));
> -	if (info->total_bytes >= min_metadata) {
> -		spin_unlock(&info->lock);
> -		return 0;
> -	}
> -
> -	if (info->full) {
> -		spin_unlock(&info->lock);
> -		return 0;
> -	}
> -
> -	if (!info->allocating_chunk) {
> -		info->force_alloc = 1;
> -		info->allocating_chunk = 1;
> -	} else {
> -		wait = true;
> -	}
> -
> -	spin_unlock(&info->lock);
> -
> -	if (wait) {
> -		wait_event(info->allocate_wait,
> -			   !info->allocating_chunk);
> -		return 1;
> -	}
> -
> -	trans = btrfs_start_transaction(root, 1);
> -	if (!trans) {
> -		ret = -ENOMEM;
> -		goto out;
> -	}
> -
> -	ret = do_chunk_alloc(trans, root->fs_info->extent_root,
> -			     4096 + 2 * 1024 * 1024,
> -			     info->flags, 0);
> -	btrfs_end_transaction(trans, root);
> -	if (ret)
> -		goto out;
> -out:
> -	spin_lock(&info->lock);
> -	info->allocating_chunk = 0;
> -	spin_unlock(&info->lock);
> -	wake_up(&info->allocate_wait);
> -
> -	if (ret)
> -		return 0;
> -	return 1;
> -}
> -
>  /*
>   * Reserve metadata space for delalloc.
>   */
> @@ -3105,7 +3042,8 @@ again:
>  		flushed++;
>  
>  		if (flushed == 1) {
> -			if (maybe_allocate_chunk(root, meta_sinfo))
> +			if (maybe_allocate_chunk(NULL, root, meta_sinfo,
> +						 num_bytes))
>  				goto again;
>  			flushed++;
>  		} else {
> @@ -3220,7 +3158,8 @@ again:
>  	if (used > meta_sinfo->total_bytes) {
>  		retries++;
>  		if (retries == 1) {
> -			if (maybe_allocate_chunk(root, meta_sinfo))
> +			if (maybe_allocate_chunk(NULL, root, meta_sinfo,
> +						 num_bytes))
>  				goto again;
>  			retries++;
>  		} else {
> @@ -3417,13 +3356,28 @@ static void force_metadata_allocation(st
>  	rcu_read_unlock();
>  }
>  
> +static int should_alloc_chunk(struct btrfs_space_info *sinfo,
> +			      u64 alloc_bytes)
> +{
> +	u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
> +
> +	if (sinfo->bytes_used + sinfo->bytes_reserved +
> +	    alloc_bytes + 256 * 1024 * 1024 < num_bytes)
> +		return 0;
> +
> +	if (sinfo->bytes_used + sinfo->bytes_reserved +
> +	    alloc_bytes < div_factor(num_bytes, 8))
> +		return 0;
> +
> +	return 1;
> +}
> +
>  static int do_chunk_alloc(struct btrfs_trans_handle *trans,
>  			  struct btrfs_root *extent_root, u64 alloc_bytes,
>  			  u64 flags, int force)
>  {
>  	struct btrfs_space_info *space_info;
>  	struct btrfs_fs_info *fs_info = extent_root->fs_info;
> -	u64 thresh;
>  	int ret = 0;
>  
>  	mutex_lock(&fs_info->chunk_mutex);
> @@ -3446,11 +3400,7 @@ static int do_chunk_alloc(struct btrfs_t
>  		goto out;
>  	}
>  
> -	thresh = space_info->total_bytes - space_info->bytes_readonly;
> -	thresh = div_factor(thresh, 8);
> -	if (!force &&
> -	   (space_info->bytes_used + space_info->bytes_pinned +
> -	    space_info->bytes_reserved + alloc_bytes) < thresh) {
> +	if (!force && !should_alloc_chunk(space_info, alloc_bytes)) {
>  		spin_unlock(&space_info->lock);
>  		goto out;
>  	}
> @@ -3472,6 +3422,8 @@ static int do_chunk_alloc(struct btrfs_t
>  	spin_lock(&space_info->lock);
>  	if (ret)
>  		space_info->full = 1;
> +	else
> +		ret = 1;
>  	space_info->force_alloc = 0;
>  	spin_unlock(&space_info->lock);
>  out:
> @@ -3479,6 +3431,38 @@ out:
>  	return ret;
>  }
>  
> +static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
> +				struct btrfs_root *root,
> +				struct btrfs_space_info *sinfo, u64 num_bytes)
> +{
> +	int ret;
> +	int end_trans = 0;
> +
> +	if (sinfo->full)
> +		return 0;
> +

maybe_allocate_chunk is called with the info->lock already held, this will
deadlock.

> +	spin_lock(&sinfo->lock);
> +	ret = should_alloc_chunk(sinfo, num_bytes + 2 * 1024 * 1024);
> +	spin_unlock(&sinfo->lock);
> +	if (!ret)
> +		return 0;
> +
> +	if (!trans) {
> +		trans = btrfs_join_transaction(root, 1);
> +		BUG_ON(IS_ERR(trans));
> +		end_trans = 1;
> +	}
> +
> +	ret = do_chunk_alloc(trans, root->fs_info->extent_root,
> +			     num_bytes + 2 * 1024 * 1024,
> +			     get_alloc_profile(root, sinfo->flags), 0);
> +
> +	if (end_trans)
> +		btrfs_end_transaction(trans, root);
> +
> +	return ret == 1 ? 1 : 0;
> +}
> +
>  static int update_block_group(struct btrfs_trans_handle *trans,
>  			      struct btrfs_root *root,
>  			      u64 bytenr, u64 num_bytes, int alloc,

The purpose of maybe_allocate_chunk was that there is no way to know if some
other CPU is currently trying to allocate a chunk for the given space info.  We
could have two cpu's come inot do_chunk_alloc at relatively the same time and
end up allocating twice the amount of space, which is why I did the waitqueue
thing.  It seems like this is still a possibility with your patch.  Thanks,

Josef

  reply	other threads:[~2010-04-19 13:57 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-04-19 10:45 [PATCH 02/12] Btrfs: Kill allocate_wait in space_info Yan, Zheng
2010-04-19 13:57 ` Josef Bacik [this message]
2010-04-19 14:46   ` Yan, Zheng
2010-04-19 14:48     ` Josef Bacik
2010-04-19 15:34       ` Yan, Zheng

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100419135713.GA2352@localhost.localdomain \
    --to=josef@redhat.com \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=zheng.yan@oracle.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).