All of lore.kernel.org
 help / color / mirror / Atom feed
From: Josef Bacik <josef@redhat.com>
To: "Yan, Zheng" <zheng.yan@oracle.com>
Cc: linux-btrfs@vger.kernel.org
Subject: Re: [PATCH 02/12] Btrfs: Kill allocate_wait in space_info
Date: Mon, 19 Apr 2010 09:57:14 -0400	[thread overview]
Message-ID: <20100419135713.GA2352@localhost.localdomain> (raw)
In-Reply-To: <4BCC3458.5030600@oracle.com>

On Mon, Apr 19, 2010 at 06:45:44PM +0800, Yan, Zheng wrote:
> We already have fs_info->chunk_mutex to avoid concurrent
> chunk creation.
> 
> Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
> 
> ---
> diff -urp 2/fs/btrfs/ctree.h 3/fs/btrfs/ctree.h
> --- 2/fs/btrfs/ctree.h	2010-04-18 08:12:22.086699485 +0800
> +++ 3/fs/btrfs/ctree.h	2010-04-18 08:13:15.457699211 +0800
> @@ -700,9 +700,7 @@ struct btrfs_space_info {
>  	struct list_head list;
>  
>  	/* for controlling how we free up space for allocations */
> -	wait_queue_head_t allocate_wait;
>  	wait_queue_head_t flush_wait;
> -	int allocating_chunk;
>  	int flushing;
>  
>  	/* for block groups in our same type */
> diff -urp 2/fs/btrfs/extent-tree.c 3/fs/btrfs/extent-tree.c
> --- 2/fs/btrfs/extent-tree.c	2010-04-18 08:12:22.092698714 +0800
> +++ 3/fs/btrfs/extent-tree.c	2010-04-18 08:13:15.463699138 +0800
> @@ -70,6 +70,9 @@ static int find_next_key(struct btrfs_pa
>  			 struct btrfs_key *key);
>  static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
>  			    int dump_block_groups);
> +static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
> +				struct btrfs_root *root,
> +				struct btrfs_space_info *sinfo, u64 num_bytes);
>  
>  static noinline int
>  block_group_cache_done(struct btrfs_block_group_cache *cache)
> @@ -2687,7 +2690,6 @@ static int update_space_info(struct btrf
>  		INIT_LIST_HEAD(&found->block_groups[i]);
>  	init_rwsem(&found->groups_sem);
>  	init_waitqueue_head(&found->flush_wait);
> -	init_waitqueue_head(&found->allocate_wait);
>  	spin_lock_init(&found->lock);
>  	found->flags = flags & (BTRFS_BLOCK_GROUP_DATA |
>  				BTRFS_BLOCK_GROUP_SYSTEM |
> @@ -3000,71 +3002,6 @@ flush:
>  	wake_up(&info->flush_wait);
>  }
>  
> -static int maybe_allocate_chunk(struct btrfs_root *root,
> -				 struct btrfs_space_info *info)
> -{
> -	struct btrfs_super_block *disk_super = &root->fs_info->super_copy;
> -	struct btrfs_trans_handle *trans;
> -	bool wait = false;
> -	int ret = 0;
> -	u64 min_metadata;
> -	u64 free_space;
> -
> -	free_space = btrfs_super_total_bytes(disk_super);
> -	/*
> -	 * we allow the metadata to grow to a max of either 10gb or 5% of the
> -	 * space in the volume.
> -	 */
> -	min_metadata = min((u64)10 * 1024 * 1024 * 1024,
> -			     div64_u64(free_space * 5, 100));
> -	if (info->total_bytes >= min_metadata) {
> -		spin_unlock(&info->lock);
> -		return 0;
> -	}
> -
> -	if (info->full) {
> -		spin_unlock(&info->lock);
> -		return 0;
> -	}
> -
> -	if (!info->allocating_chunk) {
> -		info->force_alloc = 1;
> -		info->allocating_chunk = 1;
> -	} else {
> -		wait = true;
> -	}
> -
> -	spin_unlock(&info->lock);
> -
> -	if (wait) {
> -		wait_event(info->allocate_wait,
> -			   !info->allocating_chunk);
> -		return 1;
> -	}
> -
> -	trans = btrfs_start_transaction(root, 1);
> -	if (!trans) {
> -		ret = -ENOMEM;
> -		goto out;
> -	}
> -
> -	ret = do_chunk_alloc(trans, root->fs_info->extent_root,
> -			     4096 + 2 * 1024 * 1024,
> -			     info->flags, 0);
> -	btrfs_end_transaction(trans, root);
> -	if (ret)
> -		goto out;
> -out:
> -	spin_lock(&info->lock);
> -	info->allocating_chunk = 0;
> -	spin_unlock(&info->lock);
> -	wake_up(&info->allocate_wait);
> -
> -	if (ret)
> -		return 0;
> -	return 1;
> -}
> -
>  /*
>   * Reserve metadata space for delalloc.
>   */
> @@ -3105,7 +3042,8 @@ again:
>  		flushed++;
>  
>  		if (flushed == 1) {
> -			if (maybe_allocate_chunk(root, meta_sinfo))
> +			if (maybe_allocate_chunk(NULL, root, meta_sinfo,
> +						 num_bytes))
>  				goto again;
>  			flushed++;
>  		} else {
> @@ -3220,7 +3158,8 @@ again:
>  	if (used > meta_sinfo->total_bytes) {
>  		retries++;
>  		if (retries == 1) {
> -			if (maybe_allocate_chunk(root, meta_sinfo))
> +			if (maybe_allocate_chunk(NULL, root, meta_sinfo,
> +						 num_bytes))
>  				goto again;
>  			retries++;
>  		} else {
> @@ -3417,13 +3356,28 @@ static void force_metadata_allocation(st
>  	rcu_read_unlock();
>  }
>  
> +static int should_alloc_chunk(struct btrfs_space_info *sinfo,
> +			      u64 alloc_bytes)
> +{
> +	u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
> +
> +	if (sinfo->bytes_used + sinfo->bytes_reserved +
> +	    alloc_bytes + 256 * 1024 * 1024 < num_bytes)
> +		return 0;
> +
> +	if (sinfo->bytes_used + sinfo->bytes_reserved +
> +	    alloc_bytes < div_factor(num_bytes, 8))
> +		return 0;
> +
> +	return 1;
> +}
> +
>  static int do_chunk_alloc(struct btrfs_trans_handle *trans,
>  			  struct btrfs_root *extent_root, u64 alloc_bytes,
>  			  u64 flags, int force)
>  {
>  	struct btrfs_space_info *space_info;
>  	struct btrfs_fs_info *fs_info = extent_root->fs_info;
> -	u64 thresh;
>  	int ret = 0;
>  
>  	mutex_lock(&fs_info->chunk_mutex);
> @@ -3446,11 +3400,7 @@ static int do_chunk_alloc(struct btrfs_t
>  		goto out;
>  	}
>  
> -	thresh = space_info->total_bytes - space_info->bytes_readonly;
> -	thresh = div_factor(thresh, 8);
> -	if (!force &&
> -	   (space_info->bytes_used + space_info->bytes_pinned +
> -	    space_info->bytes_reserved + alloc_bytes) < thresh) {
> +	if (!force && !should_alloc_chunk(space_info, alloc_bytes)) {
>  		spin_unlock(&space_info->lock);
>  		goto out;
>  	}
> @@ -3472,6 +3422,8 @@ static int do_chunk_alloc(struct btrfs_t
>  	spin_lock(&space_info->lock);
>  	if (ret)
>  		space_info->full = 1;
> +	else
> +		ret = 1;
>  	space_info->force_alloc = 0;
>  	spin_unlock(&space_info->lock);
>  out:
> @@ -3479,6 +3431,38 @@ out:
>  	return ret;
>  }
>  
> +static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
> +				struct btrfs_root *root,
> +				struct btrfs_space_info *sinfo, u64 num_bytes)
> +{
> +	int ret;
> +	int end_trans = 0;
> +
> +	if (sinfo->full)
> +		return 0;
> +

maybe_allocate_chunk is called with the info->lock already held, so taking the
lock again here will deadlock.

> +	spin_lock(&sinfo->lock);
> +	ret = should_alloc_chunk(sinfo, num_bytes + 2 * 1024 * 1024);
> +	spin_unlock(&sinfo->lock);
> +	if (!ret)
> +		return 0;
> +
> +	if (!trans) {
> +		trans = btrfs_join_transaction(root, 1);
> +		BUG_ON(IS_ERR(trans));
> +		end_trans = 1;
> +	}
> +
> +	ret = do_chunk_alloc(trans, root->fs_info->extent_root,
> +			     num_bytes + 2 * 1024 * 1024,
> +			     get_alloc_profile(root, sinfo->flags), 0);
> +
> +	if (end_trans)
> +		btrfs_end_transaction(trans, root);
> +
> +	return ret == 1 ? 1 : 0;
> +}
> +
>  static int update_block_group(struct btrfs_trans_handle *trans,
>  			      struct btrfs_root *root,
>  			      u64 bytenr, u64 num_bytes, int alloc,

maybe_allocate_chunk existed because there is no way to know if some other CPU
is currently trying to allocate a chunk for the given space info.  We could have
two CPUs come into do_chunk_alloc at roughly the same time and end up
allocating twice the amount of space, which is why I did the waitqueue thing.
It seems like this is still a possibility with your patch.  Thanks,

Josef

  reply	other threads:[~2010-04-19 13:57 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-04-19 10:45 [PATCH 02/12] Btrfs: Kill allocate_wait in space_info Yan, Zheng
2010-04-19 13:57 ` Josef Bacik [this message]
2010-04-19 14:46   ` Yan, Zheng
2010-04-19 14:48     ` Josef Bacik
2010-04-19 15:34       ` Yan, Zheng

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100419135713.GA2352@localhost.localdomain \
    --to=josef@redhat.com \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=zheng.yan@oracle.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.