[PATCH 02/12] Btrfs: Kill allocate_wait in space

linux-btrfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH 02/12] Btrfs: Kill allocate_wait in space_info
@ 2010-04-19 10:45 Yan, Zheng
  2010-04-19 13:57 ` Josef Bacik
  0 siblings, 1 reply; 5+ messages in thread
From: Yan, Zheng @ 2010-04-19 10:45 UTC (permalink / raw)
  To: linux-btrfs

We already have fs_info->chunk_mutex to avoid concurrent
chunk creation.

Signed-off-by: Yan Zheng <zheng.yan@oracle.com>

---
diff -urp 2/fs/btrfs/ctree.h 3/fs/btrfs/ctree.h
--- 2/fs/btrfs/ctree.h	2010-04-18 08:12:22.086699485 +0800
+++ 3/fs/btrfs/ctree.h	2010-04-18 08:13:15.457699211 +0800
@@ -700,9 +700,7 @@ struct btrfs_space_info {
 	struct list_head list;
 
 	/* for controlling how we free up space for allocations */
-	wait_queue_head_t allocate_wait;
 	wait_queue_head_t flush_wait;
-	int allocating_chunk;
 	int flushing;
 
 	/* for block groups in our same type */
diff -urp 2/fs/btrfs/extent-tree.c 3/fs/btrfs/extent-tree.c
--- 2/fs/btrfs/extent-tree.c	2010-04-18 08:12:22.092698714 +0800
+++ 3/fs/btrfs/extent-tree.c	2010-04-18 08:13:15.463699138 +0800
@@ -70,6 +70,9 @@ static int find_next_key(struct btrfs_pa
 			 struct btrfs_key *key);
 static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
 			    int dump_block_groups);
+static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root,
+				struct btrfs_space_info *sinfo, u64 num_bytes);
 
 static noinline int
 block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -2687,7 +2690,6 @@ static int update_space_info(struct btrf
 		INIT_LIST_HEAD(&found->block_groups[i]);
 	init_rwsem(&found->groups_sem);
 	init_waitqueue_head(&found->flush_wait);
-	init_waitqueue_head(&found->allocate_wait);
 	spin_lock_init(&found->lock);
 	found->flags = flags & (BTRFS_BLOCK_GROUP_DATA |
 				BTRFS_BLOCK_GROUP_SYSTEM |
@@ -3000,71 +3002,6 @@ flush:
 	wake_up(&info->flush_wait);
 }
 
-static int maybe_allocate_chunk(struct btrfs_root *root,
-				 struct btrfs_space_info *info)
-{
-	struct btrfs_super_block *disk_super = &root->fs_info->super_copy;
-	struct btrfs_trans_handle *trans;
-	bool wait = false;
-	int ret = 0;
-	u64 min_metadata;
-	u64 free_space;
-
-	free_space = btrfs_super_total_bytes(disk_super);
-	/*
-	 * we allow the metadata to grow to a max of either 10gb or 5% of the
-	 * space in the volume.
-	 */
-	min_metadata = min((u64)10 * 1024 * 1024 * 1024,
-			     div64_u64(free_space * 5, 100));
-	if (info->total_bytes >= min_metadata) {
-		spin_unlock(&info->lock);
-		return 0;
-	}
-
-	if (info->full) {
-		spin_unlock(&info->lock);
-		return 0;
-	}
-
-	if (!info->allocating_chunk) {
-		info->force_alloc = 1;
-		info->allocating_chunk = 1;
-	} else {
-		wait = true;
-	}
-
-	spin_unlock(&info->lock);
-
-	if (wait) {
-		wait_event(info->allocate_wait,
-			   !info->allocating_chunk);
-		return 1;
-	}
-
-	trans = btrfs_start_transaction(root, 1);
-	if (!trans) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	ret = do_chunk_alloc(trans, root->fs_info->extent_root,
-			     4096 + 2 * 1024 * 1024,
-			     info->flags, 0);
-	btrfs_end_transaction(trans, root);
-	if (ret)
-		goto out;
-out:
-	spin_lock(&info->lock);
-	info->allocating_chunk = 0;
-	spin_unlock(&info->lock);
-	wake_up(&info->allocate_wait);
-
-	if (ret)
-		return 0;
-	return 1;
-}
-
 /*
  * Reserve metadata space for delalloc.
  */
@@ -3105,7 +3042,8 @@ again:
 		flushed++;
 
 		if (flushed == 1) {
-			if (maybe_allocate_chunk(root, meta_sinfo))
+			if (maybe_allocate_chunk(NULL, root, meta_sinfo,
+						 num_bytes))
 				goto again;
 			flushed++;
 		} else {
@@ -3220,7 +3158,8 @@ again:
 	if (used > meta_sinfo->total_bytes) {
 		retries++;
 		if (retries == 1) {
-			if (maybe_allocate_chunk(root, meta_sinfo))
+			if (maybe_allocate_chunk(NULL, root, meta_sinfo,
+						 num_bytes))
 				goto again;
 			retries++;
 		} else {
@@ -3417,13 +3356,28 @@ static void force_metadata_allocation(st
 	rcu_read_unlock();
 }
 
+static int should_alloc_chunk(struct btrfs_space_info *sinfo,
+			      u64 alloc_bytes)
+{
+	u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
+
+	if (sinfo->bytes_used + sinfo->bytes_reserved +
+	    alloc_bytes + 256 * 1024 * 1024 < num_bytes)
+		return 0;
+
+	if (sinfo->bytes_used + sinfo->bytes_reserved +
+	    alloc_bytes < div_factor(num_bytes, 8))
+		return 0;
+
+	return 1;
+}
+
 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *extent_root, u64 alloc_bytes,
 			  u64 flags, int force)
 {
 	struct btrfs_space_info *space_info;
 	struct btrfs_fs_info *fs_info = extent_root->fs_info;
-	u64 thresh;
 	int ret = 0;
 
 	mutex_lock(&fs_info->chunk_mutex);
@@ -3446,11 +3400,7 @@ static int do_chunk_alloc(struct btrfs_t
 		goto out;
 	}
 
-	thresh = space_info->total_bytes - space_info->bytes_readonly;
-	thresh = div_factor(thresh, 8);
-	if (!force &&
-	   (space_info->bytes_used + space_info->bytes_pinned +
-	    space_info->bytes_reserved + alloc_bytes) < thresh) {
+	if (!force && !should_alloc_chunk(space_info, alloc_bytes)) {
 		spin_unlock(&space_info->lock);
 		goto out;
 	}
@@ -3472,6 +3422,8 @@ static int do_chunk_alloc(struct btrfs_t
 	spin_lock(&space_info->lock);
 	if (ret)
 		space_info->full = 1;
+	else
+		ret = 1;
 	space_info->force_alloc = 0;
 	spin_unlock(&space_info->lock);
 out:
@@ -3479,6 +3431,38 @@ out:
 	return ret;
 }
 
+static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root,
+				struct btrfs_space_info *sinfo, u64 num_bytes)
+{
+	int ret;
+	int end_trans = 0;
+
+	if (sinfo->full)
+		return 0;
+
+	spin_lock(&sinfo->lock);
+	ret = should_alloc_chunk(sinfo, num_bytes + 2 * 1024 * 1024);
+	spin_unlock(&sinfo->lock);
+	if (!ret)
+		return 0;
+
+	if (!trans) {
+		trans = btrfs_join_transaction(root, 1);
+		BUG_ON(IS_ERR(trans));
+		end_trans = 1;
+	}
+
+	ret = do_chunk_alloc(trans, root->fs_info->extent_root,
+			     num_bytes + 2 * 1024 * 1024,
+			     get_alloc_profile(root, sinfo->flags), 0);
+
+	if (end_trans)
+		btrfs_end_transaction(trans, root);
+
+	return ret == 1 ? 1 : 0;
+}
+
 static int update_block_group(struct btrfs_trans_handle *trans,
 			      struct btrfs_root *root,
 			      u64 bytenr, u64 num_bytes, int alloc,

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 02/12] Btrfs: Kill allocate_wait in space_info
  2010-04-19 10:45 [PATCH 02/12] Btrfs: Kill allocate_wait in space_info Yan, Zheng
@ 2010-04-19 13:57 ` Josef Bacik
  2010-04-19 14:46   ` Yan, Zheng
  0 siblings, 1 reply; 5+ messages in thread
From: Josef Bacik @ 2010-04-19 13:57 UTC (permalink / raw)
  To: Yan, Zheng; +Cc: linux-btrfs

On Mon, Apr 19, 2010 at 06:45:44PM +0800, Yan, Zheng wrote:
> We already have fs_info->chunk_mutex to avoid concurrent
> chunk creation.
> 
> Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
> 
> ---
> diff -urp 2/fs/btrfs/ctree.h 3/fs/btrfs/ctree.h
> --- 2/fs/btrfs/ctree.h	2010-04-18 08:12:22.086699485 +0800
> +++ 3/fs/btrfs/ctree.h	2010-04-18 08:13:15.457699211 +0800
> @@ -700,9 +700,7 @@ struct btrfs_space_info {
>  	struct list_head list;
>  
>  	/* for controlling how we free up space for allocations */
> -	wait_queue_head_t allocate_wait;
>  	wait_queue_head_t flush_wait;
> -	int allocating_chunk;
>  	int flushing;
>  
>  	/* for block groups in our same type */
> diff -urp 2/fs/btrfs/extent-tree.c 3/fs/btrfs/extent-tree.c
> --- 2/fs/btrfs/extent-tree.c	2010-04-18 08:12:22.092698714 +0800
> +++ 3/fs/btrfs/extent-tree.c	2010-04-18 08:13:15.463699138 +0800
> @@ -70,6 +70,9 @@ static int find_next_key(struct btrfs_pa
>  			 struct btrfs_key *key);
>  static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
>  			    int dump_block_groups);
> +static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
> +				struct btrfs_root *root,
> +				struct btrfs_space_info *sinfo, u64 num_bytes);
>  
>  static noinline int
>  block_group_cache_done(struct btrfs_block_group_cache *cache)
> @@ -2687,7 +2690,6 @@ static int update_space_info(struct btrf
>  		INIT_LIST_HEAD(&found->block_groups[i]);
>  	init_rwsem(&found->groups_sem);
>  	init_waitqueue_head(&found->flush_wait);
> -	init_waitqueue_head(&found->allocate_wait);
>  	spin_lock_init(&found->lock);
>  	found->flags = flags & (BTRFS_BLOCK_GROUP_DATA |
>  				BTRFS_BLOCK_GROUP_SYSTEM |
> @@ -3000,71 +3002,6 @@ flush:
>  	wake_up(&info->flush_wait);
>  }
>  
> -static int maybe_allocate_chunk(struct btrfs_root *root,
> -				 struct btrfs_space_info *info)
> -{
> -	struct btrfs_super_block *disk_super = &root->fs_info->super_copy;
> -	struct btrfs_trans_handle *trans;
> -	bool wait = false;
> -	int ret = 0;
> -	u64 min_metadata;
> -	u64 free_space;
> -
> -	free_space = btrfs_super_total_bytes(disk_super);
> -	/*
> -	 * we allow the metadata to grow to a max of either 10gb or 5% of the
> -	 * space in the volume.
> -	 */
> -	min_metadata = min((u64)10 * 1024 * 1024 * 1024,
> -			     div64_u64(free_space * 5, 100));
> -	if (info->total_bytes >= min_metadata) {
> -		spin_unlock(&info->lock);
> -		return 0;
> -	}
> -
> -	if (info->full) {
> -		spin_unlock(&info->lock);
> -		return 0;
> -	}
> -
> -	if (!info->allocating_chunk) {
> -		info->force_alloc = 1;
> -		info->allocating_chunk = 1;
> -	} else {
> -		wait = true;
> -	}
> -
> -	spin_unlock(&info->lock);
> -
> -	if (wait) {
> -		wait_event(info->allocate_wait,
> -			   !info->allocating_chunk);
> -		return 1;
> -	}
> -
> -	trans = btrfs_start_transaction(root, 1);
> -	if (!trans) {
> -		ret = -ENOMEM;
> -		goto out;
> -	}
> -
> -	ret = do_chunk_alloc(trans, root->fs_info->extent_root,
> -			     4096 + 2 * 1024 * 1024,
> -			     info->flags, 0);
> -	btrfs_end_transaction(trans, root);
> -	if (ret)
> -		goto out;
> -out:
> -	spin_lock(&info->lock);
> -	info->allocating_chunk = 0;
> -	spin_unlock(&info->lock);
> -	wake_up(&info->allocate_wait);
> -
> -	if (ret)
> -		return 0;
> -	return 1;
> -}
> -
>  /*
>   * Reserve metadata space for delalloc.
>   */
> @@ -3105,7 +3042,8 @@ again:
>  		flushed++;
>  
>  		if (flushed == 1) {
> -			if (maybe_allocate_chunk(root, meta_sinfo))
> +			if (maybe_allocate_chunk(NULL, root, meta_sinfo,
> +						 num_bytes))
>  				goto again;
>  			flushed++;
>  		} else {
> @@ -3220,7 +3158,8 @@ again:
>  	if (used > meta_sinfo->total_bytes) {
>  		retries++;
>  		if (retries == 1) {
> -			if (maybe_allocate_chunk(root, meta_sinfo))
> +			if (maybe_allocate_chunk(NULL, root, meta_sinfo,
> +						 num_bytes))
>  				goto again;
>  			retries++;
>  		} else {
> @@ -3417,13 +3356,28 @@ static void force_metadata_allocation(st
>  	rcu_read_unlock();
>  }
>  
> +static int should_alloc_chunk(struct btrfs_space_info *sinfo,
> +			      u64 alloc_bytes)
> +{
> +	u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
> +
> +	if (sinfo->bytes_used + sinfo->bytes_reserved +
> +	    alloc_bytes + 256 * 1024 * 1024 < num_bytes)
> +		return 0;
> +
> +	if (sinfo->bytes_used + sinfo->bytes_reserved +
> +	    alloc_bytes < div_factor(num_bytes, 8))
> +		return 0;
> +
> +	return 1;
> +}
> +
>  static int do_chunk_alloc(struct btrfs_trans_handle *trans,
>  			  struct btrfs_root *extent_root, u64 alloc_bytes,
>  			  u64 flags, int force)
>  {
>  	struct btrfs_space_info *space_info;
>  	struct btrfs_fs_info *fs_info = extent_root->fs_info;
> -	u64 thresh;
>  	int ret = 0;
>  
>  	mutex_lock(&fs_info->chunk_mutex);
> @@ -3446,11 +3400,7 @@ static int do_chunk_alloc(struct btrfs_t
>  		goto out;
>  	}
>  
> -	thresh = space_info->total_bytes - space_info->bytes_readonly;
> -	thresh = div_factor(thresh, 8);
> -	if (!force &&
> -	   (space_info->bytes_used + space_info->bytes_pinned +
> -	    space_info->bytes_reserved + alloc_bytes) < thresh) {
> +	if (!force && !should_alloc_chunk(space_info, alloc_bytes)) {
>  		spin_unlock(&space_info->lock);
>  		goto out;
>  	}
> @@ -3472,6 +3422,8 @@ static int do_chunk_alloc(struct btrfs_t
>  	spin_lock(&space_info->lock);
>  	if (ret)
>  		space_info->full = 1;
> +	else
> +		ret = 1;
>  	space_info->force_alloc = 0;
>  	spin_unlock(&space_info->lock);
>  out:
> @@ -3479,6 +3431,38 @@ out:
>  	return ret;
>  }
>  
> +static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
> +				struct btrfs_root *root,
> +				struct btrfs_space_info *sinfo, u64 num_bytes)
> +{
> +	int ret;
> +	int end_trans = 0;
> +
> +	if (sinfo->full)
> +		return 0;
> +

maybe_allocate_chunk is called with the info->lock already held, this will
deadlock.

> +	spin_lock(&sinfo->lock);
> +	ret = should_alloc_chunk(sinfo, num_bytes + 2 * 1024 * 1024);
> +	spin_unlock(&sinfo->lock);
> +	if (!ret)
> +		return 0;
> +
> +	if (!trans) {
> +		trans = btrfs_join_transaction(root, 1);
> +		BUG_ON(IS_ERR(trans));
> +		end_trans = 1;
> +	}
> +
> +	ret = do_chunk_alloc(trans, root->fs_info->extent_root,
> +			     num_bytes + 2 * 1024 * 1024,
> +			     get_alloc_profile(root, sinfo->flags), 0);
> +
> +	if (end_trans)
> +		btrfs_end_transaction(trans, root);
> +
> +	return ret == 1 ? 1 : 0;
> +}
> +
>  static int update_block_group(struct btrfs_trans_handle *trans,
>  			      struct btrfs_root *root,
>  			      u64 bytenr, u64 num_bytes, int alloc,

The purpose of maybe_allocate_chunk was that there is no way to know if some
other CPU is currently trying to allocate a chunk for the given space info.  We
could have two cpu's come inot do_chunk_alloc at relatively the same time and
end up allocating twice the amount of space, which is why I did the waitqueue
thing.  It seems like this is still a possibility with your patch.  Thanks,

Josef

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 02/12] Btrfs: Kill allocate_wait in space_info
  2010-04-19 13:57 ` Josef Bacik
@ 2010-04-19 14:46   ` Yan, Zheng
  2010-04-19 14:48     ` Josef Bacik
  0 siblings, 1 reply; 5+ messages in thread
From: Yan, Zheng @ 2010-04-19 14:46 UTC (permalink / raw)
  To: Josef Bacik; +Cc: linux-btrfs

On Mon, Apr 19, 2010 at 9:57 PM, Josef Bacik <josef@redhat.com> wrote:
> The purpose of maybe_allocate_chunk was that there is no way to know =
if some
> other CPU is currently trying to allocate a chunk for the given space=
 info. =A0We
> could have two cpu's come inot do_chunk_alloc at relatively the same =
time and
> end up allocating twice the amount of space, which is why I did the w=
aitqueue
> thing. =A0It seems like this is still a possibility with your patch. =
=A0Thanks,
>
This is impossible because the very first thing do_chunk_alloc does is
lock the chunk_mutex.

Yan, Zheng
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" =
in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 02/12] Btrfs: Kill allocate_wait in space_info
  2010-04-19 14:46   ` Yan, Zheng
@ 2010-04-19 14:48     ` Josef Bacik
  2010-04-19 15:34       ` Yan, Zheng
  0 siblings, 1 reply; 5+ messages in thread
From: Josef Bacik @ 2010-04-19 14:48 UTC (permalink / raw)
  To: Yan, Zheng; +Cc: Josef Bacik, linux-btrfs

On Mon, Apr 19, 2010 at 10:46:12PM +0800, Yan, Zheng wrote:
> On Mon, Apr 19, 2010 at 9:57 PM, Josef Bacik <josef@redhat.com> wrote=
:
> > The purpose of maybe_allocate_chunk was that there is no way to kno=
w if some
> > other CPU is currently trying to allocate a chunk for the given spa=
ce info. =A0We
> > could have two cpu's come inot do_chunk_alloc at relatively the sam=
e time and
> > end up allocating twice the amount of space, which is why I did the=
 waitqueue
> > thing. =A0It seems like this is still a possibility with your patch=
=2E =A0Thanks,
> >
> This is impossible because the very first thing do_chunk_alloc does i=
s
> lock the chunk_mutex.
>=20

Sure, that just means we don't get two things creating chunks at the sa=
me time,
but not from creating them one right after another.  So CPU 0 and 1 com=
e in to
the check free space stuff, realize they need to allocate a chunk, and =
race to
call do_chunk_alloc.  One of them wins, and the other blocks on the chu=
nk_mutex
lock.  When the first finishes the other one is able to continue and do=
 what it
was originally going to do, and then you get two chunks when you really=
 only
wanted one.  Thanks,

Josef
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" =
in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 02/12] Btrfs: Kill allocate_wait in space_info
  2010-04-19 14:48     ` Josef Bacik
@ 2010-04-19 15:34       ` Yan, Zheng
  0 siblings, 0 replies; 5+ messages in thread
From: Yan, Zheng @ 2010-04-19 15:34 UTC (permalink / raw)
  To: Josef Bacik; +Cc: linux-btrfs

On Mon, Apr 19, 2010 at 10:48 PM, Josef Bacik <josef@redhat.com> wrote:
> On Mon, Apr 19, 2010 at 10:46:12PM +0800, Yan, Zheng wrote:
>> On Mon, Apr 19, 2010 at 9:57 PM, Josef Bacik <josef@redhat.com> wrot=
e:
>> > The purpose of maybe_allocate_chunk was that there is no way to kn=
ow if some
>> > other CPU is currently trying to allocate a chunk for the given sp=
ace info. =A0We
>> > could have two cpu's come inot do_chunk_alloc at relatively the sa=
me time and
>> > end up allocating twice the amount of space, which is why I did th=
e waitqueue
>> > thing. =A0It seems like this is still a possibility with your patc=
h. =A0Thanks,
>> >
>> This is impossible because the very first thing do_chunk_alloc does =
is
>> lock the chunk_mutex.
>>
>
> Sure, that just means we don't get two things creating chunks at the =
same time,
> but not from creating them one right after another. =A0So CPU 0 and 1=
 come in to
> the check free space stuff, realize they need to allocate a chunk, an=
d race to
> call do_chunk_alloc. =A0One of them wins, and the other blocks on the=
 chunk_mutex
> lock. =A0When the first finishes the other one is able to continue an=
d do what it
> was originally going to do, and then you get two chunks when you real=
ly only
> wanted one. =A0Thanks,
>

there is a check in do_chunk_alloc, so the later one will do nothing
if the first
call adds enough space.

Yan Zheng
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" =
in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2010-04-19 15:34 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-04-19 10:45 [PATCH 02/12] Btrfs: Kill allocate_wait in space_info Yan, Zheng
2010-04-19 13:57 ` Josef Bacik
2010-04-19 14:46   ` Yan, Zheng
2010-04-19 14:48     ` Josef Bacik
2010-04-19 15:34       ` Yan, Zheng

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).