* [PATCH 02/12] Btrfs: Kill allocate_wait in space_info
From: Yan, Zheng
Date: 2010-04-19 10:45 UTC
To: linux-btrfs
We already have fs_info->chunk_mutex to avoid concurrent
chunk creation.
Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
---
diff -urp 2/fs/btrfs/ctree.h 3/fs/btrfs/ctree.h
--- 2/fs/btrfs/ctree.h 2010-04-18 08:12:22.086699485 +0800
+++ 3/fs/btrfs/ctree.h 2010-04-18 08:13:15.457699211 +0800
@@ -700,9 +700,7 @@ struct btrfs_space_info {
struct list_head list;
/* for controlling how we free up space for allocations */
- wait_queue_head_t allocate_wait;
wait_queue_head_t flush_wait;
- int allocating_chunk;
int flushing;
/* for block groups in our same type */
diff -urp 2/fs/btrfs/extent-tree.c 3/fs/btrfs/extent-tree.c
--- 2/fs/btrfs/extent-tree.c 2010-04-18 08:12:22.092698714 +0800
+++ 3/fs/btrfs/extent-tree.c 2010-04-18 08:13:15.463699138 +0800
@@ -70,6 +70,9 @@ static int find_next_key(struct btrfs_pa
struct btrfs_key *key);
static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
int dump_block_groups);
+static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_space_info *sinfo, u64 num_bytes);
static noinline int
block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -2687,7 +2690,6 @@ static int update_space_info(struct btrf
INIT_LIST_HEAD(&found->block_groups[i]);
init_rwsem(&found->groups_sem);
init_waitqueue_head(&found->flush_wait);
- init_waitqueue_head(&found->allocate_wait);
spin_lock_init(&found->lock);
found->flags = flags & (BTRFS_BLOCK_GROUP_DATA |
BTRFS_BLOCK_GROUP_SYSTEM |
@@ -3000,71 +3002,6 @@ flush:
wake_up(&info->flush_wait);
}
-static int maybe_allocate_chunk(struct btrfs_root *root,
- struct btrfs_space_info *info)
-{
- struct btrfs_super_block *disk_super = &root->fs_info->super_copy;
- struct btrfs_trans_handle *trans;
- bool wait = false;
- int ret = 0;
- u64 min_metadata;
- u64 free_space;
-
- free_space = btrfs_super_total_bytes(disk_super);
- /*
- * we allow the metadata to grow to a max of either 10gb or 5% of the
- * space in the volume.
- */
- min_metadata = min((u64)10 * 1024 * 1024 * 1024,
- div64_u64(free_space * 5, 100));
- if (info->total_bytes >= min_metadata) {
- spin_unlock(&info->lock);
- return 0;
- }
-
- if (info->full) {
- spin_unlock(&info->lock);
- return 0;
- }
-
- if (!info->allocating_chunk) {
- info->force_alloc = 1;
- info->allocating_chunk = 1;
- } else {
- wait = true;
- }
-
- spin_unlock(&info->lock);
-
- if (wait) {
- wait_event(info->allocate_wait,
- !info->allocating_chunk);
- return 1;
- }
-
- trans = btrfs_start_transaction(root, 1);
- if (!trans) {
- ret = -ENOMEM;
- goto out;
- }
-
- ret = do_chunk_alloc(trans, root->fs_info->extent_root,
- 4096 + 2 * 1024 * 1024,
- info->flags, 0);
- btrfs_end_transaction(trans, root);
- if (ret)
- goto out;
-out:
- spin_lock(&info->lock);
- info->allocating_chunk = 0;
- spin_unlock(&info->lock);
- wake_up(&info->allocate_wait);
-
- if (ret)
- return 0;
- return 1;
-}
-
/*
* Reserve metadata space for delalloc.
*/
@@ -3105,7 +3042,8 @@ again:
flushed++;
if (flushed == 1) {
- if (maybe_allocate_chunk(root, meta_sinfo))
+ if (maybe_allocate_chunk(NULL, root, meta_sinfo,
+ num_bytes))
goto again;
flushed++;
} else {
@@ -3220,7 +3158,8 @@ again:
if (used > meta_sinfo->total_bytes) {
retries++;
if (retries == 1) {
- if (maybe_allocate_chunk(root, meta_sinfo))
+ if (maybe_allocate_chunk(NULL, root, meta_sinfo,
+ num_bytes))
goto again;
retries++;
} else {
@@ -3417,13 +3356,28 @@ static void force_metadata_allocation(st
rcu_read_unlock();
}
+static int should_alloc_chunk(struct btrfs_space_info *sinfo,
+ u64 alloc_bytes)
+{
+ u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
+
+ if (sinfo->bytes_used + sinfo->bytes_reserved +
+ alloc_bytes + 256 * 1024 * 1024 < num_bytes)
+ return 0;
+
+ if (sinfo->bytes_used + sinfo->bytes_reserved +
+ alloc_bytes < div_factor(num_bytes, 8))
+ return 0;
+
+ return 1;
+}
+
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root, u64 alloc_bytes,
u64 flags, int force)
{
struct btrfs_space_info *space_info;
struct btrfs_fs_info *fs_info = extent_root->fs_info;
- u64 thresh;
int ret = 0;
mutex_lock(&fs_info->chunk_mutex);
@@ -3446,11 +3400,7 @@ static int do_chunk_alloc(struct btrfs_t
goto out;
}
- thresh = space_info->total_bytes - space_info->bytes_readonly;
- thresh = div_factor(thresh, 8);
- if (!force &&
- (space_info->bytes_used + space_info->bytes_pinned +
- space_info->bytes_reserved + alloc_bytes) < thresh) {
+ if (!force && !should_alloc_chunk(space_info, alloc_bytes)) {
spin_unlock(&space_info->lock);
goto out;
}
@@ -3472,6 +3422,8 @@ static int do_chunk_alloc(struct btrfs_t
spin_lock(&space_info->lock);
if (ret)
space_info->full = 1;
+ else
+ ret = 1;
space_info->force_alloc = 0;
spin_unlock(&space_info->lock);
out:
@@ -3479,6 +3431,38 @@ out:
return ret;
}
+static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_space_info *sinfo, u64 num_bytes)
+{
+ int ret;
+ int end_trans = 0;
+
+ if (sinfo->full)
+ return 0;
+
+ spin_lock(&sinfo->lock);
+ ret = should_alloc_chunk(sinfo, num_bytes + 2 * 1024 * 1024);
+ spin_unlock(&sinfo->lock);
+ if (!ret)
+ return 0;
+
+ if (!trans) {
+ trans = btrfs_join_transaction(root, 1);
+ BUG_ON(IS_ERR(trans));
+ end_trans = 1;
+ }
+
+ ret = do_chunk_alloc(trans, root->fs_info->extent_root,
+ num_bytes + 2 * 1024 * 1024,
+ get_alloc_profile(root, sinfo->flags), 0);
+
+ if (end_trans)
+ btrfs_end_transaction(trans, root);
+
+ return ret == 1 ? 1 : 0;
+}
+
static int update_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, int alloc,
* Re: [PATCH 02/12] Btrfs: Kill allocate_wait in space_info
From: Josef Bacik
Date: 2010-04-19 13:57 UTC
To: Yan, Zheng; +Cc: linux-btrfs
On Mon, Apr 19, 2010 at 06:45:44PM +0800, Yan, Zheng wrote:
> We already have fs_info->chunk_mutex to avoid concurrent
> chunk creation.
>
> Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
>
> [...]
> +static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
> + struct btrfs_root *root,
> + struct btrfs_space_info *sinfo, u64 num_bytes)
> +{
> + int ret;
> + int end_trans = 0;
> +
> + if (sinfo->full)
> + return 0;
> +
maybe_allocate_chunk is called with the info->lock already held, so this
will deadlock.
> + spin_lock(&sinfo->lock);
> + ret = should_alloc_chunk(sinfo, num_bytes + 2 * 1024 * 1024);
> + spin_unlock(&sinfo->lock);
> + if (!ret)
> + return 0;
> +
> + if (!trans) {
> + trans = btrfs_join_transaction(root, 1);
> + BUG_ON(IS_ERR(trans));
> + end_trans = 1;
> + }
> +
> + ret = do_chunk_alloc(trans, root->fs_info->extent_root,
> + num_bytes + 2 * 1024 * 1024,
> + get_alloc_profile(root, sinfo->flags), 0);
> +
> + if (end_trans)
> + btrfs_end_transaction(trans, root);
> +
> + return ret == 1 ? 1 : 0;
> +}
The purpose of maybe_allocate_chunk was that there is no way to know if some
other CPU is currently trying to allocate a chunk for the given space info. We
could have two CPUs come into do_chunk_alloc at relatively the same time and
end up allocating twice the amount of space, which is why I did the waitqueue
thing. It seems like this is still a possibility with your patch. Thanks,
Josef
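
To make the inline deadlock comment above concrete, here is a minimal
compilable userspace analogue, with pthread spinlocks standing in for the
kernel's (all names are hypothetical, not the btrfs source). Like kernel
spinlocks, pthread spinlocks are not recursive, so re-acquiring sinfo->lock
while the caller still holds it spins forever:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_spinlock_t info_lock;    /* stands in for sinfo->lock */

    /* Analogue of the patch's maybe_allocate_chunk(): takes the lock again. */
    static int maybe_allocate_chunk_sketch(void)
    {
            pthread_spin_lock(&info_lock);  /* second acquisition on the same
                                               thread: spins forever */
            /* ... the should_alloc_chunk() check would go here ... */
            pthread_spin_unlock(&info_lock);
            return 0;
    }

    int main(void)
    {
            pthread_spin_init(&info_lock, PTHREAD_PROCESS_PRIVATE);

            pthread_spin_lock(&info_lock); /* the reserve-metadata paths enter
                                              with info->lock already held */
            maybe_allocate_chunk_sketch(); /* never returns: self-deadlock */
            printf("unreached\n");
            return 0;
    }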
* Re: [PATCH 02/12] Btrfs: Kill allocate_wait in space_info
From: Yan, Zheng
Date: 2010-04-19 14:46 UTC
To: Josef Bacik; +Cc: linux-btrfs
On Mon, Apr 19, 2010 at 9:57 PM, Josef Bacik <josef@redhat.com> wrote:
> The purpose of maybe_allocate_chunk was that there is no way to know
> if some other CPU is currently trying to allocate a chunk for the given
> space info. We could have two CPUs come into do_chunk_alloc at
> relatively the same time and end up allocating twice the amount of
> space, which is why I did the waitqueue thing. It seems like this is
> still a possibility with your patch. Thanks,
>
This is impossible because the very first thing do_chunk_alloc does is
lock the chunk_mutex.

Yan, Zheng
* Re: [PATCH 02/12] Btrfs: Kill allocate_wait in space_info
From: Josef Bacik
Date: 2010-04-19 14:48 UTC
To: Yan, Zheng; +Cc: Josef Bacik, linux-btrfs
On Mon, Apr 19, 2010 at 10:46:12PM +0800, Yan, Zheng wrote:
> On Mon, Apr 19, 2010 at 9:57 PM, Josef Bacik <josef@redhat.com> wrote:
> > The purpose of maybe_allocate_chunk was that there is no way to know
> > if some other CPU is currently trying to allocate a chunk for the given
> > space info. We could have two CPUs come into do_chunk_alloc at
> > relatively the same time and end up allocating twice the amount of
> > space, which is why I did the waitqueue thing. It seems like this is
> > still a possibility with your patch. Thanks,
> >
> This is impossible because the very first thing do_chunk_alloc does is
> lock the chunk_mutex.
>

Sure, that just means we don't get two things creating chunks at the
same time; it doesn't stop them from creating chunks one right after
another. So CPUs 0 and 1 come into the check-free-space stuff, realize
they need to allocate a chunk, and race to call do_chunk_alloc. One of
them wins, and the other blocks on the chunk_mutex lock. When the first
finishes, the other one is able to continue and do what it was
originally going to do, and then you get two chunks when you really
only wanted one. Thanks,

Josef
* Re: [PATCH 02/12] Btrfs: Kill allocate_wait in space_info
From: Yan, Zheng
Date: 2010-04-19 15:34 UTC
To: Josef Bacik; +Cc: linux-btrfs
On Mon, Apr 19, 2010 at 10:48 PM, Josef Bacik <josef@redhat.com> wrote:
> On Mon, Apr 19, 2010 at 10:46:12PM +0800, Yan, Zheng wrote:
>> On Mon, Apr 19, 2010 at 9:57 PM, Josef Bacik <josef@redhat.com> wrote:
>> > The purpose of maybe_allocate_chunk was that there is no way to know
>> > if some other CPU is currently trying to allocate a chunk for the given
>> > space info. We could have two CPUs come into do_chunk_alloc at
>> > relatively the same time and end up allocating twice the amount of
>> > space, which is why I did the waitqueue thing. It seems like this is
>> > still a possibility with your patch. Thanks,
>> >
>> This is impossible because the very first thing do_chunk_alloc does is
>> lock the chunk_mutex.
>>
>
> Sure, that just means we don't get two things creating chunks at the
> same time; it doesn't stop them from creating chunks one right after
> another. So CPUs 0 and 1 come into the check-free-space stuff, realize
> they need to allocate a chunk, and race to call do_chunk_alloc. One of
> them wins, and the other blocks on the chunk_mutex lock. When the first
> finishes, the other one is able to continue and do what it was
> originally going to do, and then you get two chunks when you really
> only wanted one. Thanks,
>
There is a check in do_chunk_alloc (should_alloc_chunk is re-evaluated
under the chunk_mutex), so the later caller will do nothing if the first
call adds enough space.

Yan Zheng
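
A sketch of the pattern Yan is describing, as a compilable userspace
analogue (all names are hypothetical, and only the first
should_alloc_chunk threshold, the 256MB slack test, is modelled): because
the threshold is re-tested after the mutex is taken, the racer that loses
the chunk_mutex race observes the space the winner just added and returns
without allocating a second chunk.

    #include <pthread.h>
    #include <stdint.h>

    static pthread_mutex_t chunk_mutex = PTHREAD_MUTEX_INITIALIZER;
    static uint64_t total_bytes;    /* like space_info->total_bytes */
    static uint64_t used_bytes;     /* like bytes_used + bytes_reserved */

    /* Analogue of should_alloc_chunk(): allocate only when slack is low. */
    static int should_alloc(uint64_t alloc_bytes)
    {
            if (used_bytes + alloc_bytes + 256ULL * 1024 * 1024 < total_bytes)
                    return 0;       /* plenty of slack, no new chunk */
            return 1;
    }

    /* Analogue of do_chunk_alloc(): the check repeats under the mutex. */
    static int do_chunk_alloc_sketch(uint64_t alloc_bytes)
    {
            int ret = 0;

            pthread_mutex_lock(&chunk_mutex);
            if (!should_alloc(alloc_bytes))
                    goto out;       /* losing racer lands here: the winner
                                       already grew total_bytes */
            total_bytes += 1024ULL * 1024 * 1024; /* "allocate" a 1GB chunk */
            ret = 1;
    out:
            pthread_mutex_unlock(&chunk_mutex);
            return ret;
    }

    int main(void)
    {
            total_bytes = 1024ULL * 1024 * 1024;  /* 1GB, nearly full */
            used_bytes  = 900ULL * 1024 * 1024;

            do_chunk_alloc_sketch(4096);  /* allocates: returns 1 */
            do_chunk_alloc_sketch(4096);  /* no-op: returns 0, slack now
                                             exceeds the 256MB threshold */
            return 0;
    }

Whether this back-to-back re-check is enough, versus the removed
waitqueue which also made concurrent callers wait for the in-flight
allocation, is exactly the disagreement in this thread.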