[PATCH 1/2] btrfs: Use ref_cnt for set_block_group

linux-btrfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH 1/2] btrfs: Use ref_cnt for set_block_group_ro()
@ 2015-05-22  5:46 Zhaolei
  2015-05-22  5:46 ` [PATCH 2/2] btrfs: Fix xfstests btrfs/070 Zhaolei
  0 siblings, 1 reply; 3+ messages in thread
From: Zhaolei @ 2015-05-22  5:46 UTC (permalink / raw)
  To: linux-btrfs; +Cc: Zhao Lei

From: Zhao Lei <zhaolei@cn.fujitsu.com>

More than one code call set_block_group_ro() and restore rw in fail.

Old code use bool bit to save blockgroup's ro state, it can not
support parallel case(it is confirmd exist in my debug log).

This patch use ref count to store ro state, and rename
set_block_group_ro/set_block_group_rw
to
inc_block_group_ro/dec_block_group_ro.

Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
---
 fs/btrfs/ctree.h       |  6 +++---
 fs/btrfs/extent-tree.c | 42 +++++++++++++++++++++---------------------
 fs/btrfs/relocation.c  | 14 ++++++--------
 3 files changed, 30 insertions(+), 32 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 6f364e1..74ce6fc 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1300,7 +1300,7 @@ struct btrfs_block_group_cache {
 	/* for raid56, this is a full stripe, without parity */
 	unsigned long full_stripe_len;
 
-	unsigned int ro:1;
+	unsigned int ro;
 	unsigned int iref:1;
 	unsigned int has_caching_ctl:1;
 	unsigned int removed:1;
@@ -3495,9 +3495,9 @@ int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
 void btrfs_block_rsv_release(struct btrfs_root *root,
 			     struct btrfs_block_rsv *block_rsv,
 			     u64 num_bytes);
-int btrfs_set_block_group_ro(struct btrfs_root *root,
+int btrfs_inc_block_group_ro(struct btrfs_root *root,
 			     struct btrfs_block_group_cache *cache);
-void btrfs_set_block_group_rw(struct btrfs_root *root,
+void btrfs_dec_block_group_ro(struct btrfs_root *root,
 			      struct btrfs_block_group_cache *cache);
 void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
 u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 7effed6..6a82ba0 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -8751,14 +8751,13 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
 	return flags;
 }
 
-static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
+static int inc_block_group_ro(struct btrfs_block_group_cache *cache, int force)
 {
 	struct btrfs_space_info *sinfo = cache->space_info;
 	u64 num_bytes;
 	u64 min_allocable_bytes;
 	int ret = -ENOSPC;
 
-
 	/*
 	 * We need some metadata space and system metadata space for
 	 * allocating chunks in some corner cases until we force to set
@@ -8775,6 +8774,7 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
 	spin_lock(&cache->lock);
 
 	if (cache->ro) {
+		cache->ro++;
 		ret = 0;
 		goto out;
 	}
@@ -8786,7 +8786,7 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
 	    sinfo->bytes_may_use + sinfo->bytes_readonly + num_bytes +
 	    min_allocable_bytes <= sinfo->total_bytes) {
 		sinfo->bytes_readonly += num_bytes;
-		cache->ro = 1;
+		cache->ro++;
 		list_add_tail(&cache->ro_list, &sinfo->ro_bgs);
 		ret = 0;
 	}
@@ -8796,7 +8796,7 @@ out:
 	return ret;
 }
 
-int btrfs_set_block_group_ro(struct btrfs_root *root,
+int btrfs_inc_block_group_ro(struct btrfs_root *root,
 			     struct btrfs_block_group_cache *cache)
 
 {
@@ -8804,8 +8804,6 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
 	u64 alloc_flags;
 	int ret;
 
-	BUG_ON(cache->ro);
-
 again:
 	trans = btrfs_join_transaction(root);
 	if (IS_ERR(trans))
@@ -8830,7 +8828,7 @@ again:
 	}
 
 
-	ret = set_block_group_ro(cache, 0);
+	ret = inc_block_group_ro(cache, 0);
 	if (!ret)
 		goto out;
 	alloc_flags = get_alloc_profile(root, cache->space_info->flags);
@@ -8838,7 +8836,7 @@ again:
 			     CHUNK_ALLOC_FORCE);
 	if (ret < 0)
 		goto out;
-	ret = set_block_group_ro(cache, 0);
+	ret = inc_block_group_ro(cache, 0);
 out:
 	if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
 		alloc_flags = update_block_group_flags(root, cache->flags);
@@ -8899,7 +8897,7 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
 	return free_bytes;
 }
 
-void btrfs_set_block_group_rw(struct btrfs_root *root,
+void btrfs_dec_block_group_ro(struct btrfs_root *root,
 			      struct btrfs_block_group_cache *cache)
 {
 	struct btrfs_space_info *sinfo = cache->space_info;
@@ -8909,11 +8907,13 @@ void btrfs_set_block_group_rw(struct btrfs_root *root,
 
 	spin_lock(&sinfo->lock);
 	spin_lock(&cache->lock);
-	num_bytes = cache->key.offset - cache->reserved - cache->pinned -
-		    cache->bytes_super - btrfs_block_group_used(&cache->item);
-	sinfo->bytes_readonly -= num_bytes;
-	cache->ro = 0;
-	list_del_init(&cache->ro_list);
+	if (!--cache->ro) {
+		num_bytes = cache->key.offset - cache->reserved -
+			    cache->pinned - cache->bytes_super -
+			    btrfs_block_group_used(&cache->item);
+		sinfo->bytes_readonly -= num_bytes;
+		list_del_init(&cache->ro_list);
+	}
 	spin_unlock(&cache->lock);
 	spin_unlock(&sinfo->lock);
 }
@@ -9429,7 +9429,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
 
 		set_avail_alloc_bits(root->fs_info, cache->flags);
 		if (btrfs_chunk_readonly(root, cache->key.objectid)) {
-			set_block_group_ro(cache, 1);
+			inc_block_group_ro(cache, 1);
 		} else if (btrfs_block_group_used(&cache->item) == 0) {
 			spin_lock(&info->unused_bgs_lock);
 			/* Should always be true but just in case. */
@@ -9457,11 +9457,11 @@ int btrfs_read_block_groups(struct btrfs_root *root)
 		list_for_each_entry(cache,
 				&space_info->block_groups[BTRFS_RAID_RAID0],
 				list)
-			set_block_group_ro(cache, 1);
+			inc_block_group_ro(cache, 1);
 		list_for_each_entry(cache,
 				&space_info->block_groups[BTRFS_RAID_SINGLE],
 				list)
-			set_block_group_ro(cache, 1);
+			inc_block_group_ro(cache, 1);
 	}
 
 	init_global_block_rsv(info);
@@ -9930,7 +9930,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
 		spin_unlock(&block_group->lock);
 
 		/* We don't want to force the issue, only flip if it's ok. */
-		ret = set_block_group_ro(block_group, 0);
+		ret = inc_block_group_ro(block_group, 0);
 		up_write(&space_info->groups_sem);
 		if (ret < 0) {
 			ret = 0;
@@ -9944,7 +9944,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
 		/* 1 for btrfs_orphan_reserve_metadata() */
 		trans = btrfs_start_transaction(root, 1);
 		if (IS_ERR(trans)) {
-			btrfs_set_block_group_rw(root, block_group);
+			btrfs_dec_block_group_ro(root, block_group);
 			ret = PTR_ERR(trans);
 			goto next;
 		}
@@ -9971,14 +9971,14 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
 				  EXTENT_DIRTY, GFP_NOFS);
 		if (ret) {
 			mutex_unlock(&fs_info->unused_bg_unpin_mutex);
-			btrfs_set_block_group_rw(root, block_group);
+			btrfs_dec_block_group_ro(root, block_group);
 			goto end_trans;
 		}
 		ret = clear_extent_bits(&fs_info->freed_extents[1], start, end,
 				  EXTENT_DIRTY, GFP_NOFS);
 		if (ret) {
 			mutex_unlock(&fs_info->unused_bg_unpin_mutex);
-			btrfs_set_block_group_rw(root, block_group);
+			btrfs_dec_block_group_ro(root, block_group);
 			goto end_trans;
 		}
 		mutex_unlock(&fs_info->unused_bg_unpin_mutex);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 74b24b0..c93b66f 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -4206,14 +4206,12 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
 	rc->block_group = btrfs_lookup_block_group(fs_info, group_start);
 	BUG_ON(!rc->block_group);
 
-	if (!rc->block_group->ro) {
-		ret = btrfs_set_block_group_ro(extent_root, rc->block_group);
-		if (ret) {
-			err = ret;
-			goto out;
-		}
-		rw = 1;
+	ret = btrfs_inc_block_group_ro(extent_root, rc->block_group);
+	if (ret) {
+		err = ret;
+		goto out;
 	}
+	rw = 1;
 
 	path = btrfs_alloc_path();
 	if (!path) {
@@ -4285,7 +4283,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
 	WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0);
 out:
 	if (err && rw)
-		btrfs_set_block_group_rw(extent_root, rc->block_group);
+		btrfs_dec_block_group_ro(extent_root, rc->block_group);
 	iput(rc->data_inode);
 	btrfs_put_block_group(rc->block_group);
 	kfree(rc);
-- 
1.8.5.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 2/2] btrfs: Fix xfstests btrfs/070
  2015-05-22  5:46 [PATCH 1/2] btrfs: Use ref_cnt for set_block_group_ro() Zhaolei
@ 2015-05-22  5:46 ` Zhaolei
  2015-05-26 17:06   ` David Sterba
  0 siblings, 1 reply; 3+ messages in thread
From: Zhaolei @ 2015-05-22  5:46 UTC (permalink / raw)
  To: linux-btrfs; +Cc: Zhao Lei, Qu Wenruo

From: Zhao Lei <zhaolei@cn.fujitsu.com>

xfstests btrfs/070 sometimes failed.
In my test machine, its fail rate is about 30%.
In another vm(vmware), its fail rate is about 50%.

Reason:
btrfs/070 do replace and defrag with fsstress simultaneously,
after above operation, checksum error is found by scrub.
Actually, it have no relationship with defrag operation, only
replace with fsstress can trigger this bug.
New data writen to target device have possibility rewrited by
old data from source device by replace code in debug, and can
be fixed by set chunk to ro in replace operation.

Before patch(4.1-rc3):
  30% failed in 100 xfstests.
After patch:
  0% failed in 300 xfstests.

Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
---
 fs/btrfs/scrub.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index ab58115..469c8a5 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3446,6 +3446,23 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
 		if (!cache)
 			goto skip;
 
+		/*
+		 * we need call btrfs_inc_block_group_ro() with scrubs_paused,
+		 * to avoid deadlock caused by:
+		 * btrfs_inc_block_group_ro()
+		 * -> btrfs_wait_for_commit()
+		 * -> btrfs_commit_transaction()
+		 * -> btrfs_scrub_pause()
+		 */
+		atomic_inc(&fs_info->scrubs_paused);
+		wake_up(&fs_info->scrub_pause_wait);
+		btrfs_inc_block_group_ro(root, cache);
+		mutex_lock(&fs_info->scrub_lock);
+		__scrub_blocked_if_needed(fs_info);
+		atomic_dec(&fs_info->scrubs_paused);
+		mutex_unlock(&fs_info->scrub_lock);
+		wake_up(&fs_info->scrub_pause_wait);
+
 		dev_replace->cursor_right = found_key.offset + length;
 		dev_replace->cursor_left = found_key.offset;
 		dev_replace->item_needs_writeback = 1;
@@ -3489,6 +3506,8 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
 		mutex_unlock(&fs_info->scrub_lock);
 		wake_up(&fs_info->scrub_pause_wait);
 
+		btrfs_dec_block_group_ro(root, cache);
+
 		btrfs_put_block_group(cache);
 		if (ret)
 			break;
-- 
1.8.5.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH 2/2] btrfs: Fix xfstests btrfs/070
  2015-05-22  5:46 ` [PATCH 2/2] btrfs: Fix xfstests btrfs/070 Zhaolei
@ 2015-05-26 17:06   ` David Sterba
  0 siblings, 0 replies; 3+ messages in thread
From: David Sterba @ 2015-05-26 17:06 UTC (permalink / raw)
  To: Zhaolei; +Cc: linux-btrfs, Qu Wenruo

The subject should reflect the problem being fixed.

On Fri, May 22, 2015 at 01:46:54PM +0800, Zhaolei wrote:
> From: Zhao Lei <zhaolei@cn.fujitsu.com>
> 
> xfstests btrfs/070 sometimes failed.
> In my test machine, its fail rate is about 30%.
> In another vm(vmware), its fail rate is about 50%.
> 
> Reason:
> btrfs/070 do replace and defrag with fsstress simultaneously,
> after above operation, checksum error is found by scrub.
> Actually, it have no relationship with defrag operation, only
> replace with fsstress can trigger this bug.
> New data writen to target device have possibility rewrited by
> old data from source device by replace code in debug, and can
> be fixed by set chunk to ro in replace operation.

Please improve the description, this is very condensed and unclear how
exactly does the read-only/read-write changes happen.

> --- a/fs/btrfs/scrub.c
> +++ b/fs/btrfs/scrub.c
> @@ -3446,6 +3446,23 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
>  		if (!cache)
>  			goto skip;
>  
> +		/*
> +		 * we need call btrfs_inc_block_group_ro() with scrubs_paused,
> +		 * to avoid deadlock caused by:
> +		 * btrfs_inc_block_group_ro()
> +		 * -> btrfs_wait_for_commit()
> +		 * -> btrfs_commit_transaction()
> +		 * -> btrfs_scrub_pause()
> +		 */
> +		atomic_inc(&fs_info->scrubs_paused);
> +		wake_up(&fs_info->scrub_pause_wait);
> +		btrfs_inc_block_group_ro(root, cache);
> +		mutex_lock(&fs_info->scrub_lock);
> +		__scrub_blocked_if_needed(fs_info);
> +		atomic_dec(&fs_info->scrubs_paused);
> +		mutex_unlock(&fs_info->scrub_lock);
> +		wake_up(&fs_info->scrub_pause_wait);

Please put that into a helper, similar to scrub_blocked_if_needed .

> +
>  		dev_replace->cursor_right = found_key.offset + length;
>  		dev_replace->cursor_left = found_key.offset;
>  		dev_replace->item_needs_writeback = 1;

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2015-05-26 17:06 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-05-22  5:46 [PATCH 1/2] btrfs: Use ref_cnt for set_block_group_ro() Zhaolei
2015-05-22  5:46 ` [PATCH 2/2] btrfs: Fix xfstests btrfs/070 Zhaolei
2015-05-26 17:06   ` David Sterba

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).