From: Qu Wenruo <quwenruo@cn.fujitsu.com>
To: linux-btrfs@vger.kernel.org
Subject: [PATCH 2/3] btrfs: qgroup: Fix a race in delayed_ref which leads to abort trans
Date: Mon, 26 Oct 2015 14:11:18 +0800 [thread overview]
Message-ID: <1445839879-17716-3-git-send-email-quwenruo@cn.fujitsu.com> (raw)
In-Reply-To: <1445839879-17716-1-git-send-email-quwenruo@cn.fujitsu.com>
Between btrfs_alloc_reserved_file_extent() and
btrfs_add_delayed_qgroup_reserve(), there is a window in which delayed_refs
can be run and the delayed ref head may be freed before
btrfs_add_delayed_qgroup_reserve() is called.
This will cause btrfs_add_delayed_qgroup_reserve() to return -ENOENT,
and cause the transaction to be aborted.
This patch will record qgroup reserve space info into delayed_ref_head
at btrfs_add_delayed_ref(), to eliminate the race window.
Reported-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
---
fs/btrfs/ctree.h | 3 ++-
fs/btrfs/delayed-ref.c | 22 +++++++++++++++++-----
fs/btrfs/delayed-ref.h | 2 +-
fs/btrfs/extent-tree.c | 14 ++++++++------
fs/btrfs/inode.c | 12 ++++--------
5 files changed, 32 insertions(+), 21 deletions(-)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 3fa3c3b..a8c9a27 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3403,7 +3403,8 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 root_objectid, u64 owner,
- u64 offset, struct btrfs_key *ins);
+ u64 offset, u64 ram_bytes,
+ struct btrfs_key *ins);
int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 root_objectid, u64 owner, u64 offset,
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 449974f..8d65427 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -422,7 +422,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *ref,
struct btrfs_qgroup_extent_record *qrecord,
- u64 bytenr, u64 num_bytes, int action, int is_data)
+ u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved,
+ int action, int is_data)
{
struct btrfs_delayed_ref_head *existing;
struct btrfs_delayed_ref_head *head_ref = NULL;
@@ -431,6 +432,9 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
int count_mod = 1;
int must_insert_reserved = 0;
+ /* If reserved is provided, it must be a data extent. */
+ BUG_ON(!is_data && reserved);
+
/*
* the head node stores the sum of all the mods, so dropping a ref
* should drop the sum in the head node by one.
@@ -480,6 +484,11 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
/* Record qgroup extent info if provided */
if (qrecord) {
+ if (ref_root && reserved) {
+ head_ref->qgroup_ref_root = ref_root;
+ head_ref->qgroup_reserved = reserved;
+ }
+
qrecord->bytenr = bytenr;
qrecord->num_bytes = num_bytes;
qrecord->old_roots = NULL;
@@ -498,6 +507,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
existing = htree_insert(&delayed_refs->href_root,
&head_ref->href_node);
if (existing) {
+ WARN_ON(ref_root && reserved && existing->qgroup_ref_root
+ && existing->qgroup_reserved);
update_existing_head_ref(delayed_refs, &existing->node, ref);
/*
* we've updated the existing ref, free the newly
@@ -664,7 +675,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
* the spin lock
*/
head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
- bytenr, num_bytes, action, 0);
+ bytenr, num_bytes, 0, 0, action, 0);
add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
num_bytes, parent, ref_root, level, action);
@@ -687,7 +698,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
u64 parent, u64 ref_root,
- u64 owner, u64 offset, int action,
+ u64 owner, u64 offset, u64 reserved, int action,
struct btrfs_delayed_extent_op *extent_op)
{
struct btrfs_delayed_data_ref *ref;
@@ -726,7 +737,8 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
* the spin lock
*/
head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
- bytenr, num_bytes, action, 1);
+ bytenr, num_bytes, ref_root, reserved,
+ action, 1);
add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
num_bytes, parent, ref_root, owner, offset,
@@ -781,7 +793,7 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
spin_lock(&delayed_refs->lock);
add_delayed_ref_head(fs_info, trans, &head_ref->node, NULL, bytenr,
- num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
+ num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD,
extent_op->is_data);
spin_unlock(&delayed_refs->lock);
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index f9cf234..00ed02c 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -248,7 +248,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
u64 parent, u64 ref_root,
- u64 owner, u64 offset, int action,
+ u64 owner, u64 offset, u64 reserved, int action,
struct btrfs_delayed_extent_op *extent_op);
int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index d47b11d..93c42ca 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2087,8 +2087,8 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
BTRFS_ADD_DELAYED_REF, NULL);
} else {
ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
- num_bytes,
- parent, root_objectid, owner, offset,
+ num_bytes, parent, root_objectid,
+ owner, offset, 0,
BTRFS_ADD_DELAYED_REF, NULL);
}
return ret;
@@ -6818,8 +6818,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
num_bytes,
parent, root_objectid, owner,
- offset, BTRFS_DROP_DELAYED_REF,
- NULL);
+ offset, 0,
+ BTRFS_DROP_DELAYED_REF, NULL);
}
return ret;
}
@@ -7745,7 +7745,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 root_objectid, u64 owner,
- u64 offset, struct btrfs_key *ins)
+ u64 offset, u64 ram_bytes,
+ struct btrfs_key *ins)
{
int ret;
@@ -7754,7 +7755,8 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
ret = btrfs_add_delayed_data_ref(root->fs_info, trans, ins->objectid,
ins->offset, 0,
root_objectid, owner, offset,
- BTRFS_ADD_DELAYED_EXTENT, NULL);
+ ram_bytes, BTRFS_ADD_DELAYED_EXTENT,
+ NULL);
return ret;
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index e8b7bc3..f3d6b33 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2127,17 +2127,13 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
ins.type = BTRFS_EXTENT_ITEM_KEY;
ret = btrfs_alloc_reserved_file_extent(trans, root,
root->root_key.objectid,
- btrfs_ino(inode), file_pos, &ins);
- if (ret < 0)
- goto out;
+ btrfs_ino(inode), file_pos,
+ ram_bytes, &ins);
/*
- * Release the reserved range from inode dirty range map, and
- * move it to delayed ref codes, as now accounting only happens at
- * commit_transaction() time.
+ * Release the reserved range from inode dirty range map, as it is
+ * already moved into delayed_ref_head
*/
btrfs_qgroup_release_data(inode, file_pos, ram_bytes);
- ret = btrfs_add_delayed_qgroup_reserve(root->fs_info, trans,
- root->objectid, disk_bytenr, ram_bytes);
out:
btrfs_free_path(path);
--
2.6.2
next prev parent reply other threads:[~2015-10-26 6:13 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-10-26 6:11 [4.4][PATCH 0/3] btrfs: Qgroup hotfix Qu Wenruo
2015-10-26 6:11 ` [PATCH 1/3] btrfs: Cleanup no_quota parameter Qu Wenruo
2015-10-26 8:14 ` Filipe Manana
2015-10-26 8:25 ` Qu Wenruo
2015-10-26 8:27 ` Qu Wenruo
2015-10-26 8:28 ` Filipe Manana
2015-10-26 6:11 ` Qu Wenruo [this message]
2015-10-26 6:11 ` [PATCH 3/3] btrfs: qgroup: Fix a rebase bug which will cause qgroup double free Qu Wenruo
2015-10-29 11:09 ` [3/3] " Johannes Henninger
2015-10-30 10:23 ` Johannes Henninger
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1445839879-17716-3-git-send-email-quwenruo@cn.fujitsu.com \
--to=quwenruo@cn.fujitsu.com \
--cc=linux-btrfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).