From: Miao Xie <miaox@cn.fujitsu.com>
To: Chris Mason <chris.mason@oracle.com>, Josef Bacik <josef@redhat.com>
Cc: Linux Btrfs <linux-btrfs@vger.kernel.org>
Subject: [PATCH 2/2] btrfs: fix deadlock when doing reservation
Date: Wed, 15 Jun 2011 18:47:43 +0800 [thread overview]
Message-ID: <4DF88DCF.2030502@cn.fujitsu.com> (raw)
The following deadlock may happen when doing reservation for metadata:
Task0 Flush thread Task1
start_transaction()
shrink_delalloc()
writeback_inodes_sb_nr()
wait for flush thread
end.
btrfs_writepages()
cow_file_range()
btrfs_commit_transaction
wait num_writer == 1
(wait Task0 end
transaction)
start_transaction()
wait trans commit
end
Task0 -> Flush thread -> Task1 -> Task0
Fix the above deadlock by doing reservation before the trans handle has
been joined into the transaction.
Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
---
fs/btrfs/extent-tree.c | 25 +++++++++++++----------
fs/btrfs/transaction.c | 51 +++++++++++++++++++++++++++++++++--------------
2 files changed, 50 insertions(+), 26 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b42efc2..eefa432 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3290,9 +3290,11 @@ again:
/*
* shrink metadata reservation for delalloc
+ *
+ * NOTE: This function can not run in the transaction context, or deadlock
+ * will happen.
*/
-static int shrink_delalloc(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 to_reclaim, int sync)
+static int shrink_delalloc(struct btrfs_root *root, u64 to_reclaim)
{
struct btrfs_block_rsv *block_rsv;
struct btrfs_space_info *space_info;
@@ -3338,9 +3340,6 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
if (reserved == 0 || reclaimed >= max_reclaim)
break;
- if (trans && trans->transaction->blocked)
- return -EAGAIN;
-
time_left = schedule_timeout_interruptible(1);
/* We were interrupted, exit */
@@ -3449,12 +3448,16 @@ again:
/*
* We do synchronous shrinking since we don't actually unreserve
* metadata until after the IO is completed.
+ *
+ * shrink_delalloc() can not run in the transaction context.
*/
- ret = shrink_delalloc(trans, root, num_bytes, 1);
- if (ret > 0)
- return 0;
- else if (ret < 0)
- goto out;
+ if (!trans || !trans->transaction) {
+ ret = shrink_delalloc(root, num_bytes);
+ if (ret > 0)
+ return 0;
+ else if (ret < 0)
+ goto out;
+ }
/*
* So if we were overcommitted it's possible that somebody else flushed
@@ -3989,7 +3992,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
block_rsv_add_bytes(block_rsv, to_reserve, 1);
if (block_rsv->size > 512 * 1024 * 1024)
- shrink_delalloc(NULL, root, to_reserve, 0);
+ shrink_delalloc(root, to_reserve);
return 0;
}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 2b3590b..173b15d 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -222,9 +222,31 @@ again:
if (!h)
return ERR_PTR(-ENOMEM);
+ h->transid = 0;
+ h->transaction = NULL;
+ h->blocks_used = 0;
+ h->bytes_reserved = 0;
+ h->delayed_ref_updates = 0;
+ h->use_count = 1;
+ h->block_rsv = NULL;
+ h->orig_rsv = NULL;
+
if (may_wait_transaction(root, type))
wait_current_trans(root);
+ if (num_items > 0) {
+ /*
+ * Now the handle has not been joined into the transaction,
+ * so btrfs will shrink metadata reservation for delalloc if
+ * there is no enough free space to reserve.
+ */
+ ret = btrfs_trans_reserve_metadata(h, root, num_items);
+ if (ret < 0 && ret != -EAGAIN) {
+ kmem_cache_free(btrfs_trans_handle_cachep, h);
+ return ERR_PTR(ret);
+ }
+ }
+
do {
ret = join_transaction(root, type == TRANS_JOIN_NOLOCK);
if (ret == -EBUSY)
@@ -232,6 +254,7 @@ again:
} while (ret == -EBUSY);
if (ret < 0) {
+ btrfs_trans_release_metadata(h, root);
kmem_cache_free(btrfs_trans_handle_cachep, h);
return ERR_PTR(ret);
}
@@ -240,12 +263,6 @@ again:
h->transid = cur_trans->transid;
h->transaction = cur_trans;
- h->blocks_used = 0;
- h->bytes_reserved = 0;
- h->delayed_ref_updates = 0;
- h->use_count = 1;
- h->block_rsv = NULL;
- h->orig_rsv = NULL;
smp_mb();
if (cur_trans->blocked && may_wait_transaction(root, type)) {
@@ -253,21 +270,25 @@ again:
goto again;
}
- if (num_items > 0) {
- ret = btrfs_trans_reserve_metadata(h, root, num_items);
- if (ret == -EAGAIN && !retries) {
+ /*
+ * Though we shrink metadata reservation for delalloc, we might still
+ * not get enough free space, so we will commit the transaction and try
+ * to reclaim the reservation.
+ *
+ * NOTE: In the transaction context, we won't shrink metadata
+ * reservation for delalloc, or the deadlock will happen.
+ */
+ if (num_items > 0 && !h->bytes_reserved) {
+ if (!retries) {
retries++;
btrfs_commit_transaction(h, root);
goto again;
- } else if (ret == -EAGAIN) {
+ } else {
/*
- * We have already retried and got EAGAIN, so really we
- * don't have space, so set ret to -ENOSPC.
+ * We have already retried, so really we don't have
+ * space, so set ret to -ENOSPC.
*/
ret = -ENOSPC;
- }
-
- if (ret < 0) {
btrfs_end_transaction(h, root);
return ERR_PTR(ret);
}
--
1.7.4
next reply other threads:[~2011-06-15 10:47 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-06-15 10:47 Miao Xie [this message]
2011-06-15 13:44 ` [PATCH 2/2] btrfs: fix deadlock when doing reservation Josef Bacik
2011-06-16 14:36 ` Mitch Harder
2011-06-16 14:52 ` Josef Bacik
2011-06-16 17:17 ` Mitch Harder
2011-06-16 17:17 ` Josef Bacik
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4DF88DCF.2030502@cn.fujitsu.com \
--to=miaox@cn.fujitsu.com \
--cc=chris.mason@oracle.com \
--cc=josef@redhat.com \
--cc=linux-btrfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.