linux-btrfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 2/2] btrfs: fix deadlock when doing reservation
@ 2011-06-15 10:47 Miao Xie
  2011-06-15 13:44 ` Josef Bacik
  0 siblings, 1 reply; 6+ messages in thread
From: Miao Xie @ 2011-06-15 10:47 UTC (permalink / raw)
  To: Chris Mason, Josef Bacik; +Cc: Linux Btrfs

The following deadlock may happen when doing reservation for metadata:

Task0				Flush thread		Task1
start_transaction()
  shrink_delalloc()
    writeback_inodes_sb_nr()
      wait for flush thread
      end.
				btrfs_writepages()
				  cow_file_range()
							btrfs_commit_transaction
							  wait num_writer == 1
							  (wait Task0 end
							   transaction)
				  start_transaction()
				    wait trans commit
				    end

Task0 -> Flush thread -> Task1 -> Task0

Fix the above deadlock by doing reservation before the trans handle has
been joined into the transaction.

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
---
 fs/btrfs/extent-tree.c |   25 +++++++++++++----------
 fs/btrfs/transaction.c |   51 +++++++++++++++++++++++++++++++++--------------
 2 files changed, 50 insertions(+), 26 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b42efc2..eefa432 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3290,9 +3290,11 @@ again:
 
 /*
  * shrink metadata reservation for delalloc
+ *
+ * NOTE: This function can not run in the transaction context, or deadlock
+ *       will happen.
  */
-static int shrink_delalloc(struct btrfs_trans_handle *trans,
-			   struct btrfs_root *root, u64 to_reclaim, int sync)
+static int shrink_delalloc(struct btrfs_root *root, u64 to_reclaim)
 {
 	struct btrfs_block_rsv *block_rsv;
 	struct btrfs_space_info *space_info;
@@ -3338,9 +3340,6 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
 		if (reserved == 0 || reclaimed >= max_reclaim)
 			break;
 
-		if (trans && trans->transaction->blocked)
-			return -EAGAIN;
-
 		time_left = schedule_timeout_interruptible(1);
 
 		/* We were interrupted, exit */
@@ -3449,12 +3448,16 @@ again:
 	/*
 	 * We do synchronous shrinking since we don't actually unreserve
 	 * metadata until after the IO is completed.
+	 *
+	 * shrink_delalloc() can not run in the transaction context.
 	 */
-	ret = shrink_delalloc(trans, root, num_bytes, 1);
-	if (ret > 0)
-		return 0;
-	else if (ret < 0)
-		goto out;
+	if (!trans || !trans->transaction) {
+		ret = shrink_delalloc(root, num_bytes);
+		if (ret > 0)
+			return 0;
+		else if (ret < 0)
+			goto out;
+	}
 
 	/*
 	 * So if we were overcommitted it's possible that somebody else flushed
@@ -3989,7 +3992,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 	block_rsv_add_bytes(block_rsv, to_reserve, 1);
 
 	if (block_rsv->size > 512 * 1024 * 1024)
-		shrink_delalloc(NULL, root, to_reserve, 0);
+		shrink_delalloc(root, to_reserve);
 
 	return 0;
 }
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 2b3590b..173b15d 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -222,9 +222,31 @@ again:
 	if (!h)
 		return ERR_PTR(-ENOMEM);
 
+	h->transid = 0;
+	h->transaction = NULL;
+	h->blocks_used = 0;
+	h->bytes_reserved = 0;
+	h->delayed_ref_updates = 0;
+	h->use_count = 1;
+	h->block_rsv = NULL;
+	h->orig_rsv = NULL;
+
 	if (may_wait_transaction(root, type))
 		wait_current_trans(root);
 
+	if (num_items > 0) {
+		/*
+		 * Now the handle has not been joined into the transaction,
+		 * so btrfs will shrink metadata reservation for delalloc if
+		 * there is no enough free space to reserve.
+		 */
+		ret = btrfs_trans_reserve_metadata(h, root, num_items);
+		if (ret < 0 && ret != -EAGAIN) {
+			kmem_cache_free(btrfs_trans_handle_cachep, h);
+			return ERR_PTR(ret);
+		}
+	}
+
 	do {
 		ret = join_transaction(root, type == TRANS_JOIN_NOLOCK);
 		if (ret == -EBUSY)
@@ -232,6 +254,7 @@ again:
 	} while (ret == -EBUSY);
 
 	if (ret < 0) {
+		btrfs_trans_release_metadata(h, root);
 		kmem_cache_free(btrfs_trans_handle_cachep, h);
 		return ERR_PTR(ret);
 	}
@@ -240,12 +263,6 @@ again:
 
 	h->transid = cur_trans->transid;
 	h->transaction = cur_trans;
-	h->blocks_used = 0;
-	h->bytes_reserved = 0;
-	h->delayed_ref_updates = 0;
-	h->use_count = 1;
-	h->block_rsv = NULL;
-	h->orig_rsv = NULL;
 
 	smp_mb();
 	if (cur_trans->blocked && may_wait_transaction(root, type)) {
@@ -253,21 +270,25 @@ again:
 		goto again;
 	}
 
-	if (num_items > 0) {
-		ret = btrfs_trans_reserve_metadata(h, root, num_items);
-		if (ret == -EAGAIN && !retries) {
+	/*
+	 * Though we shrink metadata reservation for delalloc, we might still
+	 * not get enough free space, so we will commit the transaction and try
+	 * to reclaim the reservation.
+	 *
+	 * NOTE: In the transaction context, we won't shrink metadata
+	 *       reservation for delalloc, or the deadlock will happen.
+	 */
+	if (num_items > 0 && !h->bytes_reserved) {
+		if (!retries) {
 			retries++;
 			btrfs_commit_transaction(h, root);
 			goto again;
-		} else if (ret == -EAGAIN) {
+		} else {
 			/*
-			 * We have already retried and got EAGAIN, so really we
-			 * don't have space, so set ret to -ENOSPC.
+			 * We have already retried, so really we don't have
+			 * space, so set ret to -ENOSPC.
 			 */
 			ret = -ENOSPC;
-		}
-
-		if (ret < 0) {
 			btrfs_end_transaction(h, root);
 			return ERR_PTR(ret);
 		}
-- 
1.7.4

^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2011-06-16 17:17 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-06-15 10:47 [PATCH 2/2] btrfs: fix deadlock when doing reservation Miao Xie
2011-06-15 13:44 ` Josef Bacik
2011-06-16 14:36   ` Mitch Harder
2011-06-16 14:52     ` Josef Bacik
2011-06-16 17:17       ` Mitch Harder
2011-06-16 17:17         ` Josef Bacik

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).