linux-btrfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: robbieko <robbieko@synology.com>
To: linux-btrfs@vger.kernel.org
Cc: Robbie Ko <robbieko@synology.com>
Subject: [PATCH] btrfs: reduce lock contention when eb cache miss for btree search
Date: Wed,  9 Oct 2024 10:01:49 +0800	[thread overview]
Message-ID: <20241009020149.23467-1-robbieko@synology.com> (raw)

From: Robbie Ko <robbieko@synology.com>

When crawling btree, if an eb cache miss occurs, we change to use
the eb read lock and release all previous locks to reduce lock contention.

Because we have prepared the check parameters and the read lock
of eb we hold, we can ensure that no race will occur during the check
and cause unexpected errors.

Signed-off-by: Robbie Ko <robbieko@synology.com>
---
 fs/btrfs/ctree.c | 88 ++++++++++++++++++++++++++++--------------------
 1 file changed, 52 insertions(+), 36 deletions(-)

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 0cc919d15b14..0efbe61497f3 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1515,12 +1515,12 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
 	struct btrfs_tree_parent_check check = { 0 };
 	u64 blocknr;
 	u64 gen;
-	struct extent_buffer *tmp;
-	int ret;
+	struct extent_buffer *tmp = NULL;
+	int ret, err;
 	int parent_level;
-	bool unlock_up;
+	bool create = false;
+	bool lock = false;
 
-	unlock_up = ((level + 1 < BTRFS_MAX_LEVEL) && p->locks[level + 1]);
 	blocknr = btrfs_node_blockptr(*eb_ret, slot);
 	gen = btrfs_node_ptr_generation(*eb_ret, slot);
 	parent_level = btrfs_header_level(*eb_ret);
@@ -1551,52 +1551,66 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
 			 */
 			if (btrfs_verify_level_key(tmp,
 					parent_level - 1, &check.first_key, gen)) {
-				free_extent_buffer(tmp);
-				return -EUCLEAN;
+				ret = -EUCLEAN;
+				goto out;
 			}
 			*eb_ret = tmp;
-			return 0;
+			tmp = NULL;
+			ret = 0;
+			goto out;
 		}
 
 		if (p->nowait) {
-			free_extent_buffer(tmp);
-			return -EAGAIN;
+			ret = -EAGAIN;
+			btrfs_release_path(p);
+			goto out;
 		}
 
-		if (unlock_up)
-			btrfs_unlock_up_safe(p, level + 1);
+		ret = -EAGAIN;
+		btrfs_unlock_up_safe(p, level + 1);
+		btrfs_tree_read_lock(tmp);
+		lock = true;
+		btrfs_release_path(p);
 
 		/* now we're allowed to do a blocking uptodate check */
-		ret = btrfs_read_extent_buffer(tmp, &check);
-		if (ret) {
-			free_extent_buffer(tmp);
-			btrfs_release_path(p);
-			return ret;
+		err = btrfs_read_extent_buffer(tmp, &check);
+		if (err) {
+			ret = err;
+			goto out;
 		}
-
-		if (unlock_up)
-			ret = -EAGAIN;
-
 		goto out;
 	} else if (p->nowait) {
-		return -EAGAIN;
-	}
-
-	if (unlock_up) {
-		btrfs_unlock_up_safe(p, level + 1);
 		ret = -EAGAIN;
-	} else {
-		ret = 0;
+		btrfs_release_path(p);
+		goto out;
 	}
 
+	ret = -EAGAIN;
+	btrfs_unlock_up_safe(p, level + 1);
+
 	if (p->reada != READA_NONE)
 		reada_for_search(fs_info, p, level, slot, key->objectid);
 
-	tmp = read_tree_block(fs_info, blocknr, &check);
+	tmp = btrfs_find_create_tree_block(fs_info, blocknr, check.owner_root, check.level);
 	if (IS_ERR(tmp)) {
+		ret = PTR_ERR(tmp);
+		tmp = NULL;
 		btrfs_release_path(p);
-		return PTR_ERR(tmp);
+		goto out;
 	}
+	create = true;
+
+	btrfs_tree_read_lock(tmp);
+	lock = true;
+	btrfs_release_path(p);
+
+	/* now we're allowed to do a blocking uptodate check */
+	err = btrfs_read_extent_buffer(tmp, &check);
+	if (err) {
+		ret = err;
+		goto out;
+	}
+
 	/*
 	 * If the read above didn't mark this buffer up to date,
 	 * it will never end up being up to date.  Set ret to EIO now
@@ -1607,11 +1621,13 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
 		ret = -EIO;
 
 out:
-	if (ret == 0) {
-		*eb_ret = tmp;
-	} else {
-		free_extent_buffer(tmp);
-		btrfs_release_path(p);
+	if (tmp) {
+		if (lock)
+			btrfs_tree_read_unlock(tmp);
+		if (create && ret && ret != -EAGAIN)
+			free_extent_buffer_stale(tmp);
+		else
+			free_extent_buffer(tmp);
 	}
 
 	return ret;
@@ -2198,7 +2214,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		}
 
 		err = read_block_for_search(root, p, &b, level, slot, key);
-		if (err == -EAGAIN)
+		if (err == -EAGAIN && !p->nowait)
 			goto again;
 		if (err) {
 			ret = err;
@@ -2325,7 +2341,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
 		}
 
 		err = read_block_for_search(root, p, &b, level, slot, key);
-		if (err == -EAGAIN)
+		if (err == -EAGAIN && !p->nowait)
 			goto again;
 		if (err) {
 			ret = err;
-- 
2.17.1


             reply	other threads:[~2024-10-09  2:09 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-10-09  2:01 robbieko [this message]
2024-10-09 14:18 ` [PATCH] btrfs: reduce lock contention when eb cache miss for btree search Filipe Manana
2024-10-09 14:20   ` Filipe Manana
2024-10-09 15:09   ` Filipe Manana
2024-10-11  2:40     ` robbieko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241009020149.23467-1-robbieko@synology.com \
    --to=robbieko@synology.com \
    --cc=linux-btrfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).