public inbox for linux-btrfs@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH RFC] btrfs: allow extent buffer helpers to skip cross-page handling
@ 2023-11-16  5:19 Qu Wenruo
  2023-11-20 17:00 ` David Sterba
                   ` (2 more replies)
  0 siblings, 3 replies; 15+ messages in thread
From: Qu Wenruo @ 2023-11-16  5:19 UTC (permalink / raw)
  To: linux-btrfs

Currently btrfs extent buffer helpers are doing all the cross-page
handling, as there is no guarantee that all those eb pages are
contiguous.

However on systems with enough memory, there is a very high chance the
page cache for btree_inode is allocated with physically contiguous
pages.

In that case, we can skip all the complex cross-page handling, thus
speeding up the code.

This patch adds a new member, extent_buffer::addr, which is only set to
non-NULL if all the extent buffer pages are physically contiguous.

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
Reason for RFC:

This change would increase the code size for all extent buffer helpers,
and since there is one more branch introduced, it may even slow down the
system if most ebs do not have physically contiguous pages.

But I still believe this is worth trying, as my previous attempt to
use virtually contiguous pages was rejected due to a possible slowdown in
the vm_map() call.

I don't have a convincing benchmark yet, but so far no obvious performance
drop has been observed either.
---
 fs/btrfs/disk-io.c   |  9 +++++++-
 fs/btrfs/extent_io.c | 55 ++++++++++++++++++++++++++++++++++++++++++++
 fs/btrfs/extent_io.h |  7 ++++++
 3 files changed, 70 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 5ac6789ca55f..7fc78171a262 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -80,8 +80,16 @@ static void csum_tree_block(struct extent_buffer *buf, u8 *result)
 	char *kaddr;
 	int i;
 
+	memset(result, 0, BTRFS_CSUM_SIZE);
 	shash->tfm = fs_info->csum_shash;
 	crypto_shash_init(shash);
+
+	if (buf->addr) {
+		crypto_shash_digest(shash, buf->addr + offset_in_page(buf->start) + BTRFS_CSUM_SIZE,
+				    buf->len - BTRFS_CSUM_SIZE, result);
+		return;
+	}
+
 	kaddr = page_address(buf->pages[0]) + offset_in_page(buf->start);
 	crypto_shash_update(shash, kaddr + BTRFS_CSUM_SIZE,
 			    first_page_part - BTRFS_CSUM_SIZE);
@@ -90,7 +98,6 @@ static void csum_tree_block(struct extent_buffer *buf, u8 *result)
 		kaddr = page_address(buf->pages[i]);
 		crypto_shash_update(shash, kaddr, PAGE_SIZE);
 	}
-	memset(result, 0, BTRFS_CSUM_SIZE);
 	crypto_shash_final(shash, result);
 }
 
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 03cef28d9e37..004b0ba6b1c7 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3476,6 +3476,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 	struct address_space *mapping = fs_info->btree_inode->i_mapping;
 	struct btrfs_subpage *prealloc = NULL;
 	u64 lockdep_owner = owner_root;
+	bool page_contig = true;
 	int uptodate = 1;
 	int ret;
 
@@ -3562,6 +3563,14 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 
 		WARN_ON(btrfs_page_test_dirty(fs_info, p, eb->start, eb->len));
 		eb->pages[i] = p;
+
+		/*
+		 * Check if the current page is physically contiguous with previous eb
+		 * page.
+		 */
+		if (i && eb->pages[i - 1] + 1 != p)
+			page_contig = false;
+
 		if (!btrfs_page_test_uptodate(fs_info, p, eb->start, eb->len))
 			uptodate = 0;
 
@@ -3575,6 +3584,9 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 	}
 	if (uptodate)
 		set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
+	/* All pages are physically contiguous, can skip cross page handling. */
+	if (page_contig)
+		eb->addr = page_address(eb->pages[0]) + offset_in_page(eb->start);
 again:
 	ret = radix_tree_preload(GFP_NOFS);
 	if (ret) {
@@ -4009,6 +4021,11 @@ void read_extent_buffer(const struct extent_buffer *eb, void *dstv,
 		return;
 	}
 
+	if (eb->addr) {
+		memcpy(dstv, eb->addr + start, len);
+		return;
+	}
+
 	offset = get_eb_offset_in_page(eb, start);
 
 	while (len > 0) {
@@ -4040,6 +4057,12 @@ int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb,
 	WARN_ON(start > eb->len);
 	WARN_ON(start + len > eb->start + eb->len);
 
+	if (eb->addr) {
+		if (copy_to_user_nofault(dstv, eb->addr + start, len))
+			ret = EFAULT;
+		return ret;
+	}
+
 	offset = get_eb_offset_in_page(eb, start);
 
 	while (len > 0) {
@@ -4075,6 +4098,9 @@ int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
 	if (check_eb_range(eb, start, len))
 		return -EINVAL;
 
+	if (eb->addr)
+		return memcmp(ptrv, eb->addr + start, len);
+
 	offset = get_eb_offset_in_page(eb, start);
 
 	while (len > 0) {
@@ -4144,6 +4170,14 @@ static void __write_extent_buffer(const struct extent_buffer *eb,
 	if (check_eb_range(eb, start, len))
 		return;
 
+	if (eb->addr) {
+		if (use_memmove)
+			memmove(eb->addr + start, srcv, len);
+		else
+			memcpy(eb->addr + start, srcv, len);
+		return;
+	}
+
 	offset = get_eb_offset_in_page(eb, start);
 
 	while (len > 0) {
@@ -4176,6 +4210,11 @@ static void memset_extent_buffer(const struct extent_buffer *eb, int c,
 {
 	unsigned long cur = start;
 
+	if (eb->addr) {
+		memset(eb->addr + start, c, len);
+		return;
+	}
+
 	while (cur < start + len) {
 		unsigned long index = get_eb_page_index(cur);
 		unsigned int offset = get_eb_offset_in_page(eb, cur);
@@ -4403,6 +4442,17 @@ void memcpy_extent_buffer(const struct extent_buffer *dst,
 	    check_eb_range(dst, src_offset, len))
 		return;
 
+	if (dst->addr) {
+		const bool use_memmove = areas_overlap(src_offset,
+						       dst_offset, len);
+
+		if (use_memmove)
+			memmove(dst->addr + dst_offset, dst->addr + src_offset, len);
+		else
+			memcpy(dst->addr + dst_offset, dst->addr + src_offset, len);
+		return;
+	}
+
 	while (cur_off < len) {
 		unsigned long cur_src = cur_off + src_offset;
 		unsigned long pg_index = get_eb_page_index(cur_src);
@@ -4435,6 +4485,11 @@ void memmove_extent_buffer(const struct extent_buffer *dst,
 		return;
 	}
 
+	if (dst->addr) {
+		memmove(dst->addr + dst_offset, dst->addr + src_offset, len);
+		return;
+	}
+
 	while (len > 0) {
 		unsigned long src_i;
 		size_t cur;
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 2171057a4477..a88374ad248f 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -77,6 +77,13 @@ struct extent_buffer {
 	unsigned long len;
 	unsigned long bflags;
 	struct btrfs_fs_info *fs_info;
+
+	/*
+	 * The address where the eb can be accessed without any cross-page handling.
+	 * This can be NULL if not possible.
+	 */
+	void *addr;
+
 	spinlock_t refs_lock;
 	atomic_t refs;
 	int read_mirror;
-- 
2.42.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2023-11-24 16:10 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2023-11-16  5:19 [PATCH RFC] btrfs: allow extent buffer helpers to skip cross-page handling Qu Wenruo
2023-11-20 17:00 ` David Sterba
2023-11-20 20:25   ` Qu Wenruo
2023-11-21 15:35     ` David Sterba
2023-11-21 20:37       ` Qu Wenruo
2023-11-21 21:14         ` David Sterba
2023-11-21 21:30           ` Qu Wenruo
2023-11-22 13:23             ` David Sterba
2023-11-22 13:34 ` David Sterba
2023-11-22 13:46 ` David Sterba
2023-11-22 20:01   ` Qu Wenruo
2023-11-22 22:05     ` David Sterba
2023-11-23 18:50     ` David Sterba
2023-11-23 20:51     ` Qu Wenruo
2023-11-24 16:03       ` David Sterba

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox