* [PATCH v3 1/8] btrfs: tests: enhance extent buffer bitmap tests
2023-07-15 11:08 [PATCH v3 0/8] btrfs: preparation patches for the incoming metadata folio conversion Qu Wenruo
@ 2023-07-15 11:08 ` Qu Wenruo
2023-07-15 11:08 ` [PATCH v3 2/8] btrfs: tests: add self tests for extent buffer memory operations Qu Wenruo
` (8 subsequent siblings)
9 siblings, 0 replies; 17+ messages in thread
From: Qu Wenruo @ 2023-07-15 11:08 UTC (permalink / raw)
To: linux-btrfs; +Cc: Sweet Tea Dorminy, David Sterba
Enhance extent bitmap tests for the following aspects:
- Remove unnecessary @len from __test_eb_bitmaps()
We can fetch the length from extent buffer
- Explicitly distinguish bit and byte length
Now every start/len inside bitmap tests would have either "byte_" or
"bit_" prefix to make it more explicit.
- Better error reporting
If any bits mismatch, the error report dumps the following
contents:
* start bytenr
* bit number
* the full byte from bitmap
* the full byte from the extent
This saves developers time, as obvious problems can be found
immediately.
- Extract bitmap set/clear and check operation into two helpers
This is to save some code lines, as we will have more tests to do.
- Add new tests
The following tests are added, mostly for the incoming extent bitmap
accessor refactoring:
* Set bits inside the same byte
* Clear bits inside the same byte
* Cross byte boundary set
* Cross byte boundary clear
* Cross multi-byte boundary set
* Cross multi-byte boundary clear
These new tests have already caught mistakes of mine while working on
the incoming extent buffer bitmap refactoring.
Reviewed-by: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
fs/btrfs/tests/extent-io-tests.c | 162 +++++++++++++++++++++----------
1 file changed, 109 insertions(+), 53 deletions(-)
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index f6bc6d738555..3e625c558b0b 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -319,86 +319,139 @@ static int test_find_delalloc(u32 sectorsize)
return ret;
}
-static int check_eb_bitmap(unsigned long *bitmap, struct extent_buffer *eb,
- unsigned long len)
+static int check_eb_bitmap(unsigned long *bitmap, struct extent_buffer *eb)
{
unsigned long i;
- for (i = 0; i < len * BITS_PER_BYTE; i++) {
+ for (i = 0; i < eb->len * BITS_PER_BYTE; i++) {
int bit, bit1;
bit = !!test_bit(i, bitmap);
bit1 = !!extent_buffer_test_bit(eb, 0, i);
if (bit1 != bit) {
- test_err("bits do not match");
+ u8 has;
+ u8 expect;
+
+ read_extent_buffer(eb, &has, i / BITS_PER_BYTE, 1);
+ expect = bitmap_get_value8(bitmap, ALIGN(i, BITS_PER_BYTE));
+
+ test_err(
+ "bits do not match, start byte 0 bit %lu, byte %lu has 0x%02x expect 0x%02x",
+ i, i / BITS_PER_BYTE, has, expect);
return -EINVAL;
}
bit1 = !!extent_buffer_test_bit(eb, i / BITS_PER_BYTE,
i % BITS_PER_BYTE);
if (bit1 != bit) {
- test_err("offset bits do not match");
+ u8 has;
+ u8 expect;
+
+ read_extent_buffer(eb, &has, i / BITS_PER_BYTE, 1);
+ expect = bitmap_get_value8(bitmap, ALIGN(i, BITS_PER_BYTE));
+
+ test_err(
+ "bits do not match, start byte %lu bit %lu, byte %lu has 0x%02x expect 0x%02x",
+ i / BITS_PER_BYTE, i % BITS_PER_BYTE,
+ i / BITS_PER_BYTE, has, expect);
return -EINVAL;
}
}
return 0;
}
-static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
- unsigned long len)
+static int test_bitmap_set(const char *name, unsigned long *bitmap,
+ struct extent_buffer *eb,
+ unsigned long byte_start, unsigned long bit_start,
+ unsigned long bit_len)
+{
+ int ret;
+
+ bitmap_set(bitmap, byte_start * BITS_PER_BYTE + bit_start, bit_len);
+ extent_buffer_bitmap_set(eb, byte_start, bit_start, bit_len);
+ ret = check_eb_bitmap(bitmap, eb);
+ if (ret < 0)
+ test_err("%s test failed", name);
+ return ret;
+}
+
+static int test_bitmap_clear(const char *name, unsigned long *bitmap,
+ struct extent_buffer *eb,
+ unsigned long byte_start, unsigned long bit_start,
+ unsigned long bit_len)
+{
+ int ret;
+
+ bitmap_clear(bitmap, byte_start * BITS_PER_BYTE + bit_start, bit_len);
+ extent_buffer_bitmap_clear(eb, byte_start, bit_start, bit_len);
+ ret = check_eb_bitmap(bitmap, eb);
+ if (ret < 0)
+ test_err("%s test failed", name);
+ return ret;
+}
+static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb)
{
unsigned long i, j;
+ unsigned long byte_len = eb->len;
u32 x;
int ret;
- memset(bitmap, 0, len);
- memzero_extent_buffer(eb, 0, len);
- if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) {
- test_err("bitmap was not zeroed");
- return -EINVAL;
- }
-
- bitmap_set(bitmap, 0, len * BITS_PER_BYTE);
- extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE);
- ret = check_eb_bitmap(bitmap, eb, len);
- if (ret) {
- test_err("setting all bits failed");
+ ret = test_bitmap_clear("clear all run 1", bitmap, eb, 0, 0,
+ byte_len * BITS_PER_BYTE);
+ if (ret < 0)
return ret;
- }
- bitmap_clear(bitmap, 0, len * BITS_PER_BYTE);
- extent_buffer_bitmap_clear(eb, 0, 0, len * BITS_PER_BYTE);
- ret = check_eb_bitmap(bitmap, eb, len);
- if (ret) {
- test_err("clearing all bits failed");
+ ret = test_bitmap_set("set all", bitmap, eb, 0, 0, byte_len * BITS_PER_BYTE);
+ if (ret < 0)
+ return ret;
+
+ ret = test_bitmap_clear("clear all run 2", bitmap, eb, 0, 0,
+ byte_len * BITS_PER_BYTE);
+ if (ret < 0)
+ return ret;
+
+ ret = test_bitmap_set("same byte set", bitmap, eb, 0, 2, 4);
+ if (ret < 0)
+ return ret;
+
+ ret = test_bitmap_clear("same byte partial clear", bitmap, eb, 0, 4, 1);
+ if (ret < 0)
+ return ret;
+
+ ret = test_bitmap_set("cross byte set", bitmap, eb, 2, 4, 8);
+ if (ret < 0)
+ return ret;
+
+ ret = test_bitmap_set("cross multi byte set", bitmap, eb, 4, 4, 24);
+ if (ret < 0)
+ return ret;
+
+ ret = test_bitmap_clear("cross byte clear", bitmap, eb, 2, 6, 4);
+ if (ret < 0)
+ return ret;
+
+ ret = test_bitmap_clear("cross multi byte clear", bitmap, eb, 4, 6, 20);
+ if (ret < 0)
return ret;
- }
/* Straddling pages test */
- if (len > PAGE_SIZE) {
- bitmap_set(bitmap,
- (PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE,
- sizeof(long) * BITS_PER_BYTE);
- extent_buffer_bitmap_set(eb, PAGE_SIZE - sizeof(long) / 2, 0,
- sizeof(long) * BITS_PER_BYTE);
- ret = check_eb_bitmap(bitmap, eb, len);
- if (ret) {
- test_err("setting straddling pages failed");
+ if (byte_len > PAGE_SIZE) {
+ ret = test_bitmap_set("cross page set", bitmap, eb,
+ PAGE_SIZE - sizeof(long) / 2, 0,
+ sizeof(long) * BITS_PER_BYTE);
+ if (ret < 0)
return ret;
- }
- bitmap_set(bitmap, 0, len * BITS_PER_BYTE);
- bitmap_clear(bitmap,
- (PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE,
- sizeof(long) * BITS_PER_BYTE);
- extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE);
- extent_buffer_bitmap_clear(eb, PAGE_SIZE - sizeof(long) / 2, 0,
- sizeof(long) * BITS_PER_BYTE);
- ret = check_eb_bitmap(bitmap, eb, len);
- if (ret) {
- test_err("clearing straddling pages failed");
+ ret = test_bitmap_set("cross page set all", bitmap, eb, 0, 0,
+ byte_len * BITS_PER_BYTE);
+ if (ret < 0)
+ return ret;
+
+ ret = test_bitmap_clear("cross page clear", bitmap, eb,
+ PAGE_SIZE - sizeof(long) / 2, 0,
+ sizeof(long) * BITS_PER_BYTE);
+ if (ret < 0)
return ret;
- }
}
/*
@@ -406,9 +459,12 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
* something repetitive that could miss some hypothetical off-by-n bug.
*/
x = 0;
- bitmap_clear(bitmap, 0, len * BITS_PER_BYTE);
- extent_buffer_bitmap_clear(eb, 0, 0, len * BITS_PER_BYTE);
- for (i = 0; i < len * BITS_PER_BYTE / 32; i++) {
+ ret = test_bitmap_clear("clear all run 3", bitmap, eb, 0, 0,
+ byte_len * BITS_PER_BYTE);
+ if (ret < 0)
+ return ret;
+
+ for (i = 0; i < byte_len * BITS_PER_BYTE / 32; i++) {
x = (0x19660dULL * (u64)x + 0x3c6ef35fULL) & 0xffffffffU;
for (j = 0; j < 32; j++) {
if (x & (1U << j)) {
@@ -418,7 +474,7 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
}
}
- ret = check_eb_bitmap(bitmap, eb, len);
+ ret = check_eb_bitmap(bitmap, eb);
if (ret) {
test_err("random bit pattern failed");
return ret;
@@ -456,7 +512,7 @@ static int test_eb_bitmaps(u32 sectorsize, u32 nodesize)
goto out;
}
- ret = __test_eb_bitmaps(bitmap, eb, nodesize);
+ ret = __test_eb_bitmaps(bitmap, eb);
if (ret)
goto out;
@@ -473,7 +529,7 @@ static int test_eb_bitmaps(u32 sectorsize, u32 nodesize)
goto out;
}
- ret = __test_eb_bitmaps(bitmap, eb, nodesize);
+ ret = __test_eb_bitmaps(bitmap, eb);
out:
free_extent_buffer(eb);
kfree(bitmap);
--
2.41.0
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCH v3 2/8] btrfs: tests: add self tests for extent buffer memory operations
2023-07-15 11:08 [PATCH v3 0/8] btrfs: preparation patches for the incoming metadata folio conversion Qu Wenruo
2023-07-15 11:08 ` [PATCH v3 1/8] btrfs: tests: enhance extent buffer bitmap tests Qu Wenruo
@ 2023-07-15 11:08 ` Qu Wenruo
2023-07-15 11:08 ` [PATCH v3 3/8] btrfs: refactor extent buffer bitmaps operations Qu Wenruo
` (7 subsequent siblings)
9 siblings, 0 replies; 17+ messages in thread
From: Qu Wenruo @ 2023-07-15 11:08 UTC (permalink / raw)
To: linux-btrfs
The new self tests populate a memory range with random bytes, then
copy it to the extent buffer, so that we can verify that the extent
buffer memory operations and memmove()/memcpy() result in the same
contents.
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
fs/btrfs/tests/extent-io-tests.c | 147 +++++++++++++++++++++++++++++++
1 file changed, 147 insertions(+)
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index 3e625c558b0b..258b0dcffa62 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -648,6 +648,149 @@ static int test_find_first_clear_extent_bit(void)
return ret;
}
+static void dump_eb_and_memory_contents(struct extent_buffer *eb, void *memory,
+ const char *test_name)
+{
+ for (int i = 0; i < eb->len; i++) {
+ struct page *page = eb->pages[i >> PAGE_SHIFT];
+ void *addr = page_address(page) + offset_in_page(i);
+
+ if (memcmp(addr, memory + i, 1)) {
+ test_err("%s failed", test_name);
+ test_err("eb and memory diffs at byte %u, eb has 0x%02x memory has 0x%02x",
+ i, *(u8 *)addr, *(u8 *)(memory + i));
+ return;
+ }
+ }
+}
+
+static int verify_eb_and_memory(struct extent_buffer *eb, void *memory,
+ const char *test_name)
+{
+ int ret;
+
+ for (int i = 0; i < (eb->len >> PAGE_SHIFT); i++) {
+ void *eb_addr = page_address(eb->pages[i]);
+
+ ret = memcmp(memory + (i << PAGE_SHIFT), eb_addr, PAGE_SIZE);
+ if (ret) {
+ dump_eb_and_memory_contents(eb, memory, test_name);
+ return -EUCLEAN;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Init both memory and extent buffer contents to the same randomly generated
+ * contents.
+ */
+static void init_eb_and_memory(struct extent_buffer *eb, void *memory)
+{
+ get_random_bytes(memory, eb->len);
+ write_extent_buffer(eb, memory, 0, eb->len);
+}
+
+static int test_eb_mem_ops(u32 sectorsize, u32 nodesize)
+{
+ struct btrfs_fs_info *fs_info;
+ struct extent_buffer *eb = NULL;
+ void *memory = NULL;
+ int ret;
+
+ test_msg("running extent buffer memory operation tests");
+
+ fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
+ if (!fs_info) {
+ test_std_err(TEST_ALLOC_FS_INFO);
+ return -ENOMEM;
+ }
+
+ memory = kvzalloc(nodesize, GFP_KERNEL);
+ if (!memory) {
+ test_err("failed to allocate memory");
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ eb = __alloc_dummy_extent_buffer(fs_info, SZ_1M, nodesize);
+ if (!eb) {
+ test_std_err(TEST_ALLOC_EXTENT_BUFFER);
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ init_eb_and_memory(eb, memory);
+ ret = verify_eb_and_memory(eb, memory, "full eb write");
+ if (ret < 0)
+ goto out;
+
+ memcpy(memory, memory + 16, 16);
+ memcpy_extent_buffer(eb, 0, 16, 16);
+ ret = verify_eb_and_memory(eb, memory, "same page non-overlapping memcpy 1");
+ if (ret < 0)
+ goto out;
+
+ memcpy(memory, memory + 2048, 16);
+ memcpy_extent_buffer(eb, 0, 2048, 16);
+ ret = verify_eb_and_memory(eb, memory, "same page non-overlapping memcpy 2");
+ if (ret < 0)
+ goto out;
+ memcpy(memory, memory + 2048, 2048);
+ memcpy_extent_buffer(eb, 0, 2048, 2048);
+ ret = verify_eb_and_memory(eb, memory, "same page non-overlapping memcpy 3");
+ if (ret < 0)
+ goto out;
+
+ memmove(memory + 512, memory + 256, 512);
+ memmove_extent_buffer(eb, 512, 256, 512);
+ ret = verify_eb_and_memory(eb, memory, "same page overlapping memcpy 1");
+ if (ret < 0)
+ goto out;
+
+ memmove(memory + 2048, memory + 512, 2048);
+ memmove_extent_buffer(eb, 2048, 512, 2048);
+ ret = verify_eb_and_memory(eb, memory, "same page overlapping memcpy 2");
+ if (ret < 0)
+ goto out;
+ memmove(memory + 512, memory + 2048, 2048);
+ memmove_extent_buffer(eb, 512, 2048, 2048);
+ ret = verify_eb_and_memory(eb, memory, "same page overlapping memcpy 3");
+ if (ret < 0)
+ goto out;
+
+ if (nodesize > PAGE_SIZE) {
+ memcpy(memory, memory + 4096 - 128, 256);
+ memcpy_extent_buffer(eb, 0, 4096 - 128, 256);
+ ret = verify_eb_and_memory(eb, memory, "cross page non-overlapping memcpy 1");
+ if (ret < 0)
+ goto out;
+
+ memcpy(memory + 4096 - 128, memory + 4096 + 128, 256);
+ memcpy_extent_buffer(eb, 4096 - 128, 4096 + 128, 256);
+ ret = verify_eb_and_memory(eb, memory, "cross page non-overlapping memcpy 2");
+ if (ret < 0)
+ goto out;
+
+ memmove(memory + 4096 - 128, memory + 4096 - 64, 256);
+ memmove_extent_buffer(eb, 4096 - 128, 4096 - 64, 256);
+ ret = verify_eb_and_memory(eb, memory, "cross page overlapping memcpy 1");
+ if (ret < 0)
+ goto out;
+
+ memmove(memory + 4096 - 64, memory + 4096 - 128, 256);
+ memmove_extent_buffer(eb, 4096 - 64, 4096 - 128, 256);
+ ret = verify_eb_and_memory(eb, memory, "cross page overlapping memcpy 2");
+ if (ret < 0)
+ goto out;
+ }
+out:
+ free_extent_buffer(eb);
+ kvfree(memory);
+ btrfs_free_dummy_fs_info(fs_info);
+ return ret;
+}
+
int btrfs_test_extent_io(u32 sectorsize, u32 nodesize)
{
int ret;
@@ -663,6 +806,10 @@ int btrfs_test_extent_io(u32 sectorsize, u32 nodesize)
goto out;
ret = test_eb_bitmaps(sectorsize, nodesize);
+ if (ret)
+ goto out;
+
+ ret = test_eb_mem_ops(sectorsize, nodesize);
out:
return ret;
}
--
2.41.0
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCH v3 3/8] btrfs: refactor extent buffer bitmaps operations
2023-07-15 11:08 [PATCH v3 0/8] btrfs: preparation patches for the incoming metadata folio conversion Qu Wenruo
2023-07-15 11:08 ` [PATCH v3 1/8] btrfs: tests: enhance extent buffer bitmap tests Qu Wenruo
2023-07-15 11:08 ` [PATCH v3 2/8] btrfs: tests: add self tests for extent buffer memory operations Qu Wenruo
@ 2023-07-15 11:08 ` Qu Wenruo
2023-07-15 11:08 ` [PATCH v3 4/8] btrfs: use write_extent_buffer() to implement write_extent_buffer_*id() Qu Wenruo
` (6 subsequent siblings)
9 siblings, 0 replies; 17+ messages in thread
From: Qu Wenruo @ 2023-07-15 11:08 UTC (permalink / raw)
To: linux-btrfs; +Cc: Sweet Tea Dorminy, David Sterba
[BACKGROUND]
Currently we handle extent bitmaps manually in
extent_buffer_bitmap_set() and extent_buffer_bitmap_clear().
Even with various helpers like eb_bitmap_offset(), the code is still a
little messy to read. It seems to be a copy of bitmap_set(), but with
all the cross-page handling embedded into the code.
[ENHANCEMENT]
This patch would enhance the readability by introducing two helpers:
- memset_extent_buffer()
To handle the byte aligned range, thus all the cross-page handling is
done there.
- extent_buffer_get_byte()
This is for the first and the last byte operations, which only need to
grab one byte, thus no need for any cross-page handling.
So we can split both extent_buffer_bitmap_set() and
extent_buffer_bitmap_clear() into 3 parts:
- Handle the first byte
If the range fits inside the first byte, we can exit early.
- Handle the byte aligned part
This is the part which can have cross-page operations, and it would
be handled by memset_extent_buffer().
- Handle the last byte
This refactoring not only makes the code a little easier to read,
but also makes the later folio/page switch much easier, as the switch only
needs to be done inside memset_extent_buffer() and extent_buffer_get_byte().
Reviewed-by: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
fs/btrfs/extent_io.c | 141 ++++++++++++++++++++-----------------------
1 file changed, 67 insertions(+), 74 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a845a90d46f7..4acc6d05c467 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4229,32 +4229,30 @@ void write_extent_buffer(const struct extent_buffer *eb, const void *srcv,
}
}
-void memzero_extent_buffer(const struct extent_buffer *eb, unsigned long start,
- unsigned long len)
+static void memset_extent_buffer(const struct extent_buffer *eb, int c,
+ unsigned long start, unsigned long len)
{
- size_t cur;
- size_t offset;
- struct page *page;
- char *kaddr;
- unsigned long i = get_eb_page_index(start);
+ unsigned long cur = start;
+ while (cur < start + len) {
+ unsigned long index = get_eb_page_index(cur);
+ unsigned int offset = get_eb_offset_in_page(eb, cur);
+ unsigned int cur_len = min(start + len - cur, PAGE_SIZE - offset);
+ struct page *page = eb->pages[index];
+
+ assert_eb_page_uptodate(eb, page);
+ memset(page_address(page) + offset, c, cur_len);
+
+ cur += cur_len;
+ }
+}
+
+void memzero_extent_buffer(const struct extent_buffer *eb, unsigned long start,
+ unsigned long len)
+{
if (check_eb_range(eb, start, len))
return;
-
- offset = get_eb_offset_in_page(eb, start);
-
- while (len > 0) {
- page = eb->pages[i];
- assert_eb_page_uptodate(eb, page);
-
- cur = min(len, PAGE_SIZE - offset);
- kaddr = page_address(page);
- memset(kaddr + offset, 0, cur);
-
- len -= cur;
- offset = 0;
- i++;
- }
+ return memset_extent_buffer(eb, 0, start, len);
}
void copy_extent_buffer_full(const struct extent_buffer *dst,
@@ -4371,6 +4369,15 @@ int extent_buffer_test_bit(const struct extent_buffer *eb, unsigned long start,
return 1U & (kaddr[offset] >> (nr & (BITS_PER_BYTE - 1)));
}
+static u8 *extent_buffer_get_byte(const struct extent_buffer *eb, unsigned long bytenr)
+{
+ unsigned long index = get_eb_page_index(bytenr);
+
+ if (check_eb_range(eb, bytenr, 1))
+ return NULL;
+ return page_address(eb->pages[index]) + get_eb_offset_in_page(eb, bytenr);
+}
+
/*
* Set an area of a bitmap to 1.
*
@@ -4382,35 +4389,28 @@ int extent_buffer_test_bit(const struct extent_buffer *eb, unsigned long start,
void extent_buffer_bitmap_set(const struct extent_buffer *eb, unsigned long start,
unsigned long pos, unsigned long len)
{
+ unsigned int first_byte = start + BIT_BYTE(pos);
+ unsigned int last_byte = start + BIT_BYTE(pos + len - 1);
+ const bool same_byte = (first_byte == last_byte);
+ u8 mask = BITMAP_FIRST_BYTE_MASK(pos);
u8 *kaddr;
- struct page *page;
- unsigned long i;
- size_t offset;
- const unsigned int size = pos + len;
- int bits_to_set = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
- u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(pos);
- eb_bitmap_offset(eb, start, pos, &i, &offset);
- page = eb->pages[i];
- assert_eb_page_uptodate(eb, page);
- kaddr = page_address(page);
+ if (same_byte)
+ mask &= BITMAP_LAST_BYTE_MASK(pos + len);
- while (len >= bits_to_set) {
- kaddr[offset] |= mask_to_set;
- len -= bits_to_set;
- bits_to_set = BITS_PER_BYTE;
- mask_to_set = ~0;
- if (++offset >= PAGE_SIZE && len > 0) {
- offset = 0;
- page = eb->pages[++i];
- assert_eb_page_uptodate(eb, page);
- kaddr = page_address(page);
- }
- }
- if (len) {
- mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
- kaddr[offset] |= mask_to_set;
- }
+ /* Handle the first byte. */
+ kaddr = extent_buffer_get_byte(eb, first_byte);
+ *kaddr |= mask;
+ if (same_byte)
+ return;
+
+ /* Handle the byte aligned part. */
+ ASSERT(first_byte + 1 <= last_byte);
+ memset_extent_buffer(eb, 0xff, first_byte + 1, last_byte - first_byte - 1);
+
+ /* Handle the last byte. */
+ kaddr = extent_buffer_get_byte(eb, last_byte);
+ *kaddr |= BITMAP_LAST_BYTE_MASK(pos + len);
}
@@ -4426,35 +4426,28 @@ void extent_buffer_bitmap_clear(const struct extent_buffer *eb,
unsigned long start, unsigned long pos,
unsigned long len)
{
+ unsigned int first_byte = start + BIT_BYTE(pos);
+ unsigned int last_byte = start + BIT_BYTE(pos + len - 1);
+ const bool same_byte = (first_byte == last_byte);
+ u8 mask = BITMAP_FIRST_BYTE_MASK(pos);
u8 *kaddr;
- struct page *page;
- unsigned long i;
- size_t offset;
- const unsigned int size = pos + len;
- int bits_to_clear = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
- u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos);
- eb_bitmap_offset(eb, start, pos, &i, &offset);
- page = eb->pages[i];
- assert_eb_page_uptodate(eb, page);
- kaddr = page_address(page);
+ if (same_byte)
+ mask &= BITMAP_LAST_BYTE_MASK(pos + len);
- while (len >= bits_to_clear) {
- kaddr[offset] &= ~mask_to_clear;
- len -= bits_to_clear;
- bits_to_clear = BITS_PER_BYTE;
- mask_to_clear = ~0;
- if (++offset >= PAGE_SIZE && len > 0) {
- offset = 0;
- page = eb->pages[++i];
- assert_eb_page_uptodate(eb, page);
- kaddr = page_address(page);
- }
- }
- if (len) {
- mask_to_clear &= BITMAP_LAST_BYTE_MASK(size);
- kaddr[offset] &= ~mask_to_clear;
- }
+ /* Handle the first byte. */
+ kaddr = extent_buffer_get_byte(eb, first_byte);
+ *kaddr &= ~mask;
+ if (same_byte)
+ return;
+
+ /* Handle the byte aligned part. */
+ ASSERT(first_byte + 1 <= last_byte);
+ memset_extent_buffer(eb, 0, first_byte + 1, last_byte - first_byte - 1);
+
+ /* Handle the last byte. */
+ kaddr = extent_buffer_get_byte(eb, last_byte);
+ *kaddr &= ~BITMAP_LAST_BYTE_MASK(pos + len);
}
static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
--
2.41.0
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCH v3 4/8] btrfs: use write_extent_buffer() to implement write_extent_buffer_*id()
2023-07-15 11:08 [PATCH v3 0/8] btrfs: preparation patches for the incoming metadata folio conversion Qu Wenruo
` (2 preceding siblings ...)
2023-07-15 11:08 ` [PATCH v3 3/8] btrfs: refactor extent buffer bitmaps operations Qu Wenruo
@ 2023-07-15 11:08 ` Qu Wenruo
2023-07-15 11:08 ` [PATCH v3 5/8] btrfs: refactor main loop in copy_extent_buffer_full() Qu Wenruo
` (5 subsequent siblings)
9 siblings, 0 replies; 17+ messages in thread
From: Qu Wenruo @ 2023-07-15 11:08 UTC (permalink / raw)
To: linux-btrfs; +Cc: Sweet Tea Dorminy, David Sterba
The helpers write_extent_buffer_chunk_tree_uuid() and
write_extent_buffer_fsid() can both be implemented with
write_extent_buffer().
These two helpers are not that frequently used, they only get called
during initialization of a new tree block. There is not much need for
those slightly optimized versions. And since they can be easily
converted to one write_extent_buffer() call, define them as inline
helpers.
This would make the later page/folio switch much easier, as all changes
only need to happen in write_extent_buffer().
Reviewed-by: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
fs/btrfs/extent_io.c | 22 ----------------------
fs/btrfs/extent_io.h | 19 ++++++++++++++++---
2 files changed, 16 insertions(+), 25 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 4acc6d05c467..aabb59cb3669 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4175,28 +4175,6 @@ static void assert_eb_page_uptodate(const struct extent_buffer *eb,
}
}
-void write_extent_buffer_chunk_tree_uuid(const struct extent_buffer *eb,
- const void *srcv)
-{
- char *kaddr;
-
- assert_eb_page_uptodate(eb, eb->pages[0]);
- kaddr = page_address(eb->pages[0]) +
- get_eb_offset_in_page(eb, offsetof(struct btrfs_header,
- chunk_tree_uuid));
- memcpy(kaddr, srcv, BTRFS_FSID_SIZE);
-}
-
-void write_extent_buffer_fsid(const struct extent_buffer *eb, const void *srcv)
-{
- char *kaddr;
-
- assert_eb_page_uptodate(eb, eb->pages[0]);
- kaddr = page_address(eb->pages[0]) +
- get_eb_offset_in_page(eb, offsetof(struct btrfs_header, fsid));
- memcpy(kaddr, srcv, BTRFS_FSID_SIZE);
-}
-
void write_extent_buffer(const struct extent_buffer *eb, const void *srcv,
unsigned long start, unsigned long len)
{
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index c5fae3a7d911..5966d810af7b 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -236,11 +236,24 @@ void read_extent_buffer(const struct extent_buffer *eb, void *dst,
int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb,
void __user *dst, unsigned long start,
unsigned long len);
-void write_extent_buffer_fsid(const struct extent_buffer *eb, const void *src);
-void write_extent_buffer_chunk_tree_uuid(const struct extent_buffer *eb,
- const void *src);
void write_extent_buffer(const struct extent_buffer *eb, const void *src,
unsigned long start, unsigned long len);
+
+static inline void write_extent_buffer_chunk_tree_uuid(
+ const struct extent_buffer *eb, const void *chunk_tree_uuid)
+{
+ write_extent_buffer(eb, chunk_tree_uuid,
+ offsetof(struct btrfs_header, chunk_tree_uuid),
+ BTRFS_FSID_SIZE);
+}
+
+static inline void write_extent_buffer_fsid(const struct extent_buffer *eb,
+ const void *fsid)
+{
+ write_extent_buffer(eb, fsid, offsetof(struct btrfs_header, fsid),
+ BTRFS_FSID_SIZE);
+}
+
void copy_extent_buffer_full(const struct extent_buffer *dst,
const struct extent_buffer *src);
void copy_extent_buffer(const struct extent_buffer *dst,
--
2.41.0
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCH v3 5/8] btrfs: refactor main loop in copy_extent_buffer_full()
2023-07-15 11:08 [PATCH v3 0/8] btrfs: preparation patches for the incoming metadata folio conversion Qu Wenruo
` (3 preceding siblings ...)
2023-07-15 11:08 ` [PATCH v3 4/8] btrfs: use write_extent_buffer() to implement write_extent_buffer_*id() Qu Wenruo
@ 2023-07-15 11:08 ` Qu Wenruo
2023-07-15 11:08 ` [PATCH v3 6/8] btrfs: copy all pages at once at the end of btrfs_clone_extent_buffer() Qu Wenruo
` (4 subsequent siblings)
9 siblings, 0 replies; 17+ messages in thread
From: Qu Wenruo @ 2023-07-15 11:08 UTC (permalink / raw)
To: linux-btrfs; +Cc: Sweet Tea Dorminy, David Sterba
[BACKGROUND]
copy_extent_buffer_full() currently handles regular and subpage cases
differently: for regular cases it copies page by page, while
for subpage cases it just copies the content.
This is fine for the page based extent buffer code, but for the incoming
folio conversion, it can be a burden to add a new branch just to handle
all the different combinations (subpage vs regular, one single folio vs
multi pages).
[ENHANCE]
Instead of handling the different combinations, use one single
code path for all cases, utilizing write_extent_buffer() to do the
copying.
Reviewed-by: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
fs/btrfs/extent_io.c | 23 +++++++++--------------
1 file changed, 9 insertions(+), 14 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index aabb59cb3669..46f72e6623d6 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4236,24 +4236,19 @@ void memzero_extent_buffer(const struct extent_buffer *eb, unsigned long start,
void copy_extent_buffer_full(const struct extent_buffer *dst,
const struct extent_buffer *src)
{
- int i;
- int num_pages;
+ unsigned long cur = 0;
ASSERT(dst->len == src->len);
- if (dst->fs_info->nodesize >= PAGE_SIZE) {
- num_pages = num_extent_pages(dst);
- for (i = 0; i < num_pages; i++)
- copy_page(page_address(dst->pages[i]),
- page_address(src->pages[i]));
- } else {
- size_t src_offset = get_eb_offset_in_page(src, 0);
- size_t dst_offset = get_eb_offset_in_page(dst, 0);
+ while (cur < src->len) {
+ unsigned long index = get_eb_page_index(cur);
+ unsigned long offset = get_eb_offset_in_page(src, cur);
+ unsigned long cur_len = min(src->len, PAGE_SIZE - offset);
+ void *addr = page_address(src->pages[index]) + offset;
- ASSERT(src->fs_info->nodesize < PAGE_SIZE);
- memcpy(page_address(dst->pages[0]) + dst_offset,
- page_address(src->pages[0]) + src_offset,
- src->len);
+ write_extent_buffer(dst, addr, cur, cur_len);
+
+ cur += cur_len;
}
}
--
2.41.0
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCH v3 6/8] btrfs: copy all pages at once at the end of btrfs_clone_extent_buffer()
2023-07-15 11:08 [PATCH v3 0/8] btrfs: preparation patches for the incoming metadata folio conversion Qu Wenruo
` (4 preceding siblings ...)
2023-07-15 11:08 ` [PATCH v3 5/8] btrfs: refactor main loop in copy_extent_buffer_full() Qu Wenruo
@ 2023-07-15 11:08 ` Qu Wenruo
2023-07-15 11:08 ` [PATCH v3 7/8] btrfs: refactor main loop in memcpy_extent_buffer() Qu Wenruo
` (3 subsequent siblings)
9 siblings, 0 replies; 17+ messages in thread
From: Qu Wenruo @ 2023-07-15 11:08 UTC (permalink / raw)
To: linux-btrfs; +Cc: Sweet Tea Dorminy, David Sterba
btrfs_clone_extent_buffer() calls copy_page() at each iteration but we
can copy all pages at the end in one go if there were no errors.
This would make later conversion to folios easier.
Reviewed-by: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
fs/btrfs/extent_io.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 46f72e6623d6..d2a89b04c487 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3285,8 +3285,8 @@ struct extent_buffer *btrfs_clone_extent_buffer(const struct extent_buffer *src)
return NULL;
}
WARN_ON(PageDirty(p));
- copy_page(page_address(p), page_address(src->pages[i]));
}
+ copy_extent_buffer_full(new, src);
set_extent_buffer_uptodate(new);
return new;
--
2.41.0
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCH v3 7/8] btrfs: refactor main loop in memcpy_extent_buffer()
2023-07-15 11:08 [PATCH v3 0/8] btrfs: preparation patches for the incoming metadata folio conversion Qu Wenruo
` (5 preceding siblings ...)
2023-07-15 11:08 ` [PATCH v3 6/8] btrfs: copy all pages at once at the end of btrfs_clone_extent_buffer() Qu Wenruo
@ 2023-07-15 11:08 ` Qu Wenruo
2023-07-15 11:08 ` [PATCH v3 8/8] btrfs: refactor main loop in memmove_extent_buffer() Qu Wenruo
` (2 subsequent siblings)
9 siblings, 0 replies; 17+ messages in thread
From: Qu Wenruo @ 2023-07-15 11:08 UTC (permalink / raw)
To: linux-btrfs
[BACKGROUND]
Currently memcpy_extent_buffer() does a loop where it would stop at
any page boundary inside [dst_offset, dst_offset + len) or [src_offset,
src_offset + len).
This mostly exists to allow us to call copy_pages(), but if we're going
to use folios we will need to handle multi-page (the old behavior) or
single folio (the new optimization).
The current code would be a burden for future changes.
[ENHANCEMENT]
There is a hidden pitfall in the name memcpy_extent_buffer(): unlike
regular memcpy(), this function can handle overlapping ranges.
So here we extract write_extent_buffer() into a new internal helper,
__write_extent_buffer(), and add a new parameter @use_memmove, to
indicate whether we should use memmove() or regular memcpy().
Now we can use __write_extent_buffer() to handle writing into the dst
range, with proper overlapping detection.
This slightly changes the chance of calling memmove().
As the split only happens at the source range page boundaries, the
memcpy()/memmove() range would be slightly larger than in the old code,
thus slightly increasing the chance we call memmove() rather than memcpy().
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
fs/btrfs/extent_io.c | 56 ++++++++++++++++++++++++--------------------
1 file changed, 30 insertions(+), 26 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index d2a89b04c487..a9ab4d17530e 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4175,8 +4175,9 @@ static void assert_eb_page_uptodate(const struct extent_buffer *eb,
}
}
-void write_extent_buffer(const struct extent_buffer *eb, const void *srcv,
- unsigned long start, unsigned long len)
+static void __write_extent_buffer(const struct extent_buffer *eb,
+ const void *srcv, unsigned long start,
+ unsigned long len, bool use_memmove)
{
size_t cur;
size_t offset;
@@ -4184,6 +4185,8 @@ void write_extent_buffer(const struct extent_buffer *eb, const void *srcv,
char *kaddr;
char *src = (char *)srcv;
unsigned long i = get_eb_page_index(start);
+ /* For unmapped (dummy) ebs, no need to check their uptodate status. */
+ const bool check_uptodate = !test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
WARN_ON(test_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags));
@@ -4194,11 +4197,15 @@ void write_extent_buffer(const struct extent_buffer *eb, const void *srcv,
while (len > 0) {
page = eb->pages[i];
- assert_eb_page_uptodate(eb, page);
+ if (check_uptodate)
+ assert_eb_page_uptodate(eb, page);
cur = min(len, PAGE_SIZE - offset);
kaddr = page_address(page);
- memcpy(kaddr + offset, src, cur);
+ if (use_memmove)
+ memmove(kaddr + offset, src, cur);
+ else
+ memcpy(kaddr + offset, src, cur);
src += cur;
len -= cur;
@@ -4207,6 +4214,12 @@ void write_extent_buffer(const struct extent_buffer *eb, const void *srcv,
}
}
+void write_extent_buffer(const struct extent_buffer *eb, const void *srcv,
+ unsigned long start, unsigned long len)
+{
+ return __write_extent_buffer(eb, srcv, start, len, false);
+}
+
static void memset_extent_buffer(const struct extent_buffer *eb, int c,
unsigned long start, unsigned long len)
{
@@ -4455,34 +4468,25 @@ void memcpy_extent_buffer(const struct extent_buffer *dst,
unsigned long dst_offset, unsigned long src_offset,
unsigned long len)
{
- size_t cur;
- size_t dst_off_in_page;
- size_t src_off_in_page;
- unsigned long dst_i;
- unsigned long src_i;
+ unsigned long cur_off = 0;
if (check_eb_range(dst, dst_offset, len) ||
check_eb_range(dst, src_offset, len))
return;
- while (len > 0) {
- dst_off_in_page = get_eb_offset_in_page(dst, dst_offset);
- src_off_in_page = get_eb_offset_in_page(dst, src_offset);
+ while (cur_off < len) {
+ unsigned long cur_src = cur_off + src_offset;
+ unsigned long pg_index = get_eb_page_index(cur_src);
+ unsigned long pg_off = get_eb_offset_in_page(dst, cur_src);
+ unsigned long cur_len = min(src_offset + len - cur_src,
+ PAGE_SIZE - pg_off);
+ void *src_addr = page_address(dst->pages[pg_index]) + pg_off;
+ const bool use_memmove = areas_overlap(src_offset + cur_off,
+ dst_offset + cur_off, cur_len);
- dst_i = get_eb_page_index(dst_offset);
- src_i = get_eb_page_index(src_offset);
-
- cur = min(len, (unsigned long)(PAGE_SIZE -
- src_off_in_page));
- cur = min_t(unsigned long, cur,
- (unsigned long)(PAGE_SIZE - dst_off_in_page));
-
- copy_pages(dst->pages[dst_i], dst->pages[src_i],
- dst_off_in_page, src_off_in_page, cur);
-
- src_offset += cur;
- dst_offset += cur;
- len -= cur;
+ __write_extent_buffer(dst, src_addr, dst_offset + cur_off, cur_len,
+ use_memmove);
+ cur_off += cur_len;
}
}
--
2.41.0
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCH v3 8/8] btrfs: refactor main loop in memmove_extent_buffer()
2023-07-15 11:08 [PATCH v3 0/8] btrfs: preparation patches for the incoming metadata folio conversion Qu Wenruo
` (6 preceding siblings ...)
2023-07-15 11:08 ` [PATCH v3 7/8] btrfs: refactor main loop in memcpy_extent_buffer() Qu Wenruo
@ 2023-07-15 11:08 ` Qu Wenruo
2023-07-18 16:01 ` [PATCH v3 0/8] btrfs: preparation patches for the incoming metadata folio conversion David Sterba
2023-07-20 15:06 ` David Sterba
9 siblings, 0 replies; 17+ messages in thread
From: Qu Wenruo @ 2023-07-15 11:08 UTC (permalink / raw)
To: linux-btrfs
[BACKGROUND]
Currently memmove_extent_buffer() does a loop where it stops at any page
boundary inside [dst_offset, dst_offset + len) or [src_offset,
src_offset + len).
This is mostly allowing us to do copy_pages(), but if we're going to use
folios we will need to handle multi-page (the old behavior) or single
folio (the new optimization).
The current code would be a burden for future changes.
[ENHANCEMENT]
Instead of sticking with copy_pages(), here we utilize the new
__write_extent_buffer() helper to handle the writes.
Unlike the refactor in memcpy_extent_buffer(), we can not just rely on
the write_extent_buffer() and only handle page boundaries inside src
range.
The function write_extent_buffer() itself is still doing forward
writing, thus it can not handle the following case: (already in the
extent buffer memory operation tests, cross page overlapping run 2)
Src Page boundary
|///////|
|///|////|
Dst
In the above case, if we only follow the page boundaries in the src range,
there is no need to do any split: just one __write_extent_buffer() call
with @use_memmove = true.
But __write_extent_buffer() would split the dst range into two,
so it first copies the beginning part of the src range into the first half
of the dst range.
After this operation, the beginning of the dst range is already updated,
which overwrites the overlapping tail of the src range before it has been
copied, causing corruption.
So we have to follow the old behavior of handling both page boundaries.
And since we're the last caller of copy_pages(), we can remove it
completely.
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
fs/btrfs/extent_io.c | 48 ++++++++++++++++----------------------------
1 file changed, 17 insertions(+), 31 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a9ab4d17530e..b8162725f054 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4442,28 +4442,6 @@ static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned
return distance < len;
}
-static void copy_pages(struct page *dst_page, struct page *src_page,
- unsigned long dst_off, unsigned long src_off,
- unsigned long len)
-{
- char *dst_kaddr = page_address(dst_page);
- char *src_kaddr;
- int must_memmove = 0;
-
- if (dst_page != src_page) {
- src_kaddr = page_address(src_page);
- } else {
- src_kaddr = dst_kaddr;
- if (areas_overlap(src_off, dst_off, len))
- must_memmove = 1;
- }
-
- if (must_memmove)
- memmove(dst_kaddr + dst_off, src_kaddr + src_off, len);
- else
- memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
-}
-
void memcpy_extent_buffer(const struct extent_buffer *dst,
unsigned long dst_offset, unsigned long src_offset,
unsigned long len)
@@ -4494,23 +4472,26 @@ void memmove_extent_buffer(const struct extent_buffer *dst,
unsigned long dst_offset, unsigned long src_offset,
unsigned long len)
{
- size_t cur;
- size_t dst_off_in_page;
- size_t src_off_in_page;
unsigned long dst_end = dst_offset + len - 1;
unsigned long src_end = src_offset + len - 1;
- unsigned long dst_i;
- unsigned long src_i;
if (check_eb_range(dst, dst_offset, len) ||
check_eb_range(dst, src_offset, len))
return;
+
if (dst_offset < src_offset) {
memcpy_extent_buffer(dst, dst_offset, src_offset, len);
return;
}
+
while (len > 0) {
- dst_i = get_eb_page_index(dst_end);
+ unsigned long src_i;
+ size_t cur;
+ size_t dst_off_in_page;
+ size_t src_off_in_page;
+ void *src_addr;
+ bool use_memmove;
+
src_i = get_eb_page_index(src_end);
dst_off_in_page = get_eb_offset_in_page(dst, dst_end);
@@ -4518,9 +4499,14 @@ void memmove_extent_buffer(const struct extent_buffer *dst,
cur = min_t(unsigned long, len, src_off_in_page + 1);
cur = min(cur, dst_off_in_page + 1);
- copy_pages(dst->pages[dst_i], dst->pages[src_i],
- dst_off_in_page - cur + 1,
- src_off_in_page - cur + 1, cur);
+
+ src_addr = page_address(dst->pages[src_i]) + src_off_in_page -
+ cur + 1;
+ use_memmove = areas_overlap(src_end - cur + 1, dst_end - cur + 1,
+ cur);
+
+ __write_extent_buffer(dst, src_addr, dst_end - cur + 1, cur,
+ use_memmove);
dst_end -= cur;
src_end -= cur;
--
2.41.0
^ permalink raw reply related [flat|nested] 17+ messages in thread* Re: [PATCH v3 0/8] btrfs: preparation patches for the incoming metadata folio conversion
2023-07-15 11:08 [PATCH v3 0/8] btrfs: preparation patches for the incoming metadata folio conversion Qu Wenruo
` (7 preceding siblings ...)
2023-07-15 11:08 ` [PATCH v3 8/8] btrfs: refactor main loop in memmove_extent_buffer() Qu Wenruo
@ 2023-07-18 16:01 ` David Sterba
2023-07-18 22:51 ` Qu Wenruo
2023-07-20 15:06 ` David Sterba
9 siblings, 1 reply; 17+ messages in thread
From: David Sterba @ 2023-07-18 16:01 UTC (permalink / raw)
To: Qu Wenruo; +Cc: linux-btrfs
On Sat, Jul 15, 2023 at 07:08:26PM +0800, Qu Wenruo wrote:
> [CHANGELOG]
> v2:
> - Define write_extent_buffer_fsid/chunk_tree_uuid() as inline helpers
>
> v3:
> - Fix an undefined behavior bug in memcpy_extent_buffer()
> Unlike the name, memcpy_extent_buffer() needs to handle overlapping
> ranges, thus it calls copy_pages() which do overlap checks and switch
> to memmove() when needed.
>
> Here we introduce __write_extent_buffer() which allows us to switch
> to go memmove() if needed.
>
> - Also refactor memmove_extent_buffer()
> Since we have __write_extent_buffer() which can go memmove(), it's
> not hard to refactor memmove_extent_buffer().
>
> But there is still a pitfall that we have to handle double page
> boundaries as the old behavior, explained in the last patch.
>
> - Add selftests on extent buffer memory operations
> I have failed too many times refactoring memmove_extent_buffer(), the
> wasted time should be a memorial for my stupidity.
Seems that v3 has proceeded up to btrfs/143 that prints a lot test
output errors and following tests fails too. It's on top of misc-next so
it could be caused by some other recent patch. I'll do another round, if
this patchset turns out to be ok I'll add it to misc-next.
^ permalink raw reply [flat|nested] 17+ messages in thread* Re: [PATCH v3 0/8] btrfs: preparation patches for the incoming metadata folio conversion
2023-07-18 16:01 ` [PATCH v3 0/8] btrfs: preparation patches for the incoming metadata folio conversion David Sterba
@ 2023-07-18 22:51 ` Qu Wenruo
2023-07-19 21:49 ` David Sterba
0 siblings, 1 reply; 17+ messages in thread
From: Qu Wenruo @ 2023-07-18 22:51 UTC (permalink / raw)
To: dsterba, Qu Wenruo; +Cc: linux-btrfs
On 2023/7/19 00:01, David Sterba wrote:
> On Sat, Jul 15, 2023 at 07:08:26PM +0800, Qu Wenruo wrote:
>> [CHANGELOG]
>> v2:
>> - Define write_extent_buffer_fsid/chunk_tree_uuid() as inline helpers
>>
>> v3:
>> - Fix an undefined behavior bug in memcpy_extent_buffer()
>> Unlike the name, memcpy_extent_buffer() needs to handle overlapping
>> ranges, thus it calls copy_pages() which do overlap checks and switch
>> to memmove() when needed.
>>
>> Here we introduce __write_extent_buffer() which allows us to switch
>> to go memmove() if needed.
>>
>> - Also refactor memmove_extent_buffer()
>> Since we have __write_extent_buffer() which can go memmove(), it's
>> not hard to refactor memmove_extent_buffer().
>>
>> But there is still a pitfall that we have to handle double page
>> boundaries as the old behavior, explained in the last patch.
>>
>> - Add selftests on extent buffer memory operations
>> I have failed too many times refactoring memmove_extent_buffer(), the
>> wasted time should be a memorial for my stupidity.
>
> Seems that v3 has proceeded up to btrfs/143 that prints a lot test
> output errors and following tests fails too. It's on top of misc-next so
> it could be caused by some other recent patch. I'll do another round, if
> this patchset turns out to be ok I'll add it to misc-next.
btrfs/143 has a known (?) regression that dm devices are not properly
cleaned up, causing all later tests to fail (as scratch device is taken
by the dm device, all later mkfs would fail).
I notice that is fixed recently in upstream for-next branch, you may
want to update/rebase your fstests.
Thanks,
Qu
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH v3 0/8] btrfs: preparation patches for the incoming metadata folio conversion
2023-07-18 22:51 ` Qu Wenruo
@ 2023-07-19 21:49 ` David Sterba
0 siblings, 0 replies; 17+ messages in thread
From: David Sterba @ 2023-07-19 21:49 UTC (permalink / raw)
To: Qu Wenruo; +Cc: dsterba, Qu Wenruo, linux-btrfs
On Wed, Jul 19, 2023 at 06:51:18AM +0800, Qu Wenruo wrote:
> On 2023/7/19 00:01, David Sterba wrote:
> > On Sat, Jul 15, 2023 at 07:08:26PM +0800, Qu Wenruo wrote:
> >> [CHANGELOG]
> >> v2:
> >> - Define write_extent_buffer_fsid/chunk_tree_uuid() as inline helpers
> >>
> >> v3:
> >> - Fix an undefined behavior bug in memcpy_extent_buffer()
> >> Unlike the name, memcpy_extent_buffer() needs to handle overlapping
> >> ranges, thus it calls copy_pages() which do overlap checks and switch
> >> to memmove() when needed.
> >>
> >> Here we introduce __write_extent_buffer() which allows us to switch
> >> to go memmove() if needed.
> >>
> >> - Also refactor memmove_extent_buffer()
> >> Since we have __write_extent_buffer() which can go memmove(), it's
> >> not hard to refactor memmove_extent_buffer().
> >>
> >> But there is still a pitfall that we have to handle double page
> >> boundaries as the old behavior, explained in the last patch.
> >>
> >> - Add selftests on extent buffer memory operations
> >> I have failed too many times refactoring memmove_extent_buffer(), the
> >> wasted time should be a memorial for my stupidity.
> >
> > Seems that v3 has proceeded up to btrfs/143 that prints a lot test
> > output errors and following tests fails too. It's on top of misc-next so
> > it could be caused by some other recent patch. I'll do another round, if
> > this patchset turns out to be ok I'll add it to misc-next.
>
> btrfs/143 has a known (?) regression that dm devices are not properly
> cleaned up, causing all later tests to fail (as scratch device is taken
> by the dm device, all later mkfs would fail).
>
> I notice that is fixed recently in upstream for-next branch, you may
> want to update/rebase your fstests.
That's quite possible, thanks. I've updated the VMs and restarted tests,
we'll see.
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH v3 0/8] btrfs: preparation patches for the incoming metadata folio conversion
2023-07-15 11:08 [PATCH v3 0/8] btrfs: preparation patches for the incoming metadata folio conversion Qu Wenruo
` (8 preceding siblings ...)
2023-07-18 16:01 ` [PATCH v3 0/8] btrfs: preparation patches for the incoming metadata folio conversion David Sterba
@ 2023-07-20 15:06 ` David Sterba
2023-07-20 22:15 ` Qu Wenruo
9 siblings, 1 reply; 17+ messages in thread
From: David Sterba @ 2023-07-20 15:06 UTC (permalink / raw)
To: Qu Wenruo; +Cc: linux-btrfs
On Sat, Jul 15, 2023 at 07:08:26PM +0800, Qu Wenruo wrote:
> [CHANGELOG]
> v2:
> - Define write_extent_buffer_fsid/chunk_tree_uuid() as inline helpers
>
> v3:
> - Fix an undefined behavior bug in memcpy_extent_buffer()
> Unlike the name, memcpy_extent_buffer() needs to handle overlapping
> ranges, thus it calls copy_pages() which do overlap checks and switch
> to memmove() when needed.
>
> Here we introduce __write_extent_buffer() which allows us to switch
> to go memmove() if needed.
>
> - Also refactor memmove_extent_buffer()
> Since we have __write_extent_buffer() which can go memmove(), it's
> not hard to refactor memmove_extent_buffer().
>
> But there is still a pitfall that we have to handle double page
> boundaries as the old behavior, explained in the last patch.
>
> - Add selftests on extent buffer memory operations
> I have failed too many times refactoring memmove_extent_buffer(), the
> wasted time should be a memorial for my stupidity.
btrfs/125 kasan complains:
btrfs/125 [01:09:17][12387.340788] run fstests btrfs/125 at 2023-07-20 01:09:18
[12389.539422] BTRFS: device fsid b349d2bf-44dc-4990-8e64-c4933de9e42e devid 1 transid 297 /dev/vda scanned by mount (1360)
[12389.543907] BTRFS info (device vda): using sha256 (sha256-generic) checksum algorithm
[12389.545345] BTRFS info (device vda): using free space tree
[12389.568662] BTRFS info (device vda): auto enabling async discard
[12393.628549] BTRFS: device fsid 472a6171-cb8b-4916-8353-172e05aa255c devid 1 transid 6 /dev/vdb scanned by mkfs.btrfs (1544)
[12393.630846] BTRFS: device fsid 472a6171-cb8b-4916-8353-172e05aa255c devid 2 transid 6 /dev/vdc scanned by mkfs.btrfs (1544)
[12393.633042] BTRFS: device fsid 472a6171-cb8b-4916-8353-172e05aa255c devid 3 transid 6 /dev/vdd scanned by mkfs.btrfs (1544)
[12393.675240] BTRFS info (device vdb): using sha256 (sha256-generic) checksum algorithm
[12393.676651] BTRFS info (device vdb): using free space tree
[12393.705607] BTRFS info (device vdb): auto enabling async discard
[12393.708477] BTRFS info (device vdb): checking UUID tree
[12394.479228] BTRFS: device fsid 472a6171-cb8b-4916-8353-172e05aa255c devid 2 transid 8 /dev/vdc scanned by mount (1573)
[12394.481329] BTRFS: device fsid 472a6171-cb8b-4916-8353-172e05aa255c devid 1 transid 8 /dev/vdb scanned by mount (1573)
[12394.484821] BTRFS info (device vdb): using sha256 (sha256-generic) checksum algorithm
[12394.486018] BTRFS info (device vdb): allowing degraded mounts
[12394.486801] BTRFS info (device vdb): using free space tree
[12394.495639] BTRFS warning (device vdb): devid 3 uuid 8c6b8e23-2053-4b0a-9d30-0facd2dad945 is missing
[12394.499898] BTRFS warning (device vdb): devid 3 uuid 8c6b8e23-2053-4b0a-9d30-0facd2dad945 is missing
[12394.523726] BTRFS info (device vdb): auto enabling async discard
[12398.021206] BTRFS: device fsid b349d2bf-44dc-4990-8e64-c4933de9e42e devid 1 transid 298 /dev/vda scanned by btrfs (1597)
[12398.066913] BTRFS info (device vdb): using sha256 (sha256-generic) checksum algorithm
[12398.068414] BTRFS info (device vdb): using free space tree
[12398.080629] BTRFS error (device vdb): bad tree block start, mirror 1 want 40239104 have 31129600
[12398.085719] BTRFS info (device vdb): read error corrected: ino 0 off 40239104 (dev /dev/vdd sector 19840)
[12398.087705] BTRFS info (device vdb): read error corrected: ino 0 off 40243200 (dev /dev/vdd sector 19848)
[12398.089689] BTRFS info (device vdb): read error corrected: ino 0 off 40247296 (dev /dev/vdd sector 19856)
[12398.091575] BTRFS info (device vdb): read error corrected: ino 0 off 40251392 (dev /dev/vdd sector 19864)
[12398.093929] BTRFS error (device vdb): bad tree block start, mirror 1 want 40255488 have 31145984
[12398.097548] BTRFS info (device vdb): read error corrected: ino 0 off 40255488 (dev /dev/vdd sector 19872)
[12398.099311] BTRFS info (device vdb): read error corrected: ino 0 off 40259584 (dev /dev/vdd sector 19880)
[12398.101038] BTRFS info (device vdb): read error corrected: ino 0 off 40263680 (dev /dev/vdd sector 19888)
[12398.102663] BTRFS info (device vdb): read error corrected: ino 0 off 40267776 (dev /dev/vdd sector 19896)
[12398.105020] BTRFS error (device vdb): bad tree block start, mirror 1 want 40271872 have 31162368
[12398.107479] BTRFS info (device vdb): read error corrected: ino 0 off 40271872 (dev /dev/vdd sector 19904)
[12398.109094] BTRFS info (device vdb): read error corrected: ino 0 off 40275968 (dev /dev/vdd sector 19912)
[12398.111111] BTRFS error (device vdb): bad tree block start, mirror 1 want 40222720 have 31113216
[12398.121818] BTRFS info (device vdb): auto enabling async discard
[12398.219247] BTRFS error (device vdb): bad tree block start, mirror 1 want 40288256 have 31178752
[12398.233989] BTRFS info (device vdb): balance: start -d -m -s
[12398.235327] BTRFS info (device vdb): relocating block group 2365194240 flags data|raid5
[12398.310482] BTRFS error (device vdb): bad tree block start, mirror 1 want 40189952 have 31080448
[12398.482607] BTRFS error (device vdb): parent transid verify failed on logical 38993920 mirror 1 wanted 9 found 7
[12398.489325] BTRFS error (device vdb): parent transid verify failed on logical 38993920 mirror 2 wanted 9 found 7
[12398.493394] BTRFS error (device vdb): parent transid verify failed on logical 38993920 mirror 1 wanted 9 found 7
[12398.496146] BTRFS error (device vdb): parent transid verify failed on logical 38993920 mirror 2 wanted 9 found 7
[12398.499510] BTRFS error (device vdb): parent transid verify failed on logical 39108608 mirror 1 wanted 9 found 7
[12398.736591] BTRFS error (device vdb): parent transid verify failed on logical 38993920 mirror 1 wanted 9 found 7
[12398.740199] BTRFS error (device vdb): parent transid verify failed on logical 38993920 mirror 2 wanted 9 found 7
[12398.907346] BTRFS info (device vdb): balance: ended with status: -5
[12399.168513] BTRFS error (device vdb): parent transid verify failed on logical 38993920 mirror 1 wanted 9 found 7
[12399.174882] BTRFS error (device vdb): parent transid verify failed on logical 38993920 mirror 2 wanted 9 found 7
[12399.180441] ==================================================================
[12399.183100] BUG: KASAN: slab-use-after-free in btrfs_check_read_bio+0x19c/0x8d0 [btrfs]
[12399.186056] Read of size 8 at addr ffff888029c96c80 by task kworker/u8:4/21890
[12399.188440]
[12399.188965] CPU: 1 PID: 21890 Comm: kworker/u8:4 Not tainted 6.5.0-rc2-default+ #2130
[12399.191616] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552-rebuilt.opensuse.org 04/01/2014
[12399.193366] Workqueue: btrfs-endio btrfs_end_bio_work [btrfs]
[12399.194534] Call Trace:
[12399.195039] <TASK>
[12399.195484] dump_stack_lvl+0x46/0x70
[12399.196182] print_address_description.constprop.0+0x30/0x420
[12399.197136] ? preempt_count_sub+0x18/0xc0
[12399.197858] print_report+0xb0/0x260
[12399.198497] ? __virt_addr_valid+0xbb/0xf0
[12399.199204] ? kasan_addr_to_slab+0x94/0xc0
[12399.199936] kasan_report+0xbe/0xf0
[12399.200562] ? btrfs_check_read_bio+0x19c/0x8d0 [btrfs]
[12399.201618] ? btrfs_check_read_bio+0x19c/0x8d0 [btrfs]
[12399.202667] btrfs_check_read_bio+0x19c/0x8d0 [btrfs]
[12399.203703] ? lock_sync+0x100/0x100
[12399.204344] ? try_to_wake_up+0x50/0x880
[12399.205025] ? btrfs_repair_io_failure+0x490/0x490 [btrfs]
[12399.206116] ? mark_held_locks+0x1a/0x80
[12399.206802] process_one_work+0x504/0xa00
[12399.207530] ? pwq_dec_nr_in_flight+0x100/0x100
[12399.208305] ? worker_thread+0x160/0x630
[12399.208996] worker_thread+0x8e/0x630
[12399.209638] ? __kthread_parkme+0xd8/0xf0
[12399.210331] ? process_one_work+0xa00/0xa00
[12399.211032] kthread+0x198/0x1e0
[12399.211634] ? kthread_complete_and_exit+0x20/0x20
[12399.212432] ret_from_fork+0x2d/0x50
[12399.213087] ? kthread_complete_and_exit+0x20/0x20
[12399.213895] ret_from_fork_asm+0x11/0x20
[12399.214585] RIP: 0000:0x0
[12399.215098] Code: Unable to access opcode bytes at 0xffffffffffffffd6.
[12399.216131] RSP: 0000:0000000000000000 EFLAGS: 00000000 ORIG_RAX: 0000000000000000
[12399.217361] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
[12399.218442] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
[12399.219546] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000
[12399.220629] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
[12399.221710] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
[12399.222811] </TASK>
[12399.223263]
[12399.223628] Allocated by task 1621:
[12399.224238] kasan_save_stack+0x1c/0x40
[12399.224900] kasan_set_track+0x21/0x30
[12399.225558] __kasan_slab_alloc+0x62/0x70
[12399.226240] kmem_cache_alloc+0x194/0x370
[12399.226920] mempool_alloc+0xe1/0x260
[12399.227573] bio_alloc_bioset+0x2c7/0x450
[12399.228266] btrfs_bio_alloc+0x2e/0x50 [btrfs]
[12399.229208] submit_extent_page+0x2e0/0x5c0 [btrfs]
[12399.230206] btrfs_do_readpage+0x52a/0xb50 [btrfs]
[12399.231188] extent_readahead+0x1c3/0x2b0 [btrfs]
[12399.232141] read_pages+0x10e/0x5f0
[12399.232748] page_cache_ra_unbounded+0x1ed/0x2c0
[12399.233508] filemap_get_pages+0x218/0x620
[12399.234196] filemap_read+0x1ef/0x660
[12399.234825] vfs_read+0x3b7/0x4f0
[12399.235433] ksys_read+0xc7/0x160
[12399.236035] do_syscall_64+0x3d/0x90
[12399.236675] entry_SYSCALL_64_after_hwframe+0x46/0xb0
[12399.237505]
[12399.237856] Freed by task 1621:
[12399.238423] kasan_save_stack+0x1c/0x40
[12399.239084] kasan_set_track+0x21/0x30
[12399.239728] kasan_save_free_info+0x27/0x40
[12399.240435] ____kasan_slab_free+0x1c2/0x230
[12399.241141] kmem_cache_free+0x13a/0x410
[12399.241813] bio_free+0x76/0xa0
[12399.242386] end_bio_extent_readpage+0x139/0x400 [btrfs]
[12399.243434] btrfs_submit_chunk+0x6e9/0x9b0 [btrfs]
[12399.244421] btrfs_submit_bio+0x21/0x60 [btrfs]
[12399.245356] submit_one_bio+0x6a/0xb0 [btrfs]
[12399.246273] submit_extent_page+0x232/0x5c0 [btrfs]
[12399.247268] btrfs_do_readpage+0x52a/0xb50 [btrfs]
[12399.248648] extent_readahead+0x1c3/0x2b0 [btrfs]
[12399.249608] read_pages+0x10e/0x5f0
[12399.250236] page_cache_ra_unbounded+0x1ed/0x2c0
[12399.251006] filemap_get_pages+0x218/0x620
[12399.251688] filemap_read+0x1ef/0x660
[12399.252304] vfs_read+0x3b7/0x4f0
[12399.252880] ksys_read+0xc7/0x160
[12399.253463] do_syscall_64+0x3d/0x90
[12399.254086] entry_SYSCALL_64_after_hwframe+0x46/0xb0
[12399.254878]
[12399.255239] The buggy address belongs to the object at ffff888029c96c80
[12399.255239] which belongs to the cache biovec-max of size 4096
[12399.257027] The buggy address is located 0 bytes inside of
[12399.257027] freed 4096-byte region [ffff888029c96c80, ffff888029c97c80)
[12399.258790]
[12399.259143] The buggy address belongs to the physical page:
[12399.259994] page:ffff88807e872400 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x29c90
[12399.261410] head:ffff88807e872400 order:3 entire_mapcount:0 nr_pages_mapped:0 pincount:0
[12399.262656] flags: 0xa80000010200(slab|head|section=5|zone=1)
[12399.263548] page_type: 0xffffffff()
[12399.264144] raw: 0000a80000010200 ffff888001310ac0 ffff88807e099a10 ffff888001312b70
[12399.265851] raw: 0000000000000000 0000000000070007 00000001ffffffff 0000000000000000
[12399.267051] page dumped because: kasan: bad access detected
[12399.267900]
[12399.268239] Memory state around the buggy address:
[12399.268986] ffff888029c96b80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[12399.270092] ffff888029c96c00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[12399.271203] >ffff888029c96c80: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[12399.272590] ^
[12399.273152] ffff888029c96d00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[12399.274267] ffff888029c96d80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[12399.275385] ==================================================================
[12399.276551] Disabling lock debugging due to kernel taint
[12399.277362] assertion failed: bv->bv_len == fs_info->sectorsize, in fs/btrfs/inode.c:3441
[12399.278654] ------------[ cut here ]------------
[12399.279387] kernel BUG at fs/btrfs/inode.c:3441!
[12399.280165] invalid opcode: 0000 [#1] PREEMPT SMP KASAN
[12399.280979] CPU: 1 PID: 21890 Comm: kworker/u8:4 Tainted: G B 6.5.0-rc2-default+ #2130
[12399.282353] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552-rebuilt.opensuse.org 04/01/2014
[12399.283964] Workqueue: btrfs-endio btrfs_end_bio_work [btrfs]
[12399.285309] RIP: 0010:btrfs_data_csum_ok+0x40f/0x530 [btrfs]
[12399.288907] RSP: 0018:ffff888049277b30 EFLAGS: 00010246
[12399.290022] RAX: 000000000000004d RBX: ffff888015166d80 RCX: 0000000000000000
[12399.291074] RDX: 0000000000000000 RSI: ffffffff961007f8 RDI: ffffffff99c9e0e0
[12399.292385] RBP: ffff888049277cc0 R08: 0000000000000001 R09: ffffed100924ef0f
[12399.293411] R10: ffff88804927787f R11: fffffffffffe37c0 R12: ffff888014bc8000
[12399.294446] R13: ffff88804abdc000 R14: 0000000000000655 R15: ffff8880168b3b78
[12399.295468] FS: 0000000000000000(0000) GS:ffff888068c00000(0000) knlGS:0000000000000000
[12399.296693] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[12399.297549] CR2: ffffffffffffffd6 CR3: 000000007288b000 CR4: 00000000000006a0
[12399.298577] Call Trace:
[12399.299049] <TASK>
[12399.299472] ? die+0x32/0x80
[12399.302753] ? do_trap+0x12d/0x160
[12399.303356] ? btrfs_data_csum_ok+0x40f/0x530 [btrfs]
[12399.304487] ? btrfs_data_csum_ok+0x40f/0x530 [btrfs]
[12399.305593] ? do_error_trap+0x90/0x130
[12399.306241] ? btrfs_data_csum_ok+0x40f/0x530 [btrfs]
[12399.307314] ? handle_invalid_op+0x2c/0x30
[12399.307999] ? btrfs_data_csum_ok+0x40f/0x530 [btrfs]
[12399.308972] ? exc_invalid_op+0x29/0x40
[12399.309616] ? asm_exc_invalid_op+0x16/0x20
[12399.310302] ? preempt_count_sub+0x18/0xc0
[12399.310988] ? btrfs_data_csum_ok+0x40f/0x530 [btrfs]
[12399.311976] ? end_report+0x7a/0x130
[12399.312594] ? btrfs_check_sector_csum+0x210/0x210 [btrfs]
[12399.313625] ? btrfs_check_read_bio+0x19c/0x8d0 [btrfs]
[12399.314637] btrfs_check_read_bio+0x238/0x8d0 [btrfs]
[12399.315641] ? lock_sync+0x100/0x100
[12399.316247] ? try_to_wake_up+0x50/0x880
[12399.316906] ? btrfs_repair_io_failure+0x490/0x490 [btrfs]
[12399.317946] process_one_work+0x504/0xa00
[12399.318625] ? pwq_dec_nr_in_flight+0x100/0x100
[12399.319363] ? worker_thread+0x160/0x630
[12399.320023] worker_thread+0x8e/0x630
[12399.320640] ? __kthread_parkme+0xd8/0xf0
[12399.321299] ? process_one_work+0xa00/0xa00
[12399.321990] kthread+0x198/0x1e0
[12399.322564] ? kthread_complete_and_exit+0x20/0x20
[12399.323334] ret_from_fork+0x2d/0x50
[12399.323945] ? kthread_complete_and_exit+0x20/0x20
[12399.324713] ret_from_fork_asm+0x11/0x20
[12399.325364] RIP: 0000:0x0
[12399.325855] Code: Unable to access opcode bytes at 0xffffffffffffffd6.
[12399.326833] RSP: 0000:0000000000000000 EFLAGS: 00000000 ORIG_RAX: 0000000000000000
[12399.328007] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
[12399.329049] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
[12399.330095] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000
[12399.331146] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
[12399.332197] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
[12399.333246] </TASK>
[12399.333679] Modules linked in: dm_flakey dm_mod btrfs blake2b_generic libcrc32c xor lzo_compress lzo_decompress raid6_pq zstd_decompress zstd_compress xxhash zstd_common loop
[12399.335968] ---[ end trace 0000000000000000 ]---
[12399.336714] RIP: 0010:btrfs_data_csum_ok+0x40f/0x530 [btrfs]
[12399.340450] RSP: 0018:ffff888049277b30 EFLAGS: 00010246
[12399.341276] RAX: 000000000000004d RBX: ffff888015166d80 RCX: 0000000000000000
[12399.342337] RDX: 0000000000000000 RSI: ffffffff961007f8 RDI: ffffffff99c9e0e0
[12399.343389] RBP: ffff888049277cc0 R08: 0000000000000001 R09: ffffed100924ef0f
[12399.349246] R10: ffff88804927787f R11: fffffffffffe37c0 R12: ffff888014bc8000
[12399.350585] R13: ffff88804abdc000 R14: 0000000000000655 R15: ffff8880168b3b78
[12399.351846] FS: 0000000000000000(0000) GS:ffff888069000000(0000) knlGS:0000000000000000
[12399.353471] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[12399.354460] CR2: 000055a729c8d000 CR3: 000000003b421000 CR4: 00000000000006a0
[12399.357113] BTRFS error (device vdb): parent transid verify failed on logical 38993920 mirror 1 wanted 9 found 7
Connection closed by foreign host.
^ permalink raw reply [flat|nested] 17+ messages in thread* Re: [PATCH v3 0/8] btrfs: preparation patches for the incoming metadata folio conversion
2023-07-20 15:06 ` David Sterba
@ 2023-07-20 22:15 ` Qu Wenruo
2023-07-20 22:55 ` Qu Wenruo
0 siblings, 1 reply; 17+ messages in thread
From: Qu Wenruo @ 2023-07-20 22:15 UTC (permalink / raw)
To: dsterba, Qu Wenruo; +Cc: linux-btrfs
On 2023/7/20 23:06, David Sterba wrote:
> On Sat, Jul 15, 2023 at 07:08:26PM +0800, Qu Wenruo wrote:
>> [CHANGELOG]
>> v2:
>> - Define write_extent_buffer_fsid/chunk_tree_uuid() as inline helpers
>>
>> v3:
>> - Fix an undefined behavior bug in memcpy_extent_buffer()
>> Unlike the name, memcpy_extent_buffer() needs to handle overlapping
>> ranges, thus it calls copy_pages() which do overlap checks and switch
>> to memmove() when needed.
>>
>> Here we introduce __write_extent_buffer() which allows us to switch
>> to go memmove() if needed.
>>
>> - Also refactor memmove_extent_buffer()
>> Since we have __write_extent_buffer() which can go memmove(), it's
>> not hard to refactor memmove_extent_buffer().
>>
>> But there is still a pitfall that we have to handle double page
>> boundaries as the old behavior, explained in the last patch.
>>
>> - Add selftests on extent buffer memory operations
>> I have failed too many times refactoring memmove_extent_buffer(), the
>> wasted time should be a memorial for my stupidity.
>
> btrfs/125 kasan complains:
>
> btrfs/125 [01:09:17][12387.340788] run fstests btrfs/125 at 2023-07-20 01:09:18
> [12389.539422] BTRFS: device fsid b349d2bf-44dc-4990-8e64-c4933de9e42e devid 1 transid 297 /dev/vda scanned by mount (1360)
> [12389.543907] BTRFS info (device vda): using sha256 (sha256-generic) checksum algorithm
> [12389.545345] BTRFS info (device vda): using free space tree
> [12389.568662] BTRFS info (device vda): auto enabling async discard
> [12393.628549] BTRFS: device fsid 472a6171-cb8b-4916-8353-172e05aa255c devid 1 transid 6 /dev/vdb scanned by mkfs.btrfs (1544)
> [12393.630846] BTRFS: device fsid 472a6171-cb8b-4916-8353-172e05aa255c devid 2 transid 6 /dev/vdc scanned by mkfs.btrfs (1544)
> [12393.633042] BTRFS: device fsid 472a6171-cb8b-4916-8353-172e05aa255c devid 3 transid 6 /dev/vdd scanned by mkfs.btrfs (1544)
> [12393.675240] BTRFS info (device vdb): using sha256 (sha256-generic) checksum algorithm
> [12393.676651] BTRFS info (device vdb): using free space tree
> [12393.705607] BTRFS info (device vdb): auto enabling async discard
> [12393.708477] BTRFS info (device vdb): checking UUID tree
> [12394.479228] BTRFS: device fsid 472a6171-cb8b-4916-8353-172e05aa255c devid 2 transid 8 /dev/vdc scanned by mount (1573)
> [12394.481329] BTRFS: device fsid 472a6171-cb8b-4916-8353-172e05aa255c devid 1 transid 8 /dev/vdb scanned by mount (1573)
> [12394.484821] BTRFS info (device vdb): using sha256 (sha256-generic) checksum algorithm
> [12394.486018] BTRFS info (device vdb): allowing degraded mounts
> [12394.486801] BTRFS info (device vdb): using free space tree
> [12394.495639] BTRFS warning (device vdb): devid 3 uuid 8c6b8e23-2053-4b0a-9d30-0facd2dad945 is missing
> [12394.499898] BTRFS warning (device vdb): devid 3 uuid 8c6b8e23-2053-4b0a-9d30-0facd2dad945 is missing
> [12394.523726] BTRFS info (device vdb): auto enabling async discard
> [12398.021206] BTRFS: device fsid b349d2bf-44dc-4990-8e64-c4933de9e42e devid 1 transid 298 /dev/vda scanned by btrfs (1597)
> [12398.066913] BTRFS info (device vdb): using sha256 (sha256-generic) checksum algorithm
> [12398.068414] BTRFS info (device vdb): using free space tree
> [12398.080629] BTRFS error (device vdb): bad tree block start, mirror 1 want 40239104 have 31129600
> [12398.085719] BTRFS info (device vdb): read error corrected: ino 0 off 40239104 (dev /dev/vdd sector 19840)
> [12398.087705] BTRFS info (device vdb): read error corrected: ino 0 off 40243200 (dev /dev/vdd sector 19848)
> [12398.089689] BTRFS info (device vdb): read error corrected: ino 0 off 40247296 (dev /dev/vdd sector 19856)
> [12398.091575] BTRFS info (device vdb): read error corrected: ino 0 off 40251392 (dev /dev/vdd sector 19864)
> [12398.093929] BTRFS error (device vdb): bad tree block start, mirror 1 want 40255488 have 31145984
> [12398.097548] BTRFS info (device vdb): read error corrected: ino 0 off 40255488 (dev /dev/vdd sector 19872)
> [12398.099311] BTRFS info (device vdb): read error corrected: ino 0 off 40259584 (dev /dev/vdd sector 19880)
> [12398.101038] BTRFS info (device vdb): read error corrected: ino 0 off 40263680 (dev /dev/vdd sector 19888)
> [12398.102663] BTRFS info (device vdb): read error corrected: ino 0 off 40267776 (dev /dev/vdd sector 19896)
> [12398.105020] BTRFS error (device vdb): bad tree block start, mirror 1 want 40271872 have 31162368
> [12398.107479] BTRFS info (device vdb): read error corrected: ino 0 off 40271872 (dev /dev/vdd sector 19904)
> [12398.109094] BTRFS info (device vdb): read error corrected: ino 0 off 40275968 (dev /dev/vdd sector 19912)
> [12398.111111] BTRFS error (device vdb): bad tree block start, mirror 1 want 40222720 have 31113216
> [12398.121818] BTRFS info (device vdb): auto enabling async discard
> [12398.219247] BTRFS error (device vdb): bad tree block start, mirror 1 want 40288256 have 31178752
> [12398.233989] BTRFS info (device vdb): balance: start -d -m -s
> [12398.235327] BTRFS info (device vdb): relocating block group 2365194240 flags data|raid5
> [12398.310482] BTRFS error (device vdb): bad tree block start, mirror 1 want 40189952 have 31080448
> [12398.482607] BTRFS error (device vdb): parent transid verify failed on logical 38993920 mirror 1 wanted 9 found 7
> [12398.489325] BTRFS error (device vdb): parent transid verify failed on logical 38993920 mirror 2 wanted 9 found 7
> [12398.493394] BTRFS error (device vdb): parent transid verify failed on logical 38993920 mirror 1 wanted 9 found 7
> [12398.496146] BTRFS error (device vdb): parent transid verify failed on logical 38993920 mirror 2 wanted 9 found 7
> [12398.499510] BTRFS error (device vdb): parent transid verify failed on logical 39108608 mirror 1 wanted 9 found 7
> [12398.736591] BTRFS error (device vdb): parent transid verify failed on logical 38993920 mirror 1 wanted 9 found 7
> [12398.740199] BTRFS error (device vdb): parent transid verify failed on logical 38993920 mirror 2 wanted 9 found 7
> [12398.907346] BTRFS info (device vdb): balance: ended with status: -5
> [12399.168513] BTRFS error (device vdb): parent transid verify failed on logical 38993920 mirror 1 wanted 9 found 7
> [12399.174882] BTRFS error (device vdb): parent transid verify failed on logical 38993920 mirror 2 wanted 9 found 7
Compared to my passing runs, the rebuild is not working for metadata here,
as my passing bios show no error on mirror 2 (rebuilt from P).
> [12399.180441] ==================================================================
> [12399.183100] BUG: KASAN: slab-use-after-free in btrfs_check_read_bio+0x19c/0x8d0 [btrfs]
> [12399.186056] Read of size 8 at addr ffff888029c96c80 by task kworker/u8:4/21890
> [12399.188440]
> [12399.188965] CPU: 1 PID: 21890 Comm: kworker/u8:4 Not tainted 6.5.0-rc2-default+ #2130
> [12399.191616] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552-rebuilt.opensuse.org 04/01/2014
> [12399.193366] Workqueue: btrfs-endio btrfs_end_bio_work [btrfs]
> [12399.194534] Call Trace:
> [12399.195039] <TASK>
> [12399.195484] dump_stack_lvl+0x46/0x70
> [12399.196182] print_address_description.constprop.0+0x30/0x420
> [12399.197136] ? preempt_count_sub+0x18/0xc0
> [12399.197858] print_report+0xb0/0x260
> [12399.198497] ? __virt_addr_valid+0xbb/0xf0
> [12399.199204] ? kasan_addr_to_slab+0x94/0xc0
> [12399.199936] kasan_report+0xbe/0xf0
> [12399.200562] ? btrfs_check_read_bio+0x19c/0x8d0 [btrfs]
> [12399.201618] ? btrfs_check_read_bio+0x19c/0x8d0 [btrfs]
> [12399.202667] btrfs_check_read_bio+0x19c/0x8d0 [btrfs]
This is weird, as btrfs_check_read_bio() can only happen for data bios.
Let me double check what's going wrong.
Thanks,
Qu
> [12399.203703] ? lock_sync+0x100/0x100
> [12399.204344] ? try_to_wake_up+0x50/0x880
> [12399.205025] ? btrfs_repair_io_failure+0x490/0x490 [btrfs]
> [12399.206116] ? mark_held_locks+0x1a/0x80
> [12399.206802] process_one_work+0x504/0xa00
> [12399.207530] ? pwq_dec_nr_in_flight+0x100/0x100
> [12399.208305] ? worker_thread+0x160/0x630
> [12399.208996] worker_thread+0x8e/0x630
> [12399.209638] ? __kthread_parkme+0xd8/0xf0
> [12399.210331] ? process_one_work+0xa00/0xa00
> [12399.211032] kthread+0x198/0x1e0
> [12399.211634] ? kthread_complete_and_exit+0x20/0x20
> [12399.212432] ret_from_fork+0x2d/0x50
> [12399.213087] ? kthread_complete_and_exit+0x20/0x20
> [12399.213895] ret_from_fork_asm+0x11/0x20
> [12399.214585] RIP: 0000:0x0
> [12399.215098] Code: Unable to access opcode bytes at 0xffffffffffffffd6.
> [12399.216131] RSP: 0000:0000000000000000 EFLAGS: 00000000 ORIG_RAX: 0000000000000000
> [12399.217361] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
> [12399.218442] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
> [12399.219546] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000
> [12399.220629] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
> [12399.221710] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
> [12399.222811] </TASK>
> [12399.223263]
> [12399.223628] Allocated by task 1621:
> [12399.224238] kasan_save_stack+0x1c/0x40
> [12399.224900] kasan_set_track+0x21/0x30
> [12399.225558] __kasan_slab_alloc+0x62/0x70
> [12399.226240] kmem_cache_alloc+0x194/0x370
> [12399.226920] mempool_alloc+0xe1/0x260
> [12399.227573] bio_alloc_bioset+0x2c7/0x450
> [12399.228266] btrfs_bio_alloc+0x2e/0x50 [btrfs]
> [12399.229208] submit_extent_page+0x2e0/0x5c0 [btrfs]
> [12399.230206] btrfs_do_readpage+0x52a/0xb50 [btrfs]
> [12399.231188] extent_readahead+0x1c3/0x2b0 [btrfs]
> [12399.232141] read_pages+0x10e/0x5f0
> [12399.232748] page_cache_ra_unbounded+0x1ed/0x2c0
> [12399.233508] filemap_get_pages+0x218/0x620
> [12399.234196] filemap_read+0x1ef/0x660
> [12399.234825] vfs_read+0x3b7/0x4f0
> [12399.235433] ksys_read+0xc7/0x160
> [12399.236035] do_syscall_64+0x3d/0x90
> [12399.236675] entry_SYSCALL_64_after_hwframe+0x46/0xb0
> [12399.237505]
> [12399.237856] Freed by task 1621:
> [12399.238423] kasan_save_stack+0x1c/0x40
> [12399.239084] kasan_set_track+0x21/0x30
> [12399.239728] kasan_save_free_info+0x27/0x40
> [12399.240435] ____kasan_slab_free+0x1c2/0x230
> [12399.241141] kmem_cache_free+0x13a/0x410
> [12399.241813] bio_free+0x76/0xa0
> [12399.242386] end_bio_extent_readpage+0x139/0x400 [btrfs]
> [12399.243434] btrfs_submit_chunk+0x6e9/0x9b0 [btrfs]
> [12399.244421] btrfs_submit_bio+0x21/0x60 [btrfs]
> [12399.245356] submit_one_bio+0x6a/0xb0 [btrfs]
> [12399.246273] submit_extent_page+0x232/0x5c0 [btrfs]
> [12399.247268] btrfs_do_readpage+0x52a/0xb50 [btrfs]
> [12399.248648] extent_readahead+0x1c3/0x2b0 [btrfs]
> [12399.249608] read_pages+0x10e/0x5f0
> [12399.250236] page_cache_ra_unbounded+0x1ed/0x2c0
> [12399.251006] filemap_get_pages+0x218/0x620
> [12399.251688] filemap_read+0x1ef/0x660
> [12399.252304] vfs_read+0x3b7/0x4f0
> [12399.252880] ksys_read+0xc7/0x160
> [12399.253463] do_syscall_64+0x3d/0x90
> [12399.254086] entry_SYSCALL_64_after_hwframe+0x46/0xb0
> [12399.254878]
> [12399.255239] The buggy address belongs to the object at ffff888029c96c80
> [12399.255239] which belongs to the cache biovec-max of size 4096
> [12399.257027] The buggy address is located 0 bytes inside of
> [12399.257027] freed 4096-byte region [ffff888029c96c80, ffff888029c97c80)
> [12399.258790]
> [12399.259143] The buggy address belongs to the physical page:
> [12399.259994] page:ffff88807e872400 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x29c90
> [12399.261410] head:ffff88807e872400 order:3 entire_mapcount:0 nr_pages_mapped:0 pincount:0
> [12399.262656] flags: 0xa80000010200(slab|head|section=5|zone=1)
> [12399.263548] page_type: 0xffffffff()
> [12399.264144] raw: 0000a80000010200 ffff888001310ac0 ffff88807e099a10 ffff888001312b70
> [12399.265851] raw: 0000000000000000 0000000000070007 00000001ffffffff 0000000000000000
> [12399.267051] page dumped because: kasan: bad access detected
> [12399.267900]
> [12399.268239] Memory state around the buggy address:
> [12399.268986] ffff888029c96b80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
> [12399.270092] ffff888029c96c00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
> [12399.271203] >ffff888029c96c80: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
> [12399.272590] ^
> [12399.273152] ffff888029c96d00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
> [12399.274267] ffff888029c96d80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
> [12399.275385] ==================================================================
> [12399.276551] Disabling lock debugging due to kernel taint
> [12399.277362] assertion failed: bv->bv_len == fs_info->sectorsize, in fs/btrfs/inode.c:3441
> [12399.278654] ------------[ cut here ]------------
> [12399.279387] kernel BUG at fs/btrfs/inode.c:3441!
> [12399.280165] invalid opcode: 0000 [#1] PREEMPT SMP KASAN
> [12399.280979] CPU: 1 PID: 21890 Comm: kworker/u8:4 Tainted: G B 6.5.0-rc2-default+ #2130
> [12399.282353] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552-rebuilt.opensuse.org 04/01/2014
> [12399.283964] Workqueue: btrfs-endio btrfs_end_bio_work [btrfs]
> [12399.285309] RIP: 0010:btrfs_data_csum_ok+0x40f/0x530 [btrfs]
> [12399.288907] RSP: 0018:ffff888049277b30 EFLAGS: 00010246
> [12399.290022] RAX: 000000000000004d RBX: ffff888015166d80 RCX: 0000000000000000
> [12399.291074] RDX: 0000000000000000 RSI: ffffffff961007f8 RDI: ffffffff99c9e0e0
> [12399.292385] RBP: ffff888049277cc0 R08: 0000000000000001 R09: ffffed100924ef0f
> [12399.293411] R10: ffff88804927787f R11: fffffffffffe37c0 R12: ffff888014bc8000
> [12399.294446] R13: ffff88804abdc000 R14: 0000000000000655 R15: ffff8880168b3b78
> [12399.295468] FS: 0000000000000000(0000) GS:ffff888068c00000(0000) knlGS:0000000000000000
> [12399.296693] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [12399.297549] CR2: ffffffffffffffd6 CR3: 000000007288b000 CR4: 00000000000006a0
> [12399.298577] Call Trace:
> [12399.299049] <TASK>
> [12399.299472] ? die+0x32/0x80
> [12399.302753] ? do_trap+0x12d/0x160
> [12399.303356] ? btrfs_data_csum_ok+0x40f/0x530 [btrfs]
> [12399.304487] ? btrfs_data_csum_ok+0x40f/0x530 [btrfs]
> [12399.305593] ? do_error_trap+0x90/0x130
> [12399.306241] ? btrfs_data_csum_ok+0x40f/0x530 [btrfs]
> [12399.307314] ? handle_invalid_op+0x2c/0x30
> [12399.307999] ? btrfs_data_csum_ok+0x40f/0x530 [btrfs]
> [12399.308972] ? exc_invalid_op+0x29/0x40
> [12399.309616] ? asm_exc_invalid_op+0x16/0x20
> [12399.310302] ? preempt_count_sub+0x18/0xc0
> [12399.310988] ? btrfs_data_csum_ok+0x40f/0x530 [btrfs]
> [12399.311976] ? end_report+0x7a/0x130
> [12399.312594] ? btrfs_check_sector_csum+0x210/0x210 [btrfs]
> [12399.313625] ? btrfs_check_read_bio+0x19c/0x8d0 [btrfs]
> [12399.314637] btrfs_check_read_bio+0x238/0x8d0 [btrfs]
> [12399.315641] ? lock_sync+0x100/0x100
> [12399.316247] ? try_to_wake_up+0x50/0x880
> [12399.316906] ? btrfs_repair_io_failure+0x490/0x490 [btrfs]
> [12399.317946] process_one_work+0x504/0xa00
> [12399.318625] ? pwq_dec_nr_in_flight+0x100/0x100
> [12399.319363] ? worker_thread+0x160/0x630
> [12399.320023] worker_thread+0x8e/0x630
> [12399.320640] ? __kthread_parkme+0xd8/0xf0
> [12399.321299] ? process_one_work+0xa00/0xa00
> [12399.321990] kthread+0x198/0x1e0
> [12399.322564] ? kthread_complete_and_exit+0x20/0x20
> [12399.323334] ret_from_fork+0x2d/0x50
> [12399.323945] ? kthread_complete_and_exit+0x20/0x20
> [12399.324713] ret_from_fork_asm+0x11/0x20
> [12399.325364] RIP: 0000:0x0
> [12399.325855] Code: Unable to access opcode bytes at 0xffffffffffffffd6.
> [12399.326833] RSP: 0000:0000000000000000 EFLAGS: 00000000 ORIG_RAX: 0000000000000000
> [12399.328007] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
> [12399.329049] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
> [12399.330095] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000
> [12399.331146] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
> [12399.332197] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
> [12399.333246] </TASK>
> [12399.333679] Modules linked in: dm_flakey dm_mod btrfs blake2b_generic libcrc32c xor lzo_compress lzo_decompress raid6_pq zstd_decompress zstd_compress xxhash zstd_common loop
> [12399.335968] ---[ end trace 0000000000000000 ]---
> [12399.336714] RIP: 0010:btrfs_data_csum_ok+0x40f/0x530 [btrfs]
> [12399.340450] RSP: 0018:ffff888049277b30 EFLAGS: 00010246
> [12399.341276] RAX: 000000000000004d RBX: ffff888015166d80 RCX: 0000000000000000
> [12399.342337] RDX: 0000000000000000 RSI: ffffffff961007f8 RDI: ffffffff99c9e0e0
> [12399.343389] RBP: ffff888049277cc0 R08: 0000000000000001 R09: ffffed100924ef0f
> [12399.349246] R10: ffff88804927787f R11: fffffffffffe37c0 R12: ffff888014bc8000
> [12399.350585] R13: ffff88804abdc000 R14: 0000000000000655 R15: ffff8880168b3b78
> [12399.351846] FS: 0000000000000000(0000) GS:ffff888069000000(0000) knlGS:0000000000000000
> [12399.353471] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [12399.354460] CR2: 000055a729c8d000 CR3: 000000003b421000 CR4: 00000000000006a0
> [12399.357113] BTRFS error (device vdb): parent transid verify failed on logical 38993920 mirror 1 wanted 9 found 7
> Connection closed by foreign host.
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH v3 0/8] btrfs: preparation patches for the incoming metadata folio conversion
2023-07-20 22:15 ` Qu Wenruo
@ 2023-07-20 22:55 ` Qu Wenruo
2023-07-21 15:13 ` David Sterba
0 siblings, 1 reply; 17+ messages in thread
From: Qu Wenruo @ 2023-07-20 22:55 UTC (permalink / raw)
To: dsterba, Qu Wenruo; +Cc: linux-btrfs
On 2023/7/21 06:15, Qu Wenruo wrote:
>
>
> On 2023/7/20 23:06, David Sterba wrote:
>> On Sat, Jul 15, 2023 at 07:08:26PM +0800, Qu Wenruo wrote:
>>> [CHANGELOG]
>>> v2:
>>> - Define write_extent_buffer_fsid/chunk_tree_uuid() as inline helpers
>>>
>>> v3:
>>> - Fix an undefined behavior bug in memcpy_extent_buffer()
>>> Unlike the name, memcpy_extent_buffer() needs to handle overlapping
>>> ranges, thus it calls copy_pages() which do overlap checks and switch
>>> to memmove() when needed.
>>>
>>> Here we introduce __write_extent_buffer() which allows us to switch
>>> to go memmove() if needed.
>>>
>>> - Also refactor memmove_extent_buffer()
>>> Since we have __write_extent_buffer() which can go memmove(), it's
>>> not hard to refactor memmove_extent_buffer().
>>>
>>> But there is still a pitfall that we have to handle double page
>>> boundaries as the old behavior, explained in the last patch.
>>>
>>> - Add selftests on extent buffer memory operations
>>> I have failed too many times refactoring memmove_extent_buffer(), the
>>> wasted time should be a memorial for my stupidity.
>>
>> btrfs/125 kasan complains:
>>
>> btrfs/125 [01:09:17][12387.340788] run fstests btrfs/125 at
>> 2023-07-20 01:09:18
>> [12389.539422] BTRFS: device fsid b349d2bf-44dc-4990-8e64-c4933de9e42e
>> devid 1 transid 297 /dev/vda scanned by mount (1360)
>> [12389.543907] BTRFS info (device vda): using sha256 (sha256-generic)
>> checksum algorithm
>> [12389.545345] BTRFS info (device vda): using free space tree
>> [12389.568662] BTRFS info (device vda): auto enabling async discard
>> [12393.628549] BTRFS: device fsid 472a6171-cb8b-4916-8353-172e05aa255c
>> devid 1 transid 6 /dev/vdb scanned by mkfs.btrfs (1544)
>> [12393.630846] BTRFS: device fsid 472a6171-cb8b-4916-8353-172e05aa255c
>> devid 2 transid 6 /dev/vdc scanned by mkfs.btrfs (1544)
>> [12393.633042] BTRFS: device fsid 472a6171-cb8b-4916-8353-172e05aa255c
>> devid 3 transid 6 /dev/vdd scanned by mkfs.btrfs (1544)
>> [12393.675240] BTRFS info (device vdb): using sha256 (sha256-generic)
>> checksum algorithm
>> [12393.676651] BTRFS info (device vdb): using free space tree
>> [12393.705607] BTRFS info (device vdb): auto enabling async discard
>> [12393.708477] BTRFS info (device vdb): checking UUID tree
>> [12394.479228] BTRFS: device fsid 472a6171-cb8b-4916-8353-172e05aa255c
>> devid 2 transid 8 /dev/vdc scanned by mount (1573)
>> [12394.481329] BTRFS: device fsid 472a6171-cb8b-4916-8353-172e05aa255c
>> devid 1 transid 8 /dev/vdb scanned by mount (1573)
>> [12394.484821] BTRFS info (device vdb): using sha256 (sha256-generic)
>> checksum algorithm
>> [12394.486018] BTRFS info (device vdb): allowing degraded mounts
>> [12394.486801] BTRFS info (device vdb): using free space tree
>> [12394.495639] BTRFS warning (device vdb): devid 3 uuid
>> 8c6b8e23-2053-4b0a-9d30-0facd2dad945 is missing
>> [12394.499898] BTRFS warning (device vdb): devid 3 uuid
>> 8c6b8e23-2053-4b0a-9d30-0facd2dad945 is missing
>> [12394.523726] BTRFS info (device vdb): auto enabling async discard
>> [12398.021206] BTRFS: device fsid b349d2bf-44dc-4990-8e64-c4933de9e42e
>> devid 1 transid 298 /dev/vda scanned by btrfs (1597)
>> [12398.066913] BTRFS info (device vdb): using sha256 (sha256-generic)
>> checksum algorithm
>> [12398.068414] BTRFS info (device vdb): using free space tree
>> [12398.080629] BTRFS error (device vdb): bad tree block start, mirror
>> 1 want 40239104 have 31129600
>> [12398.085719] BTRFS info (device vdb): read error corrected: ino 0
>> off 40239104 (dev /dev/vdd sector 19840)
>> [12398.087705] BTRFS info (device vdb): read error corrected: ino 0
>> off 40243200 (dev /dev/vdd sector 19848)
>> [12398.089689] BTRFS info (device vdb): read error corrected: ino 0
>> off 40247296 (dev /dev/vdd sector 19856)
>> [12398.091575] BTRFS info (device vdb): read error corrected: ino 0
>> off 40251392 (dev /dev/vdd sector 19864)
>> [12398.093929] BTRFS error (device vdb): bad tree block start, mirror
>> 1 want 40255488 have 31145984
>> [12398.097548] BTRFS info (device vdb): read error corrected: ino 0
>> off 40255488 (dev /dev/vdd sector 19872)
>> [12398.099311] BTRFS info (device vdb): read error corrected: ino 0
>> off 40259584 (dev /dev/vdd sector 19880)
>> [12398.101038] BTRFS info (device vdb): read error corrected: ino 0
>> off 40263680 (dev /dev/vdd sector 19888)
>> [12398.102663] BTRFS info (device vdb): read error corrected: ino 0
>> off 40267776 (dev /dev/vdd sector 19896)
>> [12398.105020] BTRFS error (device vdb): bad tree block start, mirror
>> 1 want 40271872 have 31162368
>> [12398.107479] BTRFS info (device vdb): read error corrected: ino 0
>> off 40271872 (dev /dev/vdd sector 19904)
>> [12398.109094] BTRFS info (device vdb): read error corrected: ino 0
>> off 40275968 (dev /dev/vdd sector 19912)
>> [12398.111111] BTRFS error (device vdb): bad tree block start, mirror
>> 1 want 40222720 have 31113216
>> [12398.121818] BTRFS info (device vdb): auto enabling async discard
>> [12398.219247] BTRFS error (device vdb): bad tree block start, mirror
>> 1 want 40288256 have 31178752
>> [12398.233989] BTRFS info (device vdb): balance: start -d -m -s
>> [12398.235327] BTRFS info (device vdb): relocating block group
>> 2365194240 flags data|raid5
>> [12398.310482] BTRFS error (device vdb): bad tree block start, mirror
>> 1 want 40189952 have 31080448
>> [12398.482607] BTRFS error (device vdb): parent transid verify failed
>> on logical 38993920 mirror 1 wanted 9 found 7
>> [12398.489325] BTRFS error (device vdb): parent transid verify failed
>> on logical 38993920 mirror 2 wanted 9 found 7
>> [12398.493394] BTRFS error (device vdb): parent transid verify failed
>> on logical 38993920 mirror 1 wanted 9 found 7
>> [12398.496146] BTRFS error (device vdb): parent transid verify failed
>> on logical 38993920 mirror 2 wanted 9 found 7
>> [12398.499510] BTRFS error (device vdb): parent transid verify failed
>> on logical 39108608 mirror 1 wanted 9 found 7
>> [12398.736591] BTRFS error (device vdb): parent transid verify failed
>> on logical 38993920 mirror 1 wanted 9 found 7
>> [12398.740199] BTRFS error (device vdb): parent transid verify failed
>> on logical 38993920 mirror 2 wanted 9 found 7
>> [12398.907346] BTRFS info (device vdb): balance: ended with status: -5
>> [12399.168513] BTRFS error (device vdb): parent transid verify failed
>> on logical 38993920 mirror 1 wanted 9 found 7
>> [12399.174882] BTRFS error (device vdb): parent transid verify failed
>> on logical 38993920 mirror 2 wanted 9 found 7
>
> Comparing it to my pass runs, the rebuild is not working for metadata.
>
> As my passing bios shows no error on mirror 2 (rebuilt from P).
>
>> [12399.180441]
>> ==================================================================
>> [12399.183100] BUG: KASAN: slab-use-after-free in
>> btrfs_check_read_bio+0x19c/0x8d0 [btrfs]
>> [12399.186056] Read of size 8 at addr ffff888029c96c80 by task
>> kworker/u8:4/21890
>> [12399.188440]
>> [12399.188965] CPU: 1 PID: 21890 Comm: kworker/u8:4 Not tainted
>> 6.5.0-rc2-default+ #2130
>> [12399.191616] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
>> BIOS rel-1.16.0-0-gd239552-rebuilt.opensuse.org 04/01/2014
>> [12399.193366] Workqueue: btrfs-endio btrfs_end_bio_work [btrfs]
>> [12399.194534] Call Trace:
>> [12399.195039] <TASK>
>> [12399.195484] dump_stack_lvl+0x46/0x70
>> [12399.196182] print_address_description.constprop.0+0x30/0x420
>> [12399.197136] ? preempt_count_sub+0x18/0xc0
>> [12399.197858] print_report+0xb0/0x260
>> [12399.198497] ? __virt_addr_valid+0xbb/0xf0
>> [12399.199204] ? kasan_addr_to_slab+0x94/0xc0
>> [12399.199936] kasan_report+0xbe/0xf0
>> [12399.200562] ? btrfs_check_read_bio+0x19c/0x8d0 [btrfs]
>> [12399.201618] ? btrfs_check_read_bio+0x19c/0x8d0 [btrfs]
>> [12399.202667] btrfs_check_read_bio+0x19c/0x8d0 [btrfs]
>
> This is werid, as btrfs_check_read_bio() can only happen for data bios.
>
> Let me double check what's going wrong.
What about the reproducibility? I failed to reproduce it here. I also
checked the git log, and it doesn't have any obvious changes to the RAID56
code either (all of them are already in my code base).
Thanks,
Qu
>
> Thanks,
> Qu
>> [12399.203703] ? lock_sync+0x100/0x100
>> [12399.204344] ? try_to_wake_up+0x50/0x880
>> [12399.205025] ? btrfs_repair_io_failure+0x490/0x490 [btrfs]
>> [12399.206116] ? mark_held_locks+0x1a/0x80
>> [12399.206802] process_one_work+0x504/0xa00
>> [12399.207530] ? pwq_dec_nr_in_flight+0x100/0x100
>> [12399.208305] ? worker_thread+0x160/0x630
>> [12399.208996] worker_thread+0x8e/0x630
>> [12399.209638] ? __kthread_parkme+0xd8/0xf0
>> [12399.210331] ? process_one_work+0xa00/0xa00
>> [12399.211032] kthread+0x198/0x1e0
>> [12399.211634] ? kthread_complete_and_exit+0x20/0x20
>> [12399.212432] ret_from_fork+0x2d/0x50
>> [12399.213087] ? kthread_complete_and_exit+0x20/0x20
>> [12399.213895] ret_from_fork_asm+0x11/0x20
>> [12399.214585] RIP: 0000:0x0
>> [12399.215098] Code: Unable to access opcode bytes at 0xffffffffffffffd6.
>> [12399.216131] RSP: 0000:0000000000000000 EFLAGS: 00000000 ORIG_RAX:
>> 0000000000000000
>> [12399.217361] RAX: 0000000000000000 RBX: 0000000000000000 RCX:
>> 0000000000000000
>> [12399.218442] RDX: 0000000000000000 RSI: 0000000000000000 RDI:
>> 0000000000000000
>> [12399.219546] RBP: 0000000000000000 R08: 0000000000000000 R09:
>> 0000000000000000
>> [12399.220629] R10: 0000000000000000 R11: 0000000000000000 R12:
>> 0000000000000000
>> [12399.221710] R13: 0000000000000000 R14: 0000000000000000 R15:
>> 0000000000000000
>> [12399.222811] </TASK>
>> [12399.223263]
>> [12399.223628] Allocated by task 1621:
>> [12399.224238] kasan_save_stack+0x1c/0x40
>> [12399.224900] kasan_set_track+0x21/0x30
>> [12399.225558] __kasan_slab_alloc+0x62/0x70
>> [12399.226240] kmem_cache_alloc+0x194/0x370
>> [12399.226920] mempool_alloc+0xe1/0x260
>> [12399.227573] bio_alloc_bioset+0x2c7/0x450
>> [12399.228266] btrfs_bio_alloc+0x2e/0x50 [btrfs]
>> [12399.229208] submit_extent_page+0x2e0/0x5c0 [btrfs]
>> [12399.230206] btrfs_do_readpage+0x52a/0xb50 [btrfs]
>> [12399.231188] extent_readahead+0x1c3/0x2b0 [btrfs]
>> [12399.232141] read_pages+0x10e/0x5f0
>> [12399.232748] page_cache_ra_unbounded+0x1ed/0x2c0
>> [12399.233508] filemap_get_pages+0x218/0x620
>> [12399.234196] filemap_read+0x1ef/0x660
>> [12399.234825] vfs_read+0x3b7/0x4f0
>> [12399.235433] ksys_read+0xc7/0x160
>> [12399.236035] do_syscall_64+0x3d/0x90
>> [12399.236675] entry_SYSCALL_64_after_hwframe+0x46/0xb0
>> [12399.237505]
>> [12399.237856] Freed by task 1621:
>> [12399.238423] kasan_save_stack+0x1c/0x40
>> [12399.239084] kasan_set_track+0x21/0x30
>> [12399.239728] kasan_save_free_info+0x27/0x40
>> [12399.240435] ____kasan_slab_free+0x1c2/0x230
>> [12399.241141] kmem_cache_free+0x13a/0x410
>> [12399.241813] bio_free+0x76/0xa0
>> [12399.242386] end_bio_extent_readpage+0x139/0x400 [btrfs]
>> [12399.243434] btrfs_submit_chunk+0x6e9/0x9b0 [btrfs]
>> [12399.244421] btrfs_submit_bio+0x21/0x60 [btrfs]
>> [12399.245356] submit_one_bio+0x6a/0xb0 [btrfs]
>> [12399.246273] submit_extent_page+0x232/0x5c0 [btrfs]
>> [12399.247268] btrfs_do_readpage+0x52a/0xb50 [btrfs]
>> [12399.248648] extent_readahead+0x1c3/0x2b0 [btrfs]
>> [12399.249608] read_pages+0x10e/0x5f0
>> [12399.250236] page_cache_ra_unbounded+0x1ed/0x2c0
>> [12399.251006] filemap_get_pages+0x218/0x620
>> [12399.251688] filemap_read+0x1ef/0x660
>> [12399.252304] vfs_read+0x3b7/0x4f0
>> [12399.252880] ksys_read+0xc7/0x160
>> [12399.253463] do_syscall_64+0x3d/0x90
>> [12399.254086] entry_SYSCALL_64_after_hwframe+0x46/0xb0
>> [12399.254878]
>> [12399.255239] The buggy address belongs to the object at
>> ffff888029c96c80
>> [12399.255239] which belongs to the cache biovec-max of size 4096
>> [12399.257027] The buggy address is located 0 bytes inside of
>> [12399.257027] freed 4096-byte region [ffff888029c96c80,
>> ffff888029c97c80)
>> [12399.258790]
>> [12399.259143] The buggy address belongs to the physical page:
>> [12399.259994] page:ffff88807e872400 refcount:1 mapcount:0
>> mapping:0000000000000000 index:0x0 pfn:0x29c90
>> [12399.261410] head:ffff88807e872400 order:3 entire_mapcount:0
>> nr_pages_mapped:0 pincount:0
>> [12399.262656] flags: 0xa80000010200(slab|head|section=5|zone=1)
>> [12399.263548] page_type: 0xffffffff()
>> [12399.264144] raw: 0000a80000010200 ffff888001310ac0 ffff88807e099a10
>> ffff888001312b70
>> [12399.265851] raw: 0000000000000000 0000000000070007 00000001ffffffff
>> 0000000000000000
>> [12399.267051] page dumped because: kasan: bad access detected
>> [12399.267900]
>> [12399.268239] Memory state around the buggy address:
>> [12399.268986] ffff888029c96b80: fc fc fc fc fc fc fc fc fc fc fc fc
>> fc fc fc fc
>> [12399.270092] ffff888029c96c00: fc fc fc fc fc fc fc fc fc fc fc fc
>> fc fc fc fc
>> [12399.271203] >ffff888029c96c80: fa fb fb fb fb fb fb fb fb fb fb fb
>> fb fb fb fb
>> [12399.272590] ^
>> [12399.273152] ffff888029c96d00: fb fb fb fb fb fb fb fb fb fb fb fb
>> fb fb fb fb
>> [12399.274267] ffff888029c96d80: fb fb fb fb fb fb fb fb fb fb fb fb
>> fb fb fb fb
>> [12399.275385]
>> ==================================================================
>> [12399.276551] Disabling lock debugging due to kernel taint
>> [12399.277362] assertion failed: bv->bv_len == fs_info->sectorsize, in
>> fs/btrfs/inode.c:3441
>> [12399.278654] ------------[ cut here ]------------
>> [12399.279387] kernel BUG at fs/btrfs/inode.c:3441!
>> [12399.280165] invalid opcode: 0000 [#1] PREEMPT SMP KASAN
>> [12399.280979] CPU: 1 PID: 21890 Comm: kworker/u8:4 Tainted: G
>> B 6.5.0-rc2-default+ #2130
>> [12399.282353] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
>> BIOS rel-1.16.0-0-gd239552-rebuilt.opensuse.org 04/01/2014
>> [12399.283964] Workqueue: btrfs-endio btrfs_end_bio_work [btrfs]
>> [12399.285309] RIP: 0010:btrfs_data_csum_ok+0x40f/0x530 [btrfs]
>> [12399.288907] RSP: 0018:ffff888049277b30 EFLAGS: 00010246
>> [12399.290022] RAX: 000000000000004d RBX: ffff888015166d80 RCX:
>> 0000000000000000
>> [12399.291074] RDX: 0000000000000000 RSI: ffffffff961007f8 RDI:
>> ffffffff99c9e0e0
>> [12399.292385] RBP: ffff888049277cc0 R08: 0000000000000001 R09:
>> ffffed100924ef0f
>> [12399.293411] R10: ffff88804927787f R11: fffffffffffe37c0 R12:
>> ffff888014bc8000
>> [12399.294446] R13: ffff88804abdc000 R14: 0000000000000655 R15:
>> ffff8880168b3b78
>> [12399.295468] FS: 0000000000000000(0000) GS:ffff888068c00000(0000)
>> knlGS:0000000000000000
>> [12399.296693] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>> [12399.297549] CR2: ffffffffffffffd6 CR3: 000000007288b000 CR4:
>> 00000000000006a0
>> [12399.298577] Call Trace:
>> [12399.299049] <TASK>
>> [12399.299472] ? die+0x32/0x80
>> [12399.302753] ? do_trap+0x12d/0x160
>> [12399.303356] ? btrfs_data_csum_ok+0x40f/0x530 [btrfs]
>> [12399.304487] ? btrfs_data_csum_ok+0x40f/0x530 [btrfs]
>> [12399.305593] ? do_error_trap+0x90/0x130
>> [12399.306241] ? btrfs_data_csum_ok+0x40f/0x530 [btrfs]
>> [12399.307314] ? handle_invalid_op+0x2c/0x30
>> [12399.307999] ? btrfs_data_csum_ok+0x40f/0x530 [btrfs]
>> [12399.308972] ? exc_invalid_op+0x29/0x40
>> [12399.309616] ? asm_exc_invalid_op+0x16/0x20
>> [12399.310302] ? preempt_count_sub+0x18/0xc0
>> [12399.310988] ? btrfs_data_csum_ok+0x40f/0x530 [btrfs]
>> [12399.311976] ? end_report+0x7a/0x130
>> [12399.312594] ? btrfs_check_sector_csum+0x210/0x210 [btrfs]
>> [12399.313625] ? btrfs_check_read_bio+0x19c/0x8d0 [btrfs]
>> [12399.314637] btrfs_check_read_bio+0x238/0x8d0 [btrfs]
>> [12399.315641] ? lock_sync+0x100/0x100
>> [12399.316247] ? try_to_wake_up+0x50/0x880
>> [12399.316906] ? btrfs_repair_io_failure+0x490/0x490 [btrfs]
>> [12399.317946] process_one_work+0x504/0xa00
>> [12399.318625] ? pwq_dec_nr_in_flight+0x100/0x100
>> [12399.319363] ? worker_thread+0x160/0x630
>> [12399.320023] worker_thread+0x8e/0x630
>> [12399.320640] ? __kthread_parkme+0xd8/0xf0
>> [12399.321299] ? process_one_work+0xa00/0xa00
>> [12399.321990] kthread+0x198/0x1e0
>> [12399.322564] ? kthread_complete_and_exit+0x20/0x20
>> [12399.323334] ret_from_fork+0x2d/0x50
>> [12399.323945] ? kthread_complete_and_exit+0x20/0x20
>> [12399.324713] ret_from_fork_asm+0x11/0x20
>> [12399.325364] RIP: 0000:0x0
>> [12399.325855] Code: Unable to access opcode bytes at 0xffffffffffffffd6.
>> [12399.326833] RSP: 0000:0000000000000000 EFLAGS: 00000000 ORIG_RAX:
>> 0000000000000000
>> [12399.328007] RAX: 0000000000000000 RBX: 0000000000000000 RCX:
>> 0000000000000000
>> [12399.329049] RDX: 0000000000000000 RSI: 0000000000000000 RDI:
>> 0000000000000000
>> [12399.330095] RBP: 0000000000000000 R08: 0000000000000000 R09:
>> 0000000000000000
>> [12399.331146] R10: 0000000000000000 R11: 0000000000000000 R12:
>> 0000000000000000
>> [12399.332197] R13: 0000000000000000 R14: 0000000000000000 R15:
>> 0000000000000000
>> [12399.333246] </TASK>
>> [12399.333679] Modules linked in: dm_flakey dm_mod btrfs
>> blake2b_generic libcrc32c xor lzo_compress lzo_decompress raid6_pq
>> zstd_decompress zstd_compress xxhash zstd_common loop
>> [12399.335968] ---[ end trace 0000000000000000 ]---
>> [12399.336714] RIP: 0010:btrfs_data_csum_ok+0x40f/0x530 [btrfs]
>> [12399.340450] RSP: 0018:ffff888049277b30 EFLAGS: 00010246
>> [12399.341276] RAX: 000000000000004d RBX: ffff888015166d80 RCX:
>> 0000000000000000
>> [12399.342337] RDX: 0000000000000000 RSI: ffffffff961007f8 RDI:
>> ffffffff99c9e0e0
>> [12399.343389] RBP: ffff888049277cc0 R08: 0000000000000001 R09:
>> ffffed100924ef0f
>> [12399.349246] R10: ffff88804927787f R11: fffffffffffe37c0 R12:
>> ffff888014bc8000
>> [12399.350585] R13: ffff88804abdc000 R14: 0000000000000655 R15:
>> ffff8880168b3b78
>> [12399.351846] FS: 0000000000000000(0000) GS:ffff888069000000(0000)
>> knlGS:0000000000000000
>> [12399.353471] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>> [12399.354460] CR2: 000055a729c8d000 CR3: 000000003b421000 CR4:
>> 00000000000006a0
>> [12399.357113] BTRFS error (device vdb): parent transid verify failed
>> on logical 38993920 mirror 1 wanted 9 found 7
>> Connection closed by foreign host.
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH v3 0/8] btrfs: preparation patches for the incoming metadata folio conversion
2023-07-20 22:55 ` Qu Wenruo
@ 2023-07-21 15:13 ` David Sterba
2023-07-27 18:27 ` David Sterba
0 siblings, 1 reply; 17+ messages in thread
From: David Sterba @ 2023-07-21 15:13 UTC (permalink / raw)
To: Qu Wenruo; +Cc: dsterba, Qu Wenruo, linux-btrfs
On Fri, Jul 21, 2023 at 06:55:49AM +0800, Qu Wenruo wrote:
> On 2023/7/21 06:15, Qu Wenruo wrote:
> > On 2023/7/20 23:06, David Sterba wrote:
> >> On Sat, Jul 15, 2023 at 07:08:26PM +0800, Qu Wenruo wrote:
> >> [12399.180441]
> >> ==================================================================
> >> [12399.183100] BUG: KASAN: slab-use-after-free in
> >> btrfs_check_read_bio+0x19c/0x8d0 [btrfs]
> >> [12399.186056] Read of size 8 at addr ffff888029c96c80 by task
> >> kworker/u8:4/21890
> >> [12399.188440]
> >> [12399.188965] CPU: 1 PID: 21890 Comm: kworker/u8:4 Not tainted
> >> 6.5.0-rc2-default+ #2130
> >> [12399.191616] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
> >> BIOS rel-1.16.0-0-gd239552-rebuilt.opensuse.org 04/01/2014
> >> [12399.193366] Workqueue: btrfs-endio btrfs_end_bio_work [btrfs]
> >> [12399.194534] Call Trace:
> >> [12399.195039] <TASK>
> >> [12399.195484] dump_stack_lvl+0x46/0x70
> >> [12399.196182] print_address_description.constprop.0+0x30/0x420
> >> [12399.197136] ? preempt_count_sub+0x18/0xc0
> >> [12399.197858] print_report+0xb0/0x260
> >> [12399.198497] ? __virt_addr_valid+0xbb/0xf0
> >> [12399.199204] ? kasan_addr_to_slab+0x94/0xc0
> >> [12399.199936] kasan_report+0xbe/0xf0
> >> [12399.200562] ? btrfs_check_read_bio+0x19c/0x8d0 [btrfs]
> >> [12399.201618] ? btrfs_check_read_bio+0x19c/0x8d0 [btrfs]
> >> [12399.202667] btrfs_check_read_bio+0x19c/0x8d0 [btrfs]
> >
> > This is weird, as btrfs_check_read_bio() can only happen for data bios.
> >
> > Let me double check what's going wrong.
>
> What about the reproducibility? I failed to reproduce it here, and I
> checked the git log; it doesn't have any obvious changes to the RAID56 code
> either (all are already in my code base).
This was the first run; I'll do another one.
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH v3 0/8] btrfs: preparation patches for the incoming metadata folio conversion
2023-07-21 15:13 ` David Sterba
@ 2023-07-27 18:27 ` David Sterba
0 siblings, 0 replies; 17+ messages in thread
From: David Sterba @ 2023-07-27 18:27 UTC (permalink / raw)
To: David Sterba; +Cc: Qu Wenruo, Qu Wenruo, linux-btrfs
On Fri, Jul 21, 2023 at 05:13:47PM +0200, David Sterba wrote:
> On Fri, Jul 21, 2023 at 06:55:49AM +0800, Qu Wenruo wrote:
> > On 2023/7/21 06:15, Qu Wenruo wrote:
> > > On 2023/7/20 23:06, David Sterba wrote:
> > >> On Sat, Jul 15, 2023 at 07:08:26PM +0800, Qu Wenruo wrote:
> > >> [12399.180441]
> > >> ==================================================================
> > >> [12399.183100] BUG: KASAN: slab-use-after-free in
> > >> btrfs_check_read_bio+0x19c/0x8d0 [btrfs]
> > >> [12399.186056] Read of size 8 at addr ffff888029c96c80 by task
> > >> kworker/u8:4/21890
> > >> [12399.188440]
> > >> [12399.188965] CPU: 1 PID: 21890 Comm: kworker/u8:4 Not tainted
> > >> 6.5.0-rc2-default+ #2130
> > >> [12399.191616] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
> > >> BIOS rel-1.16.0-0-gd239552-rebuilt.opensuse.org 04/01/2014
> > >> [12399.193366] Workqueue: btrfs-endio btrfs_end_bio_work [btrfs]
> > >> [12399.194534] Call Trace:
> > >> [12399.195039] <TASK>
> > >> [12399.195484] dump_stack_lvl+0x46/0x70
> > >> [12399.196182] print_address_description.constprop.0+0x30/0x420
> > >> [12399.197136] ? preempt_count_sub+0x18/0xc0
> > >> [12399.197858] print_report+0xb0/0x260
> > >> [12399.198497] ? __virt_addr_valid+0xbb/0xf0
> > >> [12399.199204] ? kasan_addr_to_slab+0x94/0xc0
> > >> [12399.199936] kasan_report+0xbe/0xf0
> > >> [12399.200562] ? btrfs_check_read_bio+0x19c/0x8d0 [btrfs]
> > >> [12399.201618] ? btrfs_check_read_bio+0x19c/0x8d0 [btrfs]
> > >> [12399.202667] btrfs_check_read_bio+0x19c/0x8d0 [btrfs]
> > >
> > > This is weird, as btrfs_check_read_bio() can only happen for data bios.
> > >
> > > Let me double check what's going wrong.
> >
> > What about the reproducibility? I failed to reproduce it here, and I
> > checked the git log; it doesn't have any obvious changes to the RAID56 code
> > either (all are already in my code base).
>
> This was the first run; I'll do another one.
With reworked misc-next this patchset does not reproduce the errors so
I'll add it back.
^ permalink raw reply [flat|nested] 17+ messages in thread