* [PATCH v5 1/3] btrfs: add extra comments on extent_map members
2024-04-15 23:35 [PATCH v5 0/3] btrfs: more explaination on extent_map members Qu Wenruo
@ 2024-04-15 23:35 ` Qu Wenruo
2024-04-15 23:35 ` [PATCH v5 2/3] btrfs: simplify the inline extent map creation Qu Wenruo
2024-04-15 23:35 ` [PATCH v5 3/3] btrfs: add extra sanity checks for create_io_em() Qu Wenruo
2 siblings, 0 replies; 4+ messages in thread
From: Qu Wenruo @ 2024-04-15 23:35 UTC (permalink / raw)
To: linux-btrfs; +Cc: Filipe Manana
The extent_map structure is very critical to btrfs, as it is involved
for both read and write paths.
Unfortunately the structure is not properly explained, making it pretty
hard to understand nor to do further improvement.
This patch adds extra comments explaining the major members based on
my code reading.
Hopefully we can find more members to cleanup in the future.
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
fs/btrfs/extent_map.h | 56 ++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 55 insertions(+), 1 deletion(-)
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index 10e9491865c9..10a60a4b09e5 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -35,19 +35,73 @@ enum {
};
/*
+ * This structure represents file extents and holes.
+ *
+ * Unlike on-disk file extent items, extent maps can be merged to
+ * save memory.
+ * This means members only match file extent items before any merging.
+ *
* Keep this structure as compact as possible, as we can have really large
* amounts of allocated extent maps at any time.
*/
struct extent_map {
struct rb_node rb_node;
- /* all of these are in bytes */
+ /* All of these are in bytes. */
+
+ /* File offset matching the offset of a BTRFS_EXTENT_ITEM_KEY key. */
u64 start;
+
+ /*
+ * Length of the file extent.
+ *
+ * For non-inlined file extents it's btrfs_file_extent_item::num_bytes.
+ * For inline extents it's sectorsize, since inline data starts at
+ * offsetof(struct btrfs_file_extent_item, disk_bytenr) thus
+ * btrfs_file_extent_item::num_bytes is not valid.
+ */
u64 len;
+
+ /*
+ * The file offset of the original file extent before splitting.
+ *
+ * This is an in-memory only member, matching
+ * extent_map::start - btrfs_file_extent_item::offset for
+ * regular/preallocated extents. EXTENT_MAP_HOLE otherwise.
+ */
u64 orig_start;
+
+ /*
+ * The full on-disk extent length, matching
+ * btrfs_file_extent_item::disk_num_bytes.
+ */
u64 orig_block_len;
+
+ /*
+ * The decompressed size of the whole on-disk extent, matching
+ * btrfs_file_extent_item::ram_bytes.
+ */
u64 ram_bytes;
+
+ /*
+ * The on-disk logical bytenr for the file extent.
+ *
+ * For compressed extents it matches btrfs_file_extent_item::disk_bytenr.
+ * For uncompressed extents it matches
+ * btrfs_file_extent_item::disk_bytenr + btrfs_file_extent_item::offset
+ *
+ * For holes it is EXTENT_MAP_HOLE and for inline extents it is
+ * EXTENT_MAP_INLINE.
+ */
u64 block_start;
+
+ /*
+ * The on-disk length for the file extent.
+ *
+ * For compressed extents it matches btrfs_file_extent_item::disk_num_bytes.
+ * For uncompressed extents it matches extent_map::len.
+ * For holes and inline extents it's -1 and shouldn't be used.
+ */
u64 block_len;
/*
--
2.44.0
^ permalink raw reply related [flat|nested] 4+ messages in thread* [PATCH v5 2/3] btrfs: simplify the inline extent map creation
2024-04-15 23:35 [PATCH v5 0/3] btrfs: more explaination on extent_map members Qu Wenruo
2024-04-15 23:35 ` [PATCH v5 1/3] btrfs: add extra comments " Qu Wenruo
@ 2024-04-15 23:35 ` Qu Wenruo
2024-04-15 23:35 ` [PATCH v5 3/3] btrfs: add extra sanity checks for create_io_em() Qu Wenruo
2 siblings, 0 replies; 4+ messages in thread
From: Qu Wenruo @ 2024-04-15 23:35 UTC (permalink / raw)
To: linux-btrfs; +Cc: Filipe Manana
With the tree-checker ensuring all inline file extents starts at file
offset 0 and has a length no larger than sectorsize, we can simplify the
calculation to assigned those fixes values directly.
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
fs/btrfs/file-item.c | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index e58fb5347e65..844439f19949 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -1265,20 +1265,19 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
struct extent_buffer *leaf = path->nodes[0];
const int slot = path->slots[0];
struct btrfs_key key;
- u64 extent_start, extent_end;
+ u64 extent_start;
u64 bytenr;
u8 type = btrfs_file_extent_type(leaf, fi);
int compress_type = btrfs_file_extent_compression(leaf, fi);
btrfs_item_key_to_cpu(leaf, &key, slot);
extent_start = key.offset;
- extent_end = btrfs_file_extent_end(path);
em->ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
em->generation = btrfs_file_extent_generation(leaf, fi);
if (type == BTRFS_FILE_EXTENT_REG ||
type == BTRFS_FILE_EXTENT_PREALLOC) {
em->start = extent_start;
- em->len = extent_end - extent_start;
+ em->len = btrfs_file_extent_end(path) - extent_start;
em->orig_start = extent_start -
btrfs_file_extent_offset(leaf, fi);
em->orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
@@ -1299,9 +1298,12 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
em->flags |= EXTENT_FLAG_PREALLOC;
}
} else if (type == BTRFS_FILE_EXTENT_INLINE) {
+ /* Tree-checker has ensured this. */
+ ASSERT(extent_start == 0);
+
em->block_start = EXTENT_MAP_INLINE;
- em->start = extent_start;
- em->len = extent_end - extent_start;
+ em->start = 0;
+ em->len = fs_info->sectorsize;
/*
* Initialize orig_start and block_len with the same values
* as in inode.c:btrfs_get_extent().
@@ -1334,12 +1336,10 @@ u64 btrfs_file_extent_end(const struct btrfs_path *path)
ASSERT(key.type == BTRFS_EXTENT_DATA_KEY);
fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
- if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) {
- end = btrfs_file_extent_ram_bytes(leaf, fi);
- end = ALIGN(key.offset + end, leaf->fs_info->sectorsize);
- } else {
+ if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE)
+ end = leaf->fs_info->sectorsize;
+ else
end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
- }
return end;
}
--
2.44.0
^ permalink raw reply related [flat|nested] 4+ messages in thread* [PATCH v5 3/3] btrfs: add extra sanity checks for create_io_em()
2024-04-15 23:35 [PATCH v5 0/3] btrfs: more explaination on extent_map members Qu Wenruo
2024-04-15 23:35 ` [PATCH v5 1/3] btrfs: add extra comments " Qu Wenruo
2024-04-15 23:35 ` [PATCH v5 2/3] btrfs: simplify the inline extent map creation Qu Wenruo
@ 2024-04-15 23:35 ` Qu Wenruo
2 siblings, 0 replies; 4+ messages in thread
From: Qu Wenruo @ 2024-04-15 23:35 UTC (permalink / raw)
To: linux-btrfs; +Cc: Filipe Manana
The function create_io_em() is called before we submit an IO, to update
the in-memory extent map for the involved range.
This patch changes the following aspects:
- Does not allow BTRFS_ORDERED_NOCOW type
For real NOCOW (excluding NOCOW writes into preallocated ranges)
writes, we never call create_io_em(), as we does not need to update
the extent map at all.
So remove the sanity check allowing BTRFS_ORDERED_NOCOW type.
- Add extra sanity checks
* PREALLOC
- @block_len == len
For uncompressed writes.
* REGULAR
- @block_len == @orig_block_len == @ram_bytes == @len
We're creating a new uncompressed extent, and referring all of it.
- @orig_start == @start
We haven no offset inside the extent.
* COMPRESSED
- valid @compress_type
- @len <= @ram_bytes
This is to co-operate with encoded writes, which can cause a new
file extent referring only part of a uncompressed extent.
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
fs/btrfs/inode.c | 40 +++++++++++++++++++++++++++++++++++++++-
1 file changed, 39 insertions(+), 1 deletion(-)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c6f2b5d1dee1..ced916f42bab 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7320,11 +7320,49 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
struct extent_map *em;
int ret;
+ /*
+ * Note the missing of NOCOW type.
+ *
+ * For pure NOCOW writes, we should not create an io extent map,
+ * but just reusing the existing one.
+ * Only PREALLOC writes (NOCOW write into preallocated range) can
+ * create io extent map.
+ */
ASSERT(type == BTRFS_ORDERED_PREALLOC ||
type == BTRFS_ORDERED_COMPRESSED ||
- type == BTRFS_ORDERED_NOCOW ||
type == BTRFS_ORDERED_REGULAR);
+ switch (type) {
+ case BTRFS_ORDERED_PREALLOC:
+ /* Uncompressed extents. */
+ ASSERT(block_len == len);
+
+ /* We're only referring part of a larger preallocated extent. */
+ ASSERT(block_len <= ram_bytes);
+ break;
+ case BTRFS_ORDERED_REGULAR:
+ /* Uncompressed extents. */
+ ASSERT(block_len == len);
+
+ /* COW results a new extent matching our file extent size. */
+ ASSERT(orig_block_len == len);
+ ASSERT(ram_bytes == len);
+
+ /* Since it's a new extent, we should not have any offset. */
+ ASSERT(orig_start == start);
+ break;
+ case BTRFS_ORDERED_COMPRESSED:
+ /* Must be compressed. */
+ ASSERT(compress_type != BTRFS_COMPRESS_NONE);
+
+ /*
+ * Encoded write can make us to refer to part of the
+ * uncompressed extent.
+ */
+ ASSERT(len <= ram_bytes);
+ break;
+ }
+
em = alloc_extent_map();
if (!em)
return ERR_PTR(-ENOMEM);
--
2.44.0
^ permalink raw reply related [flat|nested] 4+ messages in thread