* [PATCH RFC] ext4: collapse a single extent tree block into the inode if possible
@ 2012-08-09 12:44 Theodore Ts'o
2012-08-09 12:56 ` Theodore Ts'o
2012-08-09 13:07 ` Theodore Ts'o
0 siblings, 2 replies; 3+ messages in thread
From: Theodore Ts'o @ 2012-08-09 12:44 UTC (permalink / raw)
To: Ext4 Developers List; +Cc: Theodore Ts'o
If an inode has more than 4 extents, but then later some of the
extents are merged together, we can optimize the file system by moving
the extents up into the inode, and discarding extent tree block. This
is important, because if there are a large number of inodes with an
external extent tree blocks where the contents could fit in the inode,
this can significantly increase the fsck time of the file system.
Google-Bug-Id: 6801242
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
fs/ext4/extents.c | 76 +++++++++++++++++++++++++++++++++++++++++++++----------
1 file changed, 62 insertions(+), 14 deletions(-)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index cd0c7ed..544d28c 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1656,16 +1656,64 @@ static int ext4_ext_try_to_merge_right(struct inode *inode,
}
/*
+ * This function does a very simple check to see if we can collapse
+ * an extent tree with a single extent tree leaf block into the inode.
+ */
+static void ext4_ext_try_to_merge_up(handle_t *handle,
+ struct inode *inode,
+ struct ext4_ext_path *path)
+{
+ size_t s;
+ unsigned max_root = ext4_ext_space_root(inode, 0);
+ ext4_fsblk_t blk;
+
+ if ((path[0].p_depth != 1) ||
+ (le16_to_cpu(path[0].p_hdr->eh_entries) != 1) ||
+ (le16_to_cpu(path[1].p_hdr->eh_entries) > max_root))
+ return;
+
+ /*
+ * We need to modify the block allocation bitmap and the block
+ * group descriptor to release the extent tree block. If we
+ * can't get the journal credits, give up.
+ */
+ if (ext4_journal_extend(handle, 2))
+ return;
+
+ /*
+ * Copy the extent data up to the inode
+ */
+ blk = ext4_idx_pblock(path[0].p_idx);
+ s = le16_to_cpu(path[1].p_hdr->eh_entries) *
+ sizeof(struct ext4_extent_idx);
+ s += sizeof(struct ext4_extent_header);
+
+ memcpy(path[0].p_hdr, path[1].p_hdr, s);
+ path[0].p_depth = 0;
+ path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) +
+ (path[1].p_ext - EXT_FIRST_EXTENT(path[1].p_hdr));
+ path[0].p_hdr->eh_max = cpu_to_le16(max_root);
+
+ brelse(path[1].p_bh);
+ ext4_free_blocks(handle, inode, NULL, blk, 1,
+ EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
+ ext4_msg(inode->i_sb, KERN_ERR, "Merge up: ino %lu, blk %llu, "
+ "extents %u",
+ inode->i_ino, (unsigned long long) blk,
+ le16_to_cpu(path[0].p_hdr->eh_entries));
+}
+
+/*
* This function tries to merge the @ex extent to neighbours in the tree.
* return 1 if merge left else 0.
*/
-static int ext4_ext_try_to_merge(struct inode *inode,
+static void ext4_ext_try_to_merge(handle_t *handle,
+ struct inode *inode,
struct ext4_ext_path *path,
struct ext4_extent *ex) {
struct ext4_extent_header *eh;
unsigned int depth;
int merge_done = 0;
- int ret = 0;
depth = ext_depth(inode);
BUG_ON(path[depth].p_hdr == NULL);
@@ -1675,9 +1723,9 @@ static int ext4_ext_try_to_merge(struct inode *inode,
merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1);
if (!merge_done)
- ret = ext4_ext_try_to_merge_right(inode, path, ex);
+ (void) ext4_ext_try_to_merge_right(inode, path, ex);
- return ret;
+ ext4_ext_try_to_merge_up(handle, inode, path);
}
/*
@@ -1893,7 +1941,7 @@ has_space:
merge:
/* try to merge extents */
if (!(flag & EXT4_GET_BLOCKS_PRE_IO))
- ext4_ext_try_to_merge(inode, path, nearex);
+ ext4_ext_try_to_merge(handle, inode, path, nearex);
/* time to correct all indexes above */
@@ -1901,7 +1949,7 @@ merge:
if (err)
goto cleanup;
- err = ext4_ext_dirty(handle, inode, path + depth);
+ err = ext4_ext_dirty(handle, inode, path + path->p_depth);
cleanup:
if (npath) {
@@ -2923,9 +2971,9 @@ static int ext4_split_extent_at(handle_t *handle,
ext4_ext_mark_initialized(ex);
if (!(flags & EXT4_GET_BLOCKS_PRE_IO))
- ext4_ext_try_to_merge(inode, path, ex);
+ ext4_ext_try_to_merge(handle, inode, path, ex);
- err = ext4_ext_dirty(handle, inode, path + depth);
+ err = ext4_ext_dirty(handle, inode, path + path->p_depth);
goto out;
}
@@ -2957,8 +3005,8 @@ static int ext4_split_extent_at(handle_t *handle,
goto fix_extent_len;
/* update the extent length and mark as initialized */
ex->ee_len = cpu_to_le16(ee_len);
- ext4_ext_try_to_merge(inode, path, ex);
- err = ext4_ext_dirty(handle, inode, path + depth);
+ ext4_ext_try_to_merge(handle, inode, path, ex);
+ err = ext4_ext_dirty(handle, inode, path + path->p_depth);
goto out;
} else if (err)
goto fix_extent_len;
@@ -3190,8 +3238,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
if (err)
goto out;
ext4_ext_mark_initialized(ex);
- ext4_ext_try_to_merge(inode, path, ex);
- err = ext4_ext_dirty(handle, inode, path + depth);
+ ext4_ext_try_to_merge(handle, inode, path, ex);
+ err = ext4_ext_dirty(handle, inode, path + path->p_depth);
goto out;
}
@@ -3332,10 +3380,10 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
/* note: ext4_ext_correct_indexes() isn't needed here because
* borders are not changed
*/
- ext4_ext_try_to_merge(inode, path, ex);
+ ext4_ext_try_to_merge(handle, inode, path, ex);
/* Mark modified extent as dirty */
- err = ext4_ext_dirty(handle, inode, path + depth);
+ err = ext4_ext_dirty(handle, inode, path + path->p_depth);
out:
ext4_ext_show_leaf(inode, path);
return err;
--
1.7.12.rc0.22.gcdd159b
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH RFC] ext4: collapse a single extent tree block into the inode if possible
2012-08-09 12:44 [PATCH RFC] ext4: collapse a single extent tree block into the inode if possible Theodore Ts'o
@ 2012-08-09 12:56 ` Theodore Ts'o
2012-08-09 13:07 ` Theodore Ts'o
1 sibling, 0 replies; 3+ messages in thread
From: Theodore Ts'o @ 2012-08-09 12:56 UTC (permalink / raw)
To: Ext4 Developers List
On Thu, Aug 09, 2012 at 08:44:43AM -0400, Theodore Ts'o wrote:
> If an inode has more than 4 extents, but then later some of the
> extents are merged together, we can optimize the file system by moving
> the extents up into the inode, and discarding extent tree block. This
> is important, because if there are a large number of inodes with an
> external extent tree blocks where the contents could fit in the inode,
> this can significantly increase the fsck time of the file system.
This version of the patch has an ext4_msg() which I will remove in the
final version of the patch, but it's interesting because it shows how
much the code path is being exercised.
We see that for fsstress, we do definitely exercise the code path,
which is good --- and it does mean that we end up with a more
efficient extent tree structure:
013 [ 150.077567] EXT4-fs (vdb): Merge up: ino 145516, blk 646045, extents 4
[ 166.027004] EXT4-fs (vdb): Merge up: ino 145666, blk 646800, extents 4
[ 169.071057] EXT4-fs (vdb): Merge up: ino 145549, blk 646604, extents 3
[ 173.454559] EXT4-fs (vdb): Merge up: ino 145525, blk 646319, extents 2
[ 175.192647] EXT4-fs (vdb): Merge up: ino 145872, blk 650733, extents 3
[ 181.479258] EXT4-fs (vdb): Merge up: ino 145542, blk 642934, extents 3
[ 186.981250] EXT4-fs (vdb): Merge up: ino 145531, blk 647727, extents 3
[ 189.322060] EXT4-fs (vdb): Merge up: ino 146380, blk 650804, extents 3
[ 191.733159] EXT4-fs (vdb): Merge up: ino 145511, blk 646831, extents 4
[ 192.160757] EXT4-fs (vdb): Merge up: ino 145576, blk 647309, extents 4
[ 193.496868] EXT4-fs (vdb): Merge up: ino 145607, blk 648214, extents 3
[ 201.062707] EXT4-fs (vdb): Merge up: ino 145505, blk 642449, extents 3
[ 207.962396] EXT4-fs (vdb): Merge up: ino 145575, blk 650461, extents 4
[ 212.571402] EXT4-fs (vdb): Merge up: ino 146332, blk 651223, extents 3
[ 221.973210] EXT4-fs (vdb): Merge up: ino 146109, blk 651455, extents 4
[ 223.294616] EXT4-fs (vdb): Merge up: ino 146171, blk 650612, extents 3
[ 239.745663] EXT4-fs (vdb): Merge up: ino 146109, blk 651138, extents 3
[ 241.460200] EXT4-fs (vdb): Merge up: ino 145569, blk 646818, extents 4
160s
However, then there are workloads such as fsx where the results of
this patch are not so good:
091 [ 650.736485] EXT4-fs (vdb): Merge up: ino 14, blk 61095, extents 3
[ 650.743739] EXT4-fs (vdb): Merge up: ino 14, blk 62879, extents 3
[ 650.786122] EXT4-fs (vdb): Merge up: ino 14, blk 62929, extents 3
[ 650.804720] EXT4-fs (vdb): Merge up: ino 14, blk 62282, extents 4
[ 650.808706] EXT4-fs (vdb): Merge up: ino 14, blk 62283, extents 3
[ 651.207374] EXT4-fs (vdb): Merge up: ino 14, blk 62502, extents 2
[ 651.213186] EXT4-fs (vdb): Merge up: ino 14, blk 62503, extents 4
[ 651.232026] EXT4-fs (vdb): Merge up: ino 14, blk 62938, extents 2
[ 651.251615] EXT4-fs (vdb): Merge up: ino 14, blk 62951, extents 4
[ 651.548798] EXT4-fs (vdb): Merge up: ino 14, blk 64947, extents 4
[ 651.577057] EXT4-fs (vdb): Merge up: ino 14, blk 62662, extents 3
[ 651.596868] EXT4-fs (vdb): Merge up: ino 14, blk 62663, extents 4
[ 651.884237] EXT4-fs (vdb): Merge up: ino 14, blk 62930, extents 4
[ 651.889220] EXT4-fs (vdb): Merge up: ino 14, blk 62931, extents 4
[ 651.929939] EXT4-fs (vdb): Merge up: ino 14, blk 62936, extents 3
[ 651.955671] EXT4-fs (vdb): Merge up: ino 14, blk 62937, extents 4
[ 651.960726] EXT4-fs (vdb): Merge up: ino 14, blk 65502, extents 4
... [ 960 additional, similar lines elided for brevity's sake ]
Granted, fsx is an extreme test program which is not likely to be
representative of real-life workloads. However, it is fair to note
that the merge up code does have a cost, since it's possible that we
could repeatedly allocate and deallocate the external extent tree
block.
I'm not sure I care, but I'd appreciate any other thoughts people
might have on the subject. Can anyone think of a real-life scenario
where this might be an issue?
Thanks,
- Ted
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH RFC] ext4: collapse a single extent tree block into the inode if possible
2012-08-09 12:44 [PATCH RFC] ext4: collapse a single extent tree block into the inode if possible Theodore Ts'o
2012-08-09 12:56 ` Theodore Ts'o
@ 2012-08-09 13:07 ` Theodore Ts'o
1 sibling, 0 replies; 3+ messages in thread
From: Theodore Ts'o @ 2012-08-09 13:07 UTC (permalink / raw)
To: Ext4 Developers List
Here's a test script which demonstrates how we can up with an external
extent tree block containing a single extent. It needs to run as root
since it will be mounting a test file system image on /mnt.
#!/bin/bash
fs=/u1/test-fs.img
num=128
max=$(expr $num \* 8)
size=$(expr $num \* 32)k
dd if=/dev/zero of=$fs bs=4k count=$(expr $max \* 4)
mke2fs -t ext4 -b 4096 -qF $fs
mount -t ext4 $fs /mnt
cd /mnt
fallocate -l $size test-file
for i in $(seq 0 8 $max | sed -e '$d') ; do
dd if=/dev/zero of=test-file conv=notrunc,nocreat bs=4k \
seek=$i count=1 > /dev/null 2>&1
done
sync
dd if=/dev/zero of=test-file conv=notrunc,nocreat \
bs=4k count=$max > /dev/null 2>&1
sync
cd /
umount /mnt
debugfs $fs -R "stat test-file"
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2012-08-09 13:07 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-08-09 12:44 [PATCH RFC] ext4: collapse a single extent tree block into the inode if possible Theodore Ts'o
2012-08-09 12:56 ` Theodore Ts'o
2012-08-09 13:07 ` Theodore Ts'o
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).