From: Dmitry Monakhov <dmonakhov@openvz.org>
To: linux-ext4@vger.kernel.org
Cc: aneesh.kumar@linux.vnet.ibm.com, tytso@mit.edu,
Dmitry Monakhov <dmonakhov@openvz.org>
Subject: [PATCH] ext4: Do not zeroout uninitialized extents beyond i_size
Date: Fri, 9 Apr 2010 21:14:30 +0400 [thread overview]
Message-ID: <1270833270-14087-1-git-send-email-dmonakhov@openvz.org> (raw)
Zerrout trick allow us to optimize cases where it is more reasonable
to explicitly zeroout extent and mark it as initialized instead of
splitting to several small ones.
But this optimization is not acceptable is extent is beyond i_size
Because it is not possible to have initialized blocks after i_size.
Fsck treat this as incorrect inode size.
#BUG# (here suppose to be bug number, but bugzilla.kernel.org is too
dammit slow)
##TESTCASE
/***********************************************
gcc -Wall falloc_test.c -ofalloc_test
This testcase check write to fallocated space
mkfs.ext4 /dev/sdb1
mount /dev/sdb1 /mnt
./falloc_test /mnt/F1
umount /mnt
fsck.ext4 -f /dev/sdb1
***********************************************/
#define _GNU_SOURCE
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <stdio.h>
int main(int argc, char **argv)
{
loff_t len, offset;
int fd, ret;
if (argc != 2) {
printf("Usage: %s <fname>\n", argv[0]);
return 1;
}
fd = open(argv[1], O_CREAT|O_RDWR, 0777);
/* 8192 is less than EXT4_EXT_ZERO_LEN */
ret = fallocate(fd, 0x1, 0, 8192);
if (ret)
return ret;
/* Provoke reserved space convertation */
return write(fd, "1", 1) != 1;
}
Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
---
fs/ext4/extents.c | 62 +++++++++++++++++++++++++++++++++++++++++-----------
1 files changed, 49 insertions(+), 13 deletions(-)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 8bdee27..54a9b80 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1,3 +1,4 @@
+
/*
* Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
* Written by Alex Tomas <alex@clusterfs.com>
@@ -2540,9 +2541,11 @@ static void bi_complete(struct bio *bio, int error)
{
complete((struct completion *)bio->bi_private);
}
-
+#define ext4_ext_zeroout(inode,ext_nn) \
+ __ext4_ext_zeroout(inode, ext_nn, iblock, max_blocks)
/* FIXME!! we need to try to merge to left or right after zero-out */
-static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
+static int __ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex,
+ ext4_lblk_t iblock, unsigned int max_blocks)
{
int ret;
struct bio *bio;
@@ -2560,6 +2563,12 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
/* convert ee_pblock to 512 byte sectors */
ee_pblock = ee_pblock << (blkbits - 9);
+ printk("%s inode:%lu ext:[%d:%d] iblock:%d max:%d i_sz:%lld expand:%d\n",
+ __FUNCTION__, inode->i_ino,
+ le32_to_cpu(ex->ee_block), ee_len,
+ iblock, max_blocks, inode->i_size,
+ ((inode->i_size +blocksize -1) >> blkbits) < iblock + max_blocks);
+
while (ee_len > 0) {
if (ee_len > BIO_MAX_PAGES)
@@ -2631,11 +2640,15 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
struct ext4_extent *ex2 = NULL;
struct ext4_extent *ex3 = NULL;
struct ext4_extent_header *eh;
- ext4_lblk_t ee_block;
+ ext4_lblk_t ee_block, eof_block;
unsigned int allocated, ee_len, depth;
ext4_fsblk_t newblock;
int err = 0;
int ret = 0;
+ int may_zeroout;
+ ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical"
+ "block %llu, max_blocks %u ",
+ inode->i_ino, (unsigned long long)iblock, max_blocks);
depth = ext_depth(inode);
eh = path[depth].p_hdr;
@@ -2644,16 +2657,25 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ee_len = ext4_ext_get_actual_len(ex);
allocated = ee_len - (iblock - ee_block);
newblock = iblock - ee_block + ext_pblock(ex);
+ eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
+ inode->i_sb->s_blocksize_bits;
ex2 = ex;
orig_ex.ee_block = ex->ee_block;
orig_ex.ee_len = cpu_to_le16(ee_len);
ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
+ /*
+ * It is safe to convert extent to initialized via explicit
+ * zeroout only if extent is fully insde i_size or new_size.
+ */
+ may_zeroout = ee_block + ee_len <= iblock + max_blocks ||
+ ee_block + ee_len <= eof_block;
+
err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
goto out;
/* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */
- if (ee_len <= 2*EXT4_EXT_ZERO_LEN) {
+ if (ee_len <= 2*EXT4_EXT_ZERO_LEN && may_zeroout) {
err = ext4_ext_zeroout(inode, &orig_ex);
if (err)
goto fix_extent_len;
@@ -2684,7 +2706,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
if (allocated > max_blocks) {
unsigned int newdepth;
/* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */
- if (allocated <= EXT4_EXT_ZERO_LEN) {
+ if (allocated <= EXT4_EXT_ZERO_LEN && may_zeroout) {
/*
* iblock == ee_block is handled by the zerouout
* at the beginning.
@@ -2760,7 +2782,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ex3->ee_len = cpu_to_le16(allocated - max_blocks);
ext4_ext_mark_uninitialized(ex3);
err = ext4_ext_insert_extent(handle, inode, path, ex3, 0);
- if (err == -ENOSPC) {
+ if (err == -ENOSPC && may_zeroout) {
err = ext4_ext_zeroout(inode, &orig_ex);
if (err)
goto fix_extent_len;
@@ -2784,8 +2806,11 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
* update the extent length after successful insert of the
* split extent
*/
- orig_ex.ee_len = cpu_to_le16(ee_len -
- ext4_ext_get_actual_len(ex3));
+ ee_len -= ext4_ext_get_actual_len(ex3);
+ orig_ex.ee_len = cpu_to_le16(ee_len);
+ may_zeroout = ee_block + ee_len <= iblock + max_blocks ||
+ ee_block + ee_len <= eof_block;
+
depth = newdepth;
ext4_ext_drop_refs(path);
path = ext4_ext_find_extent(inode, iblock, path);
@@ -2809,7 +2834,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
* otherwise give the extent a chance to merge to left
*/
if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN &&
- iblock != ee_block) {
+ iblock != ee_block && may_zeroout) {
err = ext4_ext_zeroout(inode, &orig_ex);
if (err)
goto fix_extent_len;
@@ -2878,7 +2903,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
goto out;
insert:
err = ext4_ext_insert_extent(handle, inode, path, &newex, 0);
- if (err == -ENOSPC) {
+ if (err == -ENOSPC && may_zeroout) {
err = ext4_ext_zeroout(inode, &orig_ex);
if (err)
goto fix_extent_len;
@@ -2938,14 +2963,16 @@ static int ext4_split_unwritten_extents(handle_t *handle,
struct ext4_extent *ex2 = NULL;
struct ext4_extent *ex3 = NULL;
struct ext4_extent_header *eh;
- ext4_lblk_t ee_block;
+ ext4_lblk_t ee_block, eof_block;
unsigned int allocated, ee_len, depth;
ext4_fsblk_t newblock;
int err = 0;
+ int may_zeroout;
ext_debug("ext4_split_unwritten_extents: inode %lu,"
"iblock %llu, max_blocks %u\n", inode->i_ino,
(unsigned long long)iblock, max_blocks);
+
depth = ext_depth(inode);
eh = path[depth].p_hdr;
ex = path[depth].p_ext;
@@ -2953,10 +2980,19 @@ static int ext4_split_unwritten_extents(handle_t *handle,
ee_len = ext4_ext_get_actual_len(ex);
allocated = ee_len - (iblock - ee_block);
newblock = iblock - ee_block + ext_pblock(ex);
+ eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
+ inode->i_sb->s_blocksize_bits;
+
ex2 = ex;
orig_ex.ee_block = ex->ee_block;
orig_ex.ee_len = cpu_to_le16(ee_len);
ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
+ /*
+ * It is safe to convert extent to initialized via explicit
+ * zeroout only if extent is fully insde i_size or new_size.
+ */
+ may_zeroout = ee_block + ee_len <= iblock + max_blocks ||
+ ee_block + ee_len <= eof_block;
/*
* If the uninitialized extent begins at the same logical
@@ -2992,7 +3028,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
ex3->ee_len = cpu_to_le16(allocated - max_blocks);
ext4_ext_mark_uninitialized(ex3);
err = ext4_ext_insert_extent(handle, inode, path, ex3, flags);
- if (err == -ENOSPC) {
+ if (err == -ENOSPC && may_zeroout) {
err = ext4_ext_zeroout(inode, &orig_ex);
if (err)
goto fix_extent_len;
@@ -3063,7 +3099,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
goto out;
insert:
err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
- if (err == -ENOSPC) {
+ if (err == -ENOSPC && may_zeroout) {
err = ext4_ext_zeroout(inode, &orig_ex);
if (err)
goto fix_extent_len;
--
1.6.6.1
next reply other threads:[~2010-04-09 17:14 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-04-09 17:14 Dmitry Monakhov [this message]
2010-04-09 17:18 ` [PATCH] ext4: Do not zeroout uninitialized extents beyond i_size Dmitry Monakhov
-- strict thread matches above, loose matches on Subject: below --
2010-04-09 17:22 Dmitry Monakhov
2010-04-14 21:28 ` tytso
2010-04-15 9:20 ` Dmitry Monakhov
2010-04-21 17:45 ` tytso
2010-04-28 4:40 ` Aneesh Kumar K. V
2010-04-28 7:38 ` Dmitry Monakhov
2010-05-27 17:19 ` Aneesh Kumar K. V
2010-06-03 8:32 ` Dmitry Monakhov
2010-06-08 21:46 ` tytso
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1270833270-14087-1-git-send-email-dmonakhov@openvz.org \
--to=dmonakhov@openvz.org \
--cc=aneesh.kumar@linux.vnet.ibm.com \
--cc=linux-ext4@vger.kernel.org \
--cc=tytso@mit.edu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).