From: Theodore Ts'o <tytso@mit.edu>
To: stable@vger.kernel.org
Cc: Ext4 Developers List <linux-ext4@vger.kernel.org>,
Dmitry Monakhov <dmonakhov@openvz.org>,
"Theodore Ts'o" <tytso@mit.edu>
Subject: [PATCH 2.6.33.y 27/40] ext4: Do not zero out uninitialized extents beyond i_size
Date: Tue, 1 Jun 2010 08:03:14 -0400 [thread overview]
Message-ID: <1275393807-14369-27-git-send-email-tytso@mit.edu> (raw)
In-Reply-To: <1275393807-14369-1-git-send-email-tytso@mit.edu>
From: Dmitry Monakhov <dmonakhov@openvz.org>
commit 21ca087a3891efab4d45488db8febee474d26c68 upstream (as of v2.6.34-git13)
The extents code will sometimes zero out blocks and mark them as
initialized instead of splitting an extent into several smaller ones.
This optimization, however, causes problems if the extent is beyond
i_size because fsck will complain if there are uninitialized blocks
after i_size as this cannot be distinguished from an inode that has
an incorrect i_size field.
https://bugzilla.kernel.org/show_bug.cgi?id=15742
Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
fs/ext4/extents.c | 67 ++++++++++++++++++++++++++++++++++++++++------------
1 files changed, 51 insertions(+), 16 deletions(-)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 032c95b..39aab5c 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2527,11 +2527,21 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
struct ext4_extent *ex2 = NULL;
struct ext4_extent *ex3 = NULL;
struct ext4_extent_header *eh;
- ext4_lblk_t ee_block;
+ ext4_lblk_t ee_block, eof_block;
unsigned int allocated, ee_len, depth;
ext4_fsblk_t newblock;
int err = 0;
int ret = 0;
+ int may_zeroout;
+
+ ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical"
+ "block %llu, max_blocks %u\n", inode->i_ino,
+ (unsigned long long)iblock, max_blocks);
+
+ eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
+ inode->i_sb->s_blocksize_bits;
+ if (eof_block < iblock + max_blocks)
+ eof_block = iblock + max_blocks;
depth = ext_depth(inode);
eh = path[depth].p_hdr;
@@ -2540,16 +2550,23 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ee_len = ext4_ext_get_actual_len(ex);
allocated = ee_len - (iblock - ee_block);
newblock = iblock - ee_block + ext_pblock(ex);
+
ex2 = ex;
orig_ex.ee_block = ex->ee_block;
orig_ex.ee_len = cpu_to_le16(ee_len);
ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
+ /*
+ * It is safe to convert extent to initialized via explicit
+ * zeroout only if extent is fully inside i_size or new_size.
+ */
+ may_zeroout = ee_block + ee_len <= eof_block;
+
err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
goto out;
/* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */
- if (ee_len <= 2*EXT4_EXT_ZERO_LEN) {
+ if (ee_len <= 2*EXT4_EXT_ZERO_LEN && may_zeroout) {
err = ext4_ext_zeroout(inode, &orig_ex);
if (err)
goto fix_extent_len;
@@ -2580,7 +2597,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
if (allocated > max_blocks) {
unsigned int newdepth;
/* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */
- if (allocated <= EXT4_EXT_ZERO_LEN) {
+ if (allocated <= EXT4_EXT_ZERO_LEN && may_zeroout) {
/*
* iblock == ee_block is handled by the zerouout
* at the beginning.
@@ -2656,7 +2673,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ex3->ee_len = cpu_to_le16(allocated - max_blocks);
ext4_ext_mark_uninitialized(ex3);
err = ext4_ext_insert_extent(handle, inode, path, ex3, 0);
- if (err == -ENOSPC) {
+ if (err == -ENOSPC && may_zeroout) {
err = ext4_ext_zeroout(inode, &orig_ex);
if (err)
goto fix_extent_len;
@@ -2680,8 +2697,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
* update the extent length after successful insert of the
* split extent
*/
- orig_ex.ee_len = cpu_to_le16(ee_len -
- ext4_ext_get_actual_len(ex3));
+ ee_len -= ext4_ext_get_actual_len(ex3);
+ orig_ex.ee_len = cpu_to_le16(ee_len);
+ may_zeroout = ee_block + ee_len <= eof_block;
+
depth = newdepth;
ext4_ext_drop_refs(path);
path = ext4_ext_find_extent(inode, iblock, path);
@@ -2705,7 +2724,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
* otherwise give the extent a chance to merge to left
*/
if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN &&
- iblock != ee_block) {
+ iblock != ee_block && may_zeroout) {
err = ext4_ext_zeroout(inode, &orig_ex);
if (err)
goto fix_extent_len;
@@ -2774,7 +2793,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
goto out;
insert:
err = ext4_ext_insert_extent(handle, inode, path, &newex, 0);
- if (err == -ENOSPC) {
+ if (err == -ENOSPC && may_zeroout) {
err = ext4_ext_zeroout(inode, &orig_ex);
if (err)
goto fix_extent_len;
@@ -2834,14 +2853,21 @@ static int ext4_split_unwritten_extents(handle_t *handle,
struct ext4_extent *ex2 = NULL;
struct ext4_extent *ex3 = NULL;
struct ext4_extent_header *eh;
- ext4_lblk_t ee_block;
+ ext4_lblk_t ee_block, eof_block;
unsigned int allocated, ee_len, depth;
ext4_fsblk_t newblock;
int err = 0;
+ int may_zeroout;
+
+ ext_debug("ext4_split_unwritten_extents: inode %lu, logical"
+ "block %llu, max_blocks %u\n", inode->i_ino,
+ (unsigned long long)iblock, max_blocks);
+
+ eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
+ inode->i_sb->s_blocksize_bits;
+ if (eof_block < iblock + max_blocks)
+ eof_block = iblock + max_blocks;
- ext_debug("ext4_split_unwritten_extents: inode %lu,"
- "iblock %llu, max_blocks %u\n", inode->i_ino,
- (unsigned long long)iblock, max_blocks);
depth = ext_depth(inode);
eh = path[depth].p_hdr;
ex = path[depth].p_ext;
@@ -2849,12 +2875,19 @@ static int ext4_split_unwritten_extents(handle_t *handle,
ee_len = ext4_ext_get_actual_len(ex);
allocated = ee_len - (iblock - ee_block);
newblock = iblock - ee_block + ext_pblock(ex);
+
ex2 = ex;
orig_ex.ee_block = ex->ee_block;
orig_ex.ee_len = cpu_to_le16(ee_len);
ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
/*
+ * It is safe to convert extent to initialized via explicit
+ * zeroout only if extent is fully inside i_size or new_size.
+ */
+ may_zeroout = ee_block + ee_len <= eof_block;
+
+ /*
* If the uninitialized extent begins at the same logical
* block where the write begins, and the write completely
* covers the extent, then we don't need to split it.
@@ -2888,7 +2921,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
ex3->ee_len = cpu_to_le16(allocated - max_blocks);
ext4_ext_mark_uninitialized(ex3);
err = ext4_ext_insert_extent(handle, inode, path, ex3, flags);
- if (err == -ENOSPC) {
+ if (err == -ENOSPC && may_zeroout) {
err = ext4_ext_zeroout(inode, &orig_ex);
if (err)
goto fix_extent_len;
@@ -2912,8 +2945,10 @@ static int ext4_split_unwritten_extents(handle_t *handle,
* update the extent length after successful insert of the
* split extent
*/
- orig_ex.ee_len = cpu_to_le16(ee_len -
- ext4_ext_get_actual_len(ex3));
+ ee_len -= ext4_ext_get_actual_len(ex3);
+ orig_ex.ee_len = cpu_to_le16(ee_len);
+ may_zeroout = ee_block + ee_len <= eof_block;
+
depth = newdepth;
ext4_ext_drop_refs(path);
path = ext4_ext_find_extent(inode, iblock, path);
@@ -2959,7 +2994,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
goto out;
insert:
err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
- if (err == -ENOSPC) {
+ if (err == -ENOSPC && may_zeroout) {
err = ext4_ext_zeroout(inode, &orig_ex);
if (err)
goto fix_extent_len;
--
1.6.6.1.1.g974db.dirty
next prev parent reply other threads:[~2010-06-01 12:03 UTC|newest]
Thread overview: 43+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-06-01 12:02 [PATCH 2.6.33.y 01/40] ext4: Use bitops to read/modify EXT4_I(inode)->i_state Theodore Ts'o
2010-06-01 12:02 ` [PATCH 2.6.33.y 02/40] ext4: Fix BUG_ON at fs/buffer.c:652 in no journal mode Theodore Ts'o
2010-06-01 12:02 ` [PATCH 2.6.33.y 03/40] ext4: Add flag to files with blocks intentionally past EOF Theodore Ts'o
2010-06-01 12:02 ` [PATCH 2.6.33.y 04/40] ext4: Fix fencepost error in chosing choosing group vs file preallocation Theodore Ts'o
2010-06-01 12:02 ` [PATCH 2.6.33.y 05/40] ext4: fix error handling in migrate Theodore Ts'o
2010-06-01 12:02 ` [PATCH 2.6.33.y 06/40] ext4: explicitly remove inode from orphan list after failed direct io Theodore Ts'o
2010-06-01 12:02 ` [PATCH 2.6.33.y 07/40] ext4: Handle non empty on-disk orphan link Theodore Ts'o
2010-06-01 12:02 ` [PATCH 2.6.33.y 08/40] ext4: make "offset" consistent in ext4_check_dir_entry() Theodore Ts'o
2010-06-01 12:02 ` [PATCH 2.6.33.y 09/40] ext4: Fix insertion point of extent in mext_insert_across_blocks() Theodore Ts'o
2010-06-01 12:02 ` [PATCH 2.6.33.y 10/40] ext4: Fix the NULL reference in double_down_write_data_sem() Theodore Ts'o
2010-06-01 12:02 ` [PATCH 2.6.33.y 11/40] ext4: Code cleanup for EXT4_IOC_MOVE_EXT ioctl Theodore Ts'o
2010-06-01 12:02 ` [PATCH 2.6.33.y 12/40] ext4: Fix estimate of # of blocks needed to write indirect-mapped files Theodore Ts'o
2010-06-01 12:03 ` [PATCH 2.6.33.y 13/40] ext4: Fixed inode allocator to correctly track a flex_bg's used_dirs Theodore Ts'o
2010-06-01 12:03 ` [PATCH 2.6.33.y 14/40] ext4: Fix possible lost inode write in no journal mode Theodore Ts'o
2012-09-28 3:08 ` Yongqiang Yang
2012-09-28 3:21 ` Yongqiang Yang
2010-06-01 12:03 ` [PATCH 2.6.33.y 15/40] ext4: Fix buffer head leaks after calls to ext4_get_inode_loc() Theodore Ts'o
2010-06-01 12:03 ` [PATCH 2.6.33.y 16/40] ext4: Issue the discard operation *before* releasing the blocks to be reused Theodore Ts'o
2010-06-01 12:03 ` [PATCH 2.6.33.y 17/40] ext4: check missed return value in ext4_sync_file() Theodore Ts'o
2010-06-01 12:03 ` [PATCH 2.6.33.y 18/40] ext4: fix memory leaks in error path handling of ext4_ext_zeroout() Theodore Ts'o
2010-06-01 12:03 ` [PATCH 2.6.33.y 19/40] ext4: Remove unnecessary call to ext4_get_group_desc() in mballoc Theodore Ts'o
2010-06-01 12:03 ` [PATCH 2.6.33.y 20/40] ext4: rename ext4_mb_release_desc() to ext4_mb_unload_buddy() Theodore Ts'o
2010-06-01 12:03 ` [PATCH 2.6.33.y 21/40] ext4: allow defrag (EXT4_IOC_MOVE_EXT) in 32bit compat mode Theodore Ts'o
2010-06-01 12:03 ` [PATCH 2.6.33.y 22/40] ext4: fix quota accounting in case of fallocate Theodore Ts'o
2010-06-01 12:03 ` [PATCH 2.6.33.y 23/40] ext4: check s_log_groups_per_flex in online resize code Theodore Ts'o
2010-06-01 12:03 ` [PATCH 2.6.33.y 24/40] ext4: don't return to userspace after freezing the fs with a mutex held Theodore Ts'o
2010-06-01 12:03 ` [PATCH 2.6.33.y 25/40] ext4: stop issuing discards if not supported by device Theodore Ts'o
2010-06-01 12:03 ` [PATCH 2.6.33.y 26/40] ext4: don't scan/accumulate more pages than mballoc will allocate Theodore Ts'o
2010-06-01 12:03 ` Theodore Ts'o [this message]
2010-06-01 12:03 ` [PATCH 2.6.33.y 28/40] ext4: clean up inode bitmaps manipulation in ext4_free_inode Theodore Ts'o
2010-06-01 12:03 ` [PATCH 2.6.33.y 29/40] ext4: init statistics after journal recovery Theodore Ts'o
2010-06-01 12:03 ` [PATCH 2.6.33.y 30/40] ext4: Remove extraneous newlines in ext4_msg() calls Theodore Ts'o
2010-06-01 12:03 ` [PATCH 2.6.33.y 31/40] ext4: Prevent creation of files larger than RLIMIT_FSIZE using fallocate Theodore Ts'o
2010-06-01 12:03 ` [PATCH 2.6.33.y 32/40] ext4: check for a good block group before loading buddy pages Theodore Ts'o
2010-06-01 12:03 ` [PATCH 2.6.33.y 33/40] ext4: Show journal_checksum option Theodore Ts'o
2010-06-01 12:03 ` [PATCH 2.6.33.y 34/40] ext4: Use bitops to read/modify i_flags in struct ext4_inode_info Theodore Ts'o
2010-06-01 12:03 ` [PATCH 2.6.33.y 35/40] ext4: Avoid crashing on NULL ptr dereference on a filesystem error Theodore Ts'o
2010-06-01 12:03 ` [PATCH 2.6.33.y 36/40] ext4: Clear the EXT4_EOFBLOCKS_FL flag only when warranted Theodore Ts'o
2010-06-01 12:03 ` [PATCH 2.6.33.y 37/40] ext4: restart ext4_ext_remove_space() after transaction restart Theodore Ts'o
2010-06-01 12:03 ` [PATCH 2.6.33.y 38/40] ext4: Conditionally define compat ioctl numbers Theodore Ts'o
2010-06-01 12:03 ` [PATCH 2.6.33.y 39/40] ext4: Fix compat EXT4_IOC_ADD_GROUP Theodore Ts'o
2010-06-01 12:03 ` [PATCH 2.6.33.y 40/40] ext4: Make fsync sync new parent directories in no-journal mode Theodore Ts'o
2010-07-28 23:29 ` [stable] [PATCH 2.6.33.y 01/40] ext4: Use bitops to read/modify EXT4_I(inode)->i_state Greg KH
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1275393807-14369-27-git-send-email-tytso@mit.edu \
--to=tytso@mit.edu \
--cc=dmonakhov@openvz.org \
--cc=linux-ext4@vger.kernel.org \
--cc=stable@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).