From: Jan Kara <jack@suse.cz>
To: Ted Tso <tytso@mit.edu>
Cc: linux-ext4@vger.kernel.org, Jan Kara <jack@suse.cz>
Subject: [PATCH 04/12] ext4: Disable merging of uninitialized extents
Date: Fri, 18 Jan 2013 13:00:38 +0100 [thread overview]
Message-ID: <1358510446-19174-5-git-send-email-jack@suse.cz> (raw)
In-Reply-To: <1358510446-19174-1-git-send-email-jack@suse.cz>
Merging of uninitialized extents creates all sorts of interesting race
possibilities when writeback / DIO races with fallocate. Thus
ext4_convert_unwritten_extents_endio() has to deal with a case where
extent to be converted needs to be split out first. That isn't nice
for two reasons:
1) It may need allocation of extent tree block so ENOSPC is possible.
2) It complicates end_io handling code
So we disable merging of uninitialized extents which allows us to simplify
the code. Extents will get merged after they are converted to initialized
ones.
Reviewed-by: Zheng Liu <wenqing.lz@taobao.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
fs/ext4/extents.c | 61 +++++++++++++++-------------------------------------
1 files changed, 18 insertions(+), 43 deletions(-)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 26af228..f1ce33a 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -54,9 +54,6 @@
#define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */
#define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */
-#define EXT4_EXT_DATA_VALID1 0x8 /* first half contains valid data */
-#define EXT4_EXT_DATA_VALID2 0x10 /* second half contains valid data */
-
static __le32 ext4_extent_block_csum(struct inode *inode,
struct ext4_extent_header *eh)
{
@@ -1579,20 +1576,17 @@ int
ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
struct ext4_extent *ex2)
{
- unsigned short ext1_ee_len, ext2_ee_len, max_len;
+ unsigned ext1_ee_len, ext2_ee_len;
/*
- * Make sure that either both extents are uninitialized, or
- * both are _not_.
+ * Make sure that both extents are initialized. We don't merge
+ * uninitialized extents so that we can be sure that end_io code has
+ * the extent that was written properly split out and conversion to
+ * initialized is trivial.
*/
- if (ext4_ext_is_uninitialized(ex1) ^ ext4_ext_is_uninitialized(ex2))
+ if (ext4_ext_is_uninitialized(ex1) || ext4_ext_is_uninitialized(ex2))
return 0;
- if (ext4_ext_is_uninitialized(ex1))
- max_len = EXT_UNINIT_MAX_LEN;
- else
- max_len = EXT_INIT_MAX_LEN;
-
ext1_ee_len = ext4_ext_get_actual_len(ex1);
ext2_ee_len = ext4_ext_get_actual_len(ex2);
@@ -1605,7 +1599,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
* as an RO_COMPAT feature, refuse to merge to extents if
* this can result in the top bit of ee_len being set.
*/
- if (ext1_ee_len + ext2_ee_len > max_len)
+ if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN)
return 0;
#ifdef AGGRESSIVE_TEST
if (ext1_ee_len >= 4)
@@ -2959,9 +2953,6 @@ static int ext4_split_extent_at(handle_t *handle,
unsigned int ee_len, depth;
int err = 0;
- BUG_ON((split_flag & (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)) ==
- (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2));
-
ext_debug("ext4_split_extents_at: inode %lu, logical"
"block %llu\n", inode->i_ino, (unsigned long long)split);
@@ -3020,14 +3011,7 @@ static int ext4_split_extent_at(handle_t *handle,
err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
- if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
- if (split_flag & EXT4_EXT_DATA_VALID1)
- err = ext4_ext_zeroout(inode, ex2);
- else
- err = ext4_ext_zeroout(inode, ex);
- } else
- err = ext4_ext_zeroout(inode, &orig_ex);
-
+ err = ext4_ext_zeroout(inode, &orig_ex);
if (err)
goto fix_extent_len;
/* update the extent length and mark as initialized */
@@ -3085,8 +3069,6 @@ static int ext4_split_extent(handle_t *handle,
if (uninitialized)
split_flag1 |= EXT4_EXT_MARK_UNINIT1 |
EXT4_EXT_MARK_UNINIT2;
- if (split_flag & EXT4_EXT_DATA_VALID2)
- split_flag1 |= EXT4_EXT_DATA_VALID1;
err = ext4_split_extent_at(handle, inode, path,
map->m_lblk + map->m_len, split_flag1, flags1);
if (err)
@@ -3099,8 +3081,7 @@ static int ext4_split_extent(handle_t *handle,
return PTR_ERR(path);
if (map->m_lblk >= ee_block) {
- split_flag1 = split_flag & (EXT4_EXT_MAY_ZEROOUT |
- EXT4_EXT_DATA_VALID2);
+ split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT;
if (uninitialized)
split_flag1 |= EXT4_EXT_MARK_UNINIT1;
if (split_flag & EXT4_EXT_MARK_UNINIT2)
@@ -3379,8 +3360,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
split_flag |= EXT4_EXT_MARK_UNINIT2;
- if (flags & EXT4_GET_BLOCKS_CONVERT)
- split_flag |= EXT4_EXT_DATA_VALID2;
+
flags |= EXT4_GET_BLOCKS_PRE_IO;
return ext4_split_extent(handle, inode, path, map, split_flag, flags);
}
@@ -3405,20 +3385,15 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
"block %llu, max_blocks %u\n", inode->i_ino,
(unsigned long long)ee_block, ee_len);
- /* If extent is larger than requested then split is required */
+ /* Extent is larger than requested? */
if (ee_block != map->m_lblk || ee_len > map->m_len) {
- err = ext4_split_unwritten_extents(handle, inode, map, path,
- EXT4_GET_BLOCKS_CONVERT);
- if (err < 0)
- goto out;
- ext4_ext_drop_refs(path);
- path = ext4_ext_find_extent(inode, map->m_lblk, path);
- if (IS_ERR(path)) {
- err = PTR_ERR(path);
- goto out;
- }
- depth = ext_depth(inode);
- ex = path[depth].p_ext;
+ EXT4_ERROR_INODE(inode, "Written extent modified before IO"
+ " finished: extent logical block %llu, len %u; IO"
+ " logical block %llu, len %u\n",
+ (unsigned long long)ee_block, ee_len,
+ (unsigned long long)map->m_lblk, map->m_len);
+ err = -EIO;
+ goto out;
}
err = ext4_ext_get_access(handle, inode, path + depth);
--
1.7.1
next prev parent reply other threads:[~2013-01-18 12:00 UTC|newest]
Thread overview: 53+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-01-18 12:00 [PATCH 0/12 v2] ext4: Several simplifications and fixes Jan Kara
2013-01-18 12:00 ` [PATCH 01/12] ext4: Always use ext4_bio_write_page() for writeout Jan Kara
2013-01-28 14:31 ` Theodore Ts'o
2013-01-18 12:00 ` [PATCH 02/12] ext4: Use redirty_page_for_writepage() in ext4_bio_write_page() Jan Kara
2013-01-28 14:34 ` Theodore Ts'o
2013-01-18 12:00 ` [PATCH 03/12] ext4: Remove bogus wait for unwritten extents in ext4_ind_direct_IO Jan Kara
2013-01-22 11:11 ` Dmitry Monakhov
2013-01-22 13:44 ` Jan Kara
2013-01-22 14:12 ` Dmitry Monakhov
2013-01-22 15:21 ` Jan Kara
2013-01-22 14:22 ` Zheng Liu
2013-01-22 15:22 ` Jan Kara
2013-01-22 16:00 ` Zheng Liu
2013-01-22 23:14 ` Jan Kara
2013-01-23 6:11 ` Zheng Liu
2013-01-23 9:42 ` Jan Kara
2013-01-18 12:00 ` Jan Kara [this message]
2013-01-24 9:49 ` [PATCH 04/12] ext4: Disable merging of uninitialized extents Dmitry Monakhov
2013-01-24 15:12 ` Jan Kara
2013-01-24 15:32 ` Dmitry Monakhov
2013-01-28 14:36 ` Theodore Ts'o
2013-01-28 15:02 ` Dmitry Monakhov
2013-01-28 15:38 ` Theodore Ts'o
2013-01-29 7:41 ` Dmitry Monakhov
2013-01-29 8:37 ` Zheng Liu
2013-01-31 7:47 ` Dmitry Monakhov
2013-01-31 12:39 ` Jan Kara
2013-01-31 14:09 ` Dmitry Monakhov
2013-01-31 16:54 ` Theodore Ts'o
2013-02-09 17:10 ` REGRESSION: " Theodore Ts'o
2013-02-12 21:58 ` Jan Kara
2013-02-13 4:57 ` Theodore Ts'o
2013-02-13 7:26 ` Dmitry Monakhov
2013-02-13 15:08 ` Merge window planning for ext4 and Ted's vacation Theodore Ts'o
2013-02-14 10:47 ` REGRESSION: [PATCH 04/12] ext4: Disable merging of uninitialized extents Jan Kara
2013-02-14 16:11 ` Jan Kara
2013-02-14 19:05 ` Theodore Ts'o
2013-02-14 21:32 ` Jan Kara
2013-01-18 12:00 ` [PATCH 05/12] ext4: Remove unnecessary wait for extent conversion in ext4_fallocate() Jan Kara
2013-01-18 12:00 ` [PATCH 06/12] ext4: Move work from io_end to inode Jan Kara
2013-01-28 14:45 ` Theodore Ts'o
2013-01-18 12:00 ` [PATCH 07/12] ext4: Simplify list handling in ext4_do_flush_completed_IO() Jan Kara
2013-01-28 14:51 ` Theodore Ts'o
2013-01-18 12:00 ` [PATCH 08/12] ext4: Remove __ext4_journalled_writepage() from mpage_da_submit_io() Jan Kara
2013-01-28 14:40 ` Theodore Ts'o
2013-01-18 12:00 ` [PATCH 09/12] ext4: Dirty page has always buffers attached Jan Kara
2013-01-28 17:55 ` Theodore Ts'o
2013-01-18 12:00 ` [PATCH 10/12] ext4: Simplify mpage_add_bh_to_extent() Jan Kara
2013-01-28 18:06 ` Theodore Ts'o
2013-01-18 12:00 ` [PATCH 11/12] ext4: Make ext4_bio_writepage() handle unprepared buffers Jan Kara
2013-01-29 1:59 ` Theodore Ts'o
2013-01-18 12:00 ` [PATCH 12/12] ext4: Fix ext4_writepage() to achieve data=ordered guarantees Jan Kara
2013-01-29 2:08 ` Theodore Ts'o
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1358510446-19174-5-git-send-email-jack@suse.cz \
--to=jack@suse.cz \
--cc=linux-ext4@vger.kernel.org \
--cc=tytso@mit.edu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).