linux-ext4.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jan Kara <jack@suse.cz>
To: Ted Tso <tytso@mit.edu>
Cc: linux-ext4@vger.kernel.org, Jan Kara <jack@suse.cz>
Subject: [PATCH 04/12] ext4: Disable merging of uninitialized extents
Date: Fri, 18 Jan 2013 13:00:38 +0100	[thread overview]
Message-ID: <1358510446-19174-5-git-send-email-jack@suse.cz> (raw)
In-Reply-To: <1358510446-19174-1-git-send-email-jack@suse.cz>

Merging of uninitialized extents creates all sorts of interesting race
possibilities when writeback / DIO races with fallocate. Thus
ext4_convert_unwritten_extents_endio() has to deal with a case where
extent to be converted needs to be split out first. That isn't nice
for two reasons:

1) It may need allocation of extent tree block so ENOSPC is possible.
2) It complicates end_io handling code

So we disable merging of uninitialized extents which allows us to simplify
the code. Extents will get merged after they are converted to initialized
ones.

Reviewed-by: Zheng Liu <wenqing.lz@taobao.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext4/extents.c |   61 +++++++++++++++-------------------------------------
 1 files changed, 18 insertions(+), 43 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 26af228..f1ce33a 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -54,9 +54,6 @@
 #define EXT4_EXT_MARK_UNINIT1	0x2  /* mark first half uninitialized */
 #define EXT4_EXT_MARK_UNINIT2	0x4  /* mark second half uninitialized */
 
-#define EXT4_EXT_DATA_VALID1	0x8  /* first half contains valid data */
-#define EXT4_EXT_DATA_VALID2	0x10 /* second half contains valid data */
-
 static __le32 ext4_extent_block_csum(struct inode *inode,
 				     struct ext4_extent_header *eh)
 {
@@ -1579,20 +1576,17 @@ int
 ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
 				struct ext4_extent *ex2)
 {
-	unsigned short ext1_ee_len, ext2_ee_len, max_len;
+	unsigned ext1_ee_len, ext2_ee_len;
 
 	/*
-	 * Make sure that either both extents are uninitialized, or
-	 * both are _not_.
+	 * Make sure that both extents are initialized. We don't merge
+	 * uninitialized extents so that we can be sure that end_io code has
+	 * the extent that was written properly split out and conversion to
+	 * initialized is trivial.
 	 */
-	if (ext4_ext_is_uninitialized(ex1) ^ ext4_ext_is_uninitialized(ex2))
+	if (ext4_ext_is_uninitialized(ex1) || ext4_ext_is_uninitialized(ex2))
 		return 0;
 
-	if (ext4_ext_is_uninitialized(ex1))
-		max_len = EXT_UNINIT_MAX_LEN;
-	else
-		max_len = EXT_INIT_MAX_LEN;
-
 	ext1_ee_len = ext4_ext_get_actual_len(ex1);
 	ext2_ee_len = ext4_ext_get_actual_len(ex2);
 
@@ -1605,7 +1599,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
 	 * as an RO_COMPAT feature, refuse to merge to extents if
 	 * this can result in the top bit of ee_len being set.
 	 */
-	if (ext1_ee_len + ext2_ee_len > max_len)
+	if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN)
 		return 0;
 #ifdef AGGRESSIVE_TEST
 	if (ext1_ee_len >= 4)
@@ -2959,9 +2953,6 @@ static int ext4_split_extent_at(handle_t *handle,
 	unsigned int ee_len, depth;
 	int err = 0;
 
-	BUG_ON((split_flag & (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)) ==
-	       (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2));
-
 	ext_debug("ext4_split_extents_at: inode %lu, logical"
 		"block %llu\n", inode->i_ino, (unsigned long long)split);
 
@@ -3020,14 +3011,7 @@ static int ext4_split_extent_at(handle_t *handle,
 
 	err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
 	if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
-		if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
-			if (split_flag & EXT4_EXT_DATA_VALID1)
-				err = ext4_ext_zeroout(inode, ex2);
-			else
-				err = ext4_ext_zeroout(inode, ex);
-		} else
-			err = ext4_ext_zeroout(inode, &orig_ex);
-
+		err = ext4_ext_zeroout(inode, &orig_ex);
 		if (err)
 			goto fix_extent_len;
 		/* update the extent length and mark as initialized */
@@ -3085,8 +3069,6 @@ static int ext4_split_extent(handle_t *handle,
 		if (uninitialized)
 			split_flag1 |= EXT4_EXT_MARK_UNINIT1 |
 				       EXT4_EXT_MARK_UNINIT2;
-		if (split_flag & EXT4_EXT_DATA_VALID2)
-			split_flag1 |= EXT4_EXT_DATA_VALID1;
 		err = ext4_split_extent_at(handle, inode, path,
 				map->m_lblk + map->m_len, split_flag1, flags1);
 		if (err)
@@ -3099,8 +3081,7 @@ static int ext4_split_extent(handle_t *handle,
 		return PTR_ERR(path);
 
 	if (map->m_lblk >= ee_block) {
-		split_flag1 = split_flag & (EXT4_EXT_MAY_ZEROOUT |
-					    EXT4_EXT_DATA_VALID2);
+		split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT;
 		if (uninitialized)
 			split_flag1 |= EXT4_EXT_MARK_UNINIT1;
 		if (split_flag & EXT4_EXT_MARK_UNINIT2)
@@ -3379,8 +3360,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
 
 	split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
 	split_flag |= EXT4_EXT_MARK_UNINIT2;
-	if (flags & EXT4_GET_BLOCKS_CONVERT)
-		split_flag |= EXT4_EXT_DATA_VALID2;
+
 	flags |= EXT4_GET_BLOCKS_PRE_IO;
 	return ext4_split_extent(handle, inode, path, map, split_flag, flags);
 }
@@ -3405,20 +3385,15 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
 		"block %llu, max_blocks %u\n", inode->i_ino,
 		  (unsigned long long)ee_block, ee_len);
 
-	/* If extent is larger than requested then split is required */
+	/* Extent is larger than requested? */
 	if (ee_block != map->m_lblk || ee_len > map->m_len) {
-		err = ext4_split_unwritten_extents(handle, inode, map, path,
-						   EXT4_GET_BLOCKS_CONVERT);
-		if (err < 0)
-			goto out;
-		ext4_ext_drop_refs(path);
-		path = ext4_ext_find_extent(inode, map->m_lblk, path);
-		if (IS_ERR(path)) {
-			err = PTR_ERR(path);
-			goto out;
-		}
-		depth = ext_depth(inode);
-		ex = path[depth].p_ext;
+		EXT4_ERROR_INODE(inode, "Written extent modified before IO"
+			" finished: extent logical block %llu, len %u; IO"
+			" logical block %llu, len %u\n",
+			(unsigned long long)ee_block, ee_len,
+			(unsigned long long)map->m_lblk, map->m_len);
+		err = -EIO;
+		goto out;
 	}
 
 	err = ext4_ext_get_access(handle, inode, path + depth);
-- 
1.7.1


  parent reply	other threads:[~2013-01-18 12:00 UTC|newest]

Thread overview: 53+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-01-18 12:00 [PATCH 0/12 v2] ext4: Several simplifications and fixes Jan Kara
2013-01-18 12:00 ` [PATCH 01/12] ext4: Always use ext4_bio_write_page() for writeout Jan Kara
2013-01-28 14:31   ` Theodore Ts'o
2013-01-18 12:00 ` [PATCH 02/12] ext4: Use redirty_page_for_writepage() in ext4_bio_write_page() Jan Kara
2013-01-28 14:34   ` Theodore Ts'o
2013-01-18 12:00 ` [PATCH 03/12] ext4: Remove bogus wait for unwritten extents in ext4_ind_direct_IO Jan Kara
2013-01-22 11:11   ` Dmitry Monakhov
2013-01-22 13:44     ` Jan Kara
2013-01-22 14:12       ` Dmitry Monakhov
2013-01-22 15:21         ` Jan Kara
2013-01-22 14:22       ` Zheng Liu
2013-01-22 15:22         ` Jan Kara
2013-01-22 16:00           ` Zheng Liu
2013-01-22 23:14             ` Jan Kara
2013-01-23  6:11               ` Zheng Liu
2013-01-23  9:42                 ` Jan Kara
2013-01-18 12:00 ` Jan Kara [this message]
2013-01-24  9:49   ` [PATCH 04/12] ext4: Disable merging of uninitialized extents Dmitry Monakhov
2013-01-24 15:12     ` Jan Kara
2013-01-24 15:32       ` Dmitry Monakhov
2013-01-28 14:36     ` Theodore Ts'o
2013-01-28 15:02       ` Dmitry Monakhov
2013-01-28 15:38         ` Theodore Ts'o
2013-01-29  7:41           ` Dmitry Monakhov
2013-01-29  8:37             ` Zheng Liu
2013-01-31  7:47     ` Dmitry Monakhov
2013-01-31 12:39       ` Jan Kara
2013-01-31 14:09         ` Dmitry Monakhov
2013-01-31 16:54       ` Theodore Ts'o
2013-02-09 17:10   ` REGRESSION: " Theodore Ts'o
2013-02-12 21:58     ` Jan Kara
2013-02-13  4:57       ` Theodore Ts'o
2013-02-13  7:26         ` Dmitry Monakhov
2013-02-13 15:08           ` Merge window planning for ext4 and Ted's vacation Theodore Ts'o
2013-02-14 10:47           ` REGRESSION: [PATCH 04/12] ext4: Disable merging of uninitialized extents Jan Kara
2013-02-14 16:11     ` Jan Kara
2013-02-14 19:05       ` Theodore Ts'o
2013-02-14 21:32         ` Jan Kara
2013-01-18 12:00 ` [PATCH 05/12] ext4: Remove unnecessary wait for extent conversion in ext4_fallocate() Jan Kara
2013-01-18 12:00 ` [PATCH 06/12] ext4: Move work from io_end to inode Jan Kara
2013-01-28 14:45   ` Theodore Ts'o
2013-01-18 12:00 ` [PATCH 07/12] ext4: Simplify list handling in ext4_do_flush_completed_IO() Jan Kara
2013-01-28 14:51   ` Theodore Ts'o
2013-01-18 12:00 ` [PATCH 08/12] ext4: Remove __ext4_journalled_writepage() from mpage_da_submit_io() Jan Kara
2013-01-28 14:40   ` Theodore Ts'o
2013-01-18 12:00 ` [PATCH 09/12] ext4: Dirty page has always buffers attached Jan Kara
2013-01-28 17:55   ` Theodore Ts'o
2013-01-18 12:00 ` [PATCH 10/12] ext4: Simplify mpage_add_bh_to_extent() Jan Kara
2013-01-28 18:06   ` Theodore Ts'o
2013-01-18 12:00 ` [PATCH 11/12] ext4: Make ext4_bio_writepage() handle unprepared buffers Jan Kara
2013-01-29  1:59   ` Theodore Ts'o
2013-01-18 12:00 ` [PATCH 12/12] ext4: Fix ext4_writepage() to achieve data=ordered guarantees Jan Kara
2013-01-29  2:08   ` Theodore Ts'o

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1358510446-19174-5-git-send-email-jack@suse.cz \
    --to=jack@suse.cz \
    --cc=linux-ext4@vger.kernel.org \
    --cc=tytso@mit.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).