linux-ext4.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/9 bigalloc] ext4: change unit of extent's ee_block and ee_len from block to cluster
@ 2011-11-09 11:16 Robin Dong
  2011-11-09 11:17 ` [PATCH 1/9 bigalloc] ext4: get blocks from ext4_ext_get_actual_blocks Robin Dong
                   ` (8 more replies)
  0 siblings, 9 replies; 10+ messages in thread
From: Robin Dong @ 2011-11-09 11:16 UTC (permalink / raw)
  To: linux-ext4; +Cc: Robin Dong

From: Robin Dong <sanbai@taobao.com>

Hi,

This patch series changes the unit of an extent's ee_block and ee_len from "block" to "cluster",
since it reduces the space occupied by metadata.

This patch series should be applied on top of Ted's bigalloc patches, and it currently cannot support:
1. delay allocation
2. 1k/2k blocksize


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 1/9 bigalloc] ext4: get blocks from ext4_ext_get_actual_blocks
  2011-11-09 11:16 [PATCH 0/9 bigalloc] ext4: change unit of extent's ee_block and ee_len from block to cluster Robin Dong
@ 2011-11-09 11:17 ` Robin Dong
  2011-11-09 11:17 ` [PATCH 2/9 bigalloc] ext4: change ext4_ext_map_blocks to allocate clusters instead of blocks Robin Dong
                   ` (7 subsequent siblings)
  8 siblings, 0 replies; 10+ messages in thread
From: Robin Dong @ 2011-11-09 11:17 UTC (permalink / raw)
  To: linux-ext4; +Cc: Robin Dong

From: Robin Dong <sanbai@taobao.com>

Since ee_len's unit changed to cluster, it needs to be converted from clusters
to blocks using the new function ext4_ext_get_actual_blocks.

Signed-off-by: Robin Dong <sanbai@taobao.com>
---
 fs/ext4/ext4.h         |    5 ++
 fs/ext4/ext4_extents.h |   16 ++++++-
 fs/ext4/extents.c      |  123 +++++++++++++++++++++++++++---------------------
 3 files changed, 88 insertions(+), 56 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index fba951b..1dea3e8 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -276,6 +276,11 @@ struct ext4_io_submit {
 /* Translate # of blks to # of clusters */
 #define EXT4_NUM_B2C(sbi, blks)	(((blks) + (sbi)->s_cluster_ratio - 1) >> \
 				 (sbi)->s_cluster_bits)
+/* Translate a block number to a cluster number by inode */
+#define EXT4_INODE_B2C(inode, block) (EXT4_B2C(EXT4_SB(inode->i_sb), (block)))
+/* Translate a cluster number to a block number by inode */
+#define EXT4_INODE_C2B(inode, cluster) (EXT4_C2B(EXT4_SB(inode->i_sb), \
+				(cluster)))
 
 /*
  * Structure of a blocks group descriptor
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index a52db3a..30c5ce1 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -70,8 +70,10 @@
  * It's used at the bottom of the tree.
  */
 struct ext4_extent {
-	__le32	ee_block;	/* first logical block extent covers */
-	__le16	ee_len;		/* number of blocks covered by extent */
+	__le32	ee_block;	/* first logical block (or cluster) *
+				 * extent covers */
+	__le16	ee_len;		/* number of blocks (or clusters) *
+				 * covered by extent */
 	__le16	ee_start_hi;	/* high 16 bits of physical block */
 	__le32	ee_start_lo;	/* low 32 bits of physical block */
 };
@@ -212,6 +214,16 @@ static inline int ext4_ext_is_uninitialized(struct ext4_extent *ext)
 	return (le16_to_cpu(ext->ee_len) > EXT_INIT_MAX_LEN);
 }
 
+static inline int ext4_ext_get_actual_blocks(struct ext4_extent *ext,
+		struct super_block *sb)
+{
+	int res = (le16_to_cpu(ext->ee_len) <= EXT_INIT_MAX_LEN ?
+		le16_to_cpu(ext->ee_len) :
+		(le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN));
+
+	return EXT4_C2B(EXT4_SB(sb), res);
+}
+
 static inline int ext4_ext_get_actual_len(struct ext4_extent *ext)
 {
 	return (le16_to_cpu(ext->ee_len) <= EXT_INIT_MAX_LEN ?
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 4c38262..597ebcb 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -304,7 +304,7 @@ ext4_ext_max_entries(struct inode *inode, int depth)
 static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
 {
 	ext4_fsblk_t block = ext4_ext_pblock(ext);
-	int len = ext4_ext_get_actual_len(ext);
+	int len = ext4_ext_get_actual_blocks(ext, inode->i_sb);
 
 	return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
 }
@@ -417,7 +417,8 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
 			ext_debug("  %d:[%d]%d:%llu ",
 				  le32_to_cpu(path->p_ext->ee_block),
 				  ext4_ext_is_uninitialized(path->p_ext),
-				  ext4_ext_get_actual_len(path->p_ext),
+				  ext4_ext_get_actual_blocks(path->p_ext,
+					  inode->i_sb),
 				  ext4_ext_pblock(path->p_ext));
 		} else
 			ext_debug("  []");
@@ -443,7 +444,8 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
 	for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
 		ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block),
 			  ext4_ext_is_uninitialized(ex),
-			  ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex));
+			  ext4_ext_get_actual_blocks(ex, inode->i_sb),
+			  ext4_ext_pblock(ex));
 	}
 	ext_debug("\n");
 }
@@ -474,7 +476,7 @@ static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path,
 				le32_to_cpu(ex->ee_block),
 				ext4_ext_pblock(ex),
 				ext4_ext_is_uninitialized(ex),
-				ext4_ext_get_actual_len(ex),
+				ext4_ext_get_actual_blocks(ex, inode->i_sb),
 				newblock);
 		ex++;
 	}
@@ -599,7 +601,7 @@ ext4_ext_binsearch(struct inode *inode,
 			le32_to_cpu(path->p_ext->ee_block),
 			ext4_ext_pblock(path->p_ext),
 			ext4_ext_is_uninitialized(path->p_ext),
-			ext4_ext_get_actual_len(path->p_ext));
+			ext4_ext_get_actual_blocks(path->p_ext, inode->i_sb));
 
 #ifdef CHECK_BINSEARCH
 	{
@@ -1222,7 +1224,7 @@ static int ext4_ext_search_left(struct inode *inode,
 	 * first one in the file */
 
 	ex = path[depth].p_ext;
-	ee_len = ext4_ext_get_actual_len(ex);
+	ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
 	if (*logical < le32_to_cpu(ex->ee_block)) {
 		if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
 			EXT4_ERROR_INODE(inode,
@@ -1292,7 +1294,7 @@ static int ext4_ext_search_right(struct inode *inode,
 	 * first one in the file */
 
 	ex = path[depth].p_ext;
-	ee_len = ext4_ext_get_actual_len(ex);
+	ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
 	if (*logical < le32_to_cpu(ex->ee_block)) {
 		if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
 			EXT4_ERROR_INODE(inode,
@@ -1506,7 +1508,8 @@ int
 ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
 				struct ext4_extent *ex2)
 {
-	unsigned short ext1_ee_len, ext2_ee_len, max_len;
+	/* unit: cluster */
+	unsigned int ext1_ee_len, ext2_ee_len, max_len;
 
 	/*
 	 * Make sure that either both extents are uninitialized, or
@@ -1539,7 +1542,8 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
 		return 0;
 #endif
 
-	if (ext4_ext_pblock(ex1) + ext1_ee_len == ext4_ext_pblock(ex2))
+	if (ext4_ext_pblock(ex1) + EXT4_INODE_C2B(inode, ext1_ee_len)
+			== ext4_ext_pblock(ex2))
 		return 1;
 	return 0;
 }
@@ -1633,7 +1637,7 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
 	unsigned int ret = 0;
 
 	b1 = le32_to_cpu(newext->ee_block);
-	len1 = ext4_ext_get_actual_len(newext);
+	len1 = ext4_ext_get_actual_blocks(newext, inode->i_sb);
 	depth = ext_depth(inode);
 	if (!path[depth].p_ext)
 		goto out;
@@ -1654,13 +1658,13 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
 	/* check for wrap through zero on extent logical start block*/
 	if (b1 + len1 < b1) {
 		len1 = EXT_MAX_BLOCKS - b1;
-		newext->ee_len = cpu_to_le16(len1);
+		newext->ee_len = cpu_to_le16(EXT4_B2C(sbi, len1));
 		ret = 1;
 	}
 
 	/* check for overlap */
 	if (b1 + len1 > b2) {
-		newext->ee_len = cpu_to_le16(b2 - b1);
+		newext->ee_len = cpu_to_le16(EXT4_B2C(sbi, b2 - b1));
 		ret = 1;
 	}
 out:
@@ -1702,10 +1706,10 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
 		&& ext4_can_extents_be_merged(inode, ex, newext)) {
 		ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n",
 			  ext4_ext_is_uninitialized(newext),
-			  ext4_ext_get_actual_len(newext),
+			  ext4_ext_get_actual_blocks(newext, inode->i_sb),
 			  le32_to_cpu(ex->ee_block),
 			  ext4_ext_is_uninitialized(ex),
-			  ext4_ext_get_actual_len(ex),
+			  ext4_ext_get_actual_blocks(ex, inode->i_sb),
 			  ext4_ext_pblock(ex));
 		err = ext4_ext_get_access(handle, inode, path + depth);
 		if (err)
@@ -1780,7 +1784,8 @@ has_space:
 				le32_to_cpu(newext->ee_block),
 				ext4_ext_pblock(newext),
 				ext4_ext_is_uninitialized(newext),
-				ext4_ext_get_actual_len(newext));
+				ext4_ext_get_actual_blocks(newext,
+					inode->i_sb));
 		path[depth].p_ext = EXT_FIRST_EXTENT(eh);
 	} else if (le32_to_cpu(newext->ee_block)
 			   > le32_to_cpu(nearex->ee_block)) {
@@ -1794,7 +1799,8 @@ has_space:
 					le32_to_cpu(newext->ee_block),
 					ext4_ext_pblock(newext),
 					ext4_ext_is_uninitialized(newext),
-					ext4_ext_get_actual_len(newext),
+					ext4_ext_get_actual_blocks(newext,
+						inode->i_sb),
 					nearex, len, nearex + 1, nearex + 2);
 			memmove(nearex + 2, nearex + 1, len);
 		}
@@ -1808,7 +1814,8 @@ has_space:
 				le32_to_cpu(newext->ee_block),
 				ext4_ext_pblock(newext),
 				ext4_ext_is_uninitialized(newext),
-				ext4_ext_get_actual_len(newext),
+				ext4_ext_get_actual_blocks(newext,
+					inode->i_sb),
 				nearex, len, nearex, nearex + 1);
 		memmove(nearex + 1, nearex, len);
 		path[depth].p_ext = nearex;
@@ -1891,7 +1898,7 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
 			if (block + num < end)
 				end = block + num;
 		} else if (block >= le32_to_cpu(ex->ee_block)
-					+ ext4_ext_get_actual_len(ex)) {
+			+ ext4_ext_get_actual_blocks(ex, inode->i_sb)) {
 			/* need to allocate space after found extent */
 			start = block;
 			end = block + num;
@@ -1904,7 +1911,7 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
 			 */
 			start = block;
 			end = le32_to_cpu(ex->ee_block)
-				+ ext4_ext_get_actual_len(ex);
+				+ ext4_ext_get_actual_blocks(ex, inode->i_sb);
 			if (block + num < end)
 				end = block + num;
 			exists = 1;
@@ -1915,7 +1922,7 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
 
 		if (!exists) {
 			cbex.ec_block = start;
-			cbex.ec_len = end - start;
+			cbex.ec_len = EXT4_INODE_B2C(inode, end - start);
 			cbex.ec_start = 0;
 		} else {
 			cbex.ec_block = le32_to_cpu(ex->ee_block);
@@ -1947,7 +1954,7 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
 			path = NULL;
 		}
 
-		block = cbex.ec_block + cbex.ec_len;
+		block = cbex.ec_block + EXT4_INODE_C2B(inode, cbex.ec_len);
 	}
 
 	if (path) {
@@ -1968,7 +1975,7 @@ ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block,
 	trace_ext4_ext_put_in_cache(inode, block, len, start);
 	cex = &EXT4_I(inode)->i_cached_extent;
 	cex->ec_block = block;
-	cex->ec_len = len;
+	cex->ec_len = EXT4_INODE_B2C(inode, len);
 	cex->ec_start = start;
 	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 }
@@ -1999,17 +2006,17 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
 		ext_debug("cache gap(before): %u [%u:%u]",
 				block,
 				le32_to_cpu(ex->ee_block),
-				 ext4_ext_get_actual_len(ex));
+				 ext4_ext_get_actual_blocks(ex, inode->i_sb));
 	} else if (block >= le32_to_cpu(ex->ee_block)
-			+ ext4_ext_get_actual_len(ex)) {
+			+ ext4_ext_get_actual_blocks(ex, inode->i_sb)) {
 		ext4_lblk_t next;
 		lblock = le32_to_cpu(ex->ee_block)
-			+ ext4_ext_get_actual_len(ex);
+			+ ext4_ext_get_actual_blocks(ex, inode->i_sb);
 
 		next = ext4_ext_next_allocated_block(path);
 		ext_debug("cache gap(after): [%u:%u] %u",
 				le32_to_cpu(ex->ee_block),
-				ext4_ext_get_actual_len(ex),
+				ext4_ext_get_actual_blocks(ex, inode->i_sb),
 				block);
 		BUG_ON(next == lblock);
 		len = next - lblock;
@@ -2207,7 +2214,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
 			      ext4_lblk_t from, ext4_lblk_t to)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-	unsigned short ee_len =  ext4_ext_get_actual_len(ex);
+	unsigned int ee_len =  ext4_ext_get_actual_blocks(ex, inode->i_sb);
 	ext4_fsblk_t pblk;
 	int flags = EXT4_FREE_BLOCKS_FORGET;
 
@@ -2319,7 +2326,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 	ext4_lblk_t a, b, block;
 	unsigned num;
 	ext4_lblk_t ex_ee_block;
-	unsigned short ex_ee_len;
+	unsigned int ex_ee_len;
 	unsigned uninitialized = 0;
 	struct ext4_extent *ex;
 	struct ext4_map_blocks map;
@@ -2337,7 +2344,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 	ex = EXT_LAST_EXTENT(eh);
 
 	ex_ee_block = le32_to_cpu(ex->ee_block);
-	ex_ee_len = ext4_ext_get_actual_len(ex);
+	ex_ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
 
 	trace_ext4_ext_rm_leaf(inode, start, ex_ee_block, ext4_ext_pblock(ex),
 			       ex_ee_len, *partial_cluster);
@@ -2364,7 +2371,8 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 		if (end <= ex_ee_block) {
 			ex--;
 			ex_ee_block = le32_to_cpu(ex->ee_block);
-			ex_ee_len = ext4_ext_get_actual_len(ex);
+			ex_ee_len = ext4_ext_get_actual_blocks(ex,
+					inode->i_sb);
 			continue;
 		} else if (a != ex_ee_block &&
 			b != ex_ee_block + ex_ee_len - 1) {
@@ -2399,7 +2407,8 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 				if (err < 0)
 					goto out;
 
-				ex_ee_len = ext4_ext_get_actual_len(ex);
+				ex_ee_len = ext4_ext_get_actual_blocks(ex,
+						inode->i_sb);
 
 				b = ex_ee_block+ex_ee_len - 1 < end ?
 					ex_ee_block+ex_ee_len - 1 : end;
@@ -2485,7 +2494,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 		}
 
 		ex->ee_block = cpu_to_le32(block);
-		ex->ee_len = cpu_to_le16(num);
+		ex->ee_len = cpu_to_le16(EXT4_B2C(sbi, num));
 		/*
 		 * Do not mark uninitialized if all the blocks in the
 		 * extent have been removed.
@@ -2523,7 +2532,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 				ext4_ext_pblock(ex));
 		ex--;
 		ex_ee_block = le32_to_cpu(ex->ee_block);
-		ex_ee_len = ext4_ext_get_actual_len(ex);
+		ex_ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
 	}
 
 	if (correct_index && eh->eh_entries)
@@ -2706,7 +2715,7 @@ again:
 			flags |= EXT4_FREE_BLOCKS_METADATA;
 
 		ext4_free_blocks(handle, inode, NULL,
-				 EXT4_C2B(EXT4_SB(sb), partial_cluster),
+				 EXT4_INODE_C2B(inode, partial_cluster),
 				 EXT4_SB(sb)->s_cluster_ratio, flags);
 		partial_cluster = 0;
 	}
@@ -2793,7 +2802,7 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
 	unsigned int ee_len;
 	int ret;
 
-	ee_len    = ext4_ext_get_actual_len(ex);
+	ee_len    = ext4_ext_get_actual_blocks(ex, inode->i_sb);
 	ee_pblock = ext4_ext_pblock(ex);
 
 	ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS);
@@ -2854,7 +2863,7 @@ static int ext4_split_extent_at(handle_t *handle,
 	depth = ext_depth(inode);
 	ex = path[depth].p_ext;
 	ee_block = le32_to_cpu(ex->ee_block);
-	ee_len = ext4_ext_get_actual_len(ex);
+	ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
 	newblock = split - ee_block + ext4_ext_pblock(ex);
 
 	BUG_ON(split < ee_block || split >= (ee_block + ee_len));
@@ -2883,7 +2892,7 @@ static int ext4_split_extent_at(handle_t *handle,
 
 	/* case a */
 	memcpy(&orig_ex, ex, sizeof(orig_ex));
-	ex->ee_len = cpu_to_le16(split - ee_block);
+	ex->ee_len = cpu_to_le16(EXT4_INODE_B2C(inode, split - ee_block));
 	if (split_flag & EXT4_EXT_MARK_UNINIT1)
 		ext4_ext_mark_uninitialized(ex);
 
@@ -2897,7 +2906,8 @@ static int ext4_split_extent_at(handle_t *handle,
 
 	ex2 = &newex;
 	ex2->ee_block = cpu_to_le32(split);
-	ex2->ee_len   = cpu_to_le16(ee_len - (split - ee_block));
+	ex2->ee_len   = cpu_to_le16(
+			EXT4_INODE_B2C(inode, ee_len - (split - ee_block)));
 	ext4_ext_store_pblock(ex2, newblock);
 	if (split_flag & EXT4_EXT_MARK_UNINIT2)
 		ext4_ext_mark_uninitialized(ex2);
@@ -2908,7 +2918,7 @@ static int ext4_split_extent_at(handle_t *handle,
 		if (err)
 			goto fix_extent_len;
 		/* update the extent length and mark as initialized */
-		ex->ee_len = cpu_to_le32(ee_len);
+		ex->ee_len = cpu_to_le32(EXT4_INODE_B2C(inode, ee_len));
 		ext4_ext_try_to_merge(inode, path, ex);
 		err = ext4_ext_dirty(handle, inode, path + depth);
 		goto out;
@@ -2953,7 +2963,7 @@ static int ext4_split_extent(handle_t *handle,
 	depth = ext_depth(inode);
 	ex = path[depth].p_ext;
 	ee_block = le32_to_cpu(ex->ee_block);
-	ee_len = ext4_ext_get_actual_len(ex);
+	ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
 	uninitialized = ext4_ext_is_uninitialized(ex);
 
 	if (map->m_lblk + map->m_len < ee_block + ee_len) {
@@ -3028,7 +3038,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	depth = ext_depth(inode);
 	ex = path[depth].p_ext;
 	ee_block = le32_to_cpu(ex->ee_block);
-	ee_len = ext4_ext_get_actual_len(ex);
+	ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
 	allocated = ee_len - (map->m_lblk - ee_block);
 
 	WARN_ON(map->m_lblk < ee_block);
@@ -3070,7 +3080,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 			/* case 3 */
 			zero_ex.ee_block =
 					 cpu_to_le32(map->m_lblk);
-			zero_ex.ee_len = cpu_to_le16(allocated);
+			zero_ex.ee_len = cpu_to_le16(
+					EXT4_INODE_B2C(inode, allocated));
 			ext4_ext_store_pblock(&zero_ex,
 				ext4_ext_pblock(ex) + map->m_lblk - ee_block);
 			err = ext4_ext_zeroout(inode, &zero_ex);
@@ -3084,8 +3095,9 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 			/* case 2 */
 			if (map->m_lblk != ee_block) {
 				zero_ex.ee_block = ex->ee_block;
-				zero_ex.ee_len = cpu_to_le16(map->m_lblk -
-							ee_block);
+				zero_ex.ee_len =
+					cpu_to_le16(EXT4_INODE_B2C(inode,
+					map->m_lblk - ee_block));
 				ext4_ext_store_pblock(&zero_ex,
 						      ext4_ext_pblock(ex));
 				err = ext4_ext_zeroout(inode, &zero_ex);
@@ -3157,7 +3169,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
 	depth = ext_depth(inode);
 	ex = path[depth].p_ext;
 	ee_block = le32_to_cpu(ex->ee_block);
-	ee_len = ext4_ext_get_actual_len(ex);
+	ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
 
 	split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
 	split_flag |= EXT4_EXT_MARK_UNINIT2;
@@ -3180,7 +3192,7 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
 	ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical"
 		"block %llu, max_blocks %u\n", inode->i_ino,
 		(unsigned long long)le32_to_cpu(ex->ee_block),
-		ext4_ext_get_actual_len(ex));
+		ext4_ext_get_actual_blocks(ex, inode->i_sb));
 
 	err = ext4_ext_get_access(handle, inode, path + depth);
 	if (err)
@@ -3242,7 +3254,7 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
 	 * function immediately.
 	 */
 	if (lblk + len < le32_to_cpu(last_ex->ee_block) +
-	    ext4_ext_get_actual_len(last_ex))
+	    ext4_ext_get_actual_blocks(last_ex, inode->i_sb))
 		return 0;
 	/*
 	 * If the caller does appear to be planning to write at or
@@ -3645,7 +3657,7 @@ static int get_implied_cluster_alloc(struct super_block *sb,
 	ext4_lblk_t rr_cluster_start, rr_cluster_end;
 	ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
 	ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
-	unsigned short ee_len = ext4_ext_get_actual_len(ex);
+	unsigned int ee_len = ext4_ext_get_actual_blocks(ex, sb);
 
 	/* The extent passed in that we are trying to match */
 	ex_cluster_start = EXT4_B2C(sbi, ee_block);
@@ -3761,7 +3773,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 				   - le32_to_cpu(newex.ee_block)
 				   + ext4_ext_pblock(&newex);
 			/* number of remaining blocks in the extent */
-			allocated = ext4_ext_get_actual_len(&newex) -
+			allocated = ext4_ext_get_actual_blocks(&newex,
+				inode->i_sb) -
 				(map->m_lblk - le32_to_cpu(newex.ee_block));
 			goto out;
 		}
@@ -3796,13 +3809,13 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 		ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
 		ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
 		ext4_fsblk_t partial_cluster = 0;
-		unsigned short ee_len;
+		unsigned int ee_len;
 
 		/*
 		 * Uninitialized extents are treated as holes, except that
 		 * we split out initialized portions during a write.
 		 */
-		ee_len = ext4_ext_get_actual_len(ex);
+		ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
 
 		trace_ext4_ext_show_extent(inode, ee_block, ee_start, ee_len);
 
@@ -3880,7 +3893,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 
 				depth = ext_depth(inode);
 				ex = path[depth].p_ext;
-				ee_len = ext4_ext_get_actual_len(ex);
+				ee_len = ext4_ext_get_actual_blocks(ex,
+						inode->i_sb);
 				ee_block = le32_to_cpu(ex->ee_block);
 				ee_start = ext4_ext_pblock(ex);
 
@@ -4064,13 +4078,14 @@ got_allocated_blocks:
 		 * but otherwise we'd need to call it every free() */
 		ext4_discard_preallocations(inode);
 		ext4_free_blocks(handle, inode, NULL, ext4_ext_pblock(&newex),
-				 ext4_ext_get_actual_len(&newex), fb_flags);
+				ext4_ext_get_actual_blocks(&newex, inode->i_sb),
+				fb_flags);
 		goto out2;
 	}
 
 	/* previous routine could use block we allocated */
 	newblock = ext4_ext_pblock(&newex);
-	allocated = ext4_ext_get_actual_len(&newex);
+	allocated = ext4_ext_get_actual_blocks(&newex, inode->i_sb);
 	if (allocated > map->m_len)
 		allocated = map->m_len;
 	map->m_flags |= EXT4_MAP_NEW;
-- 
1.7.3.2


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 2/9 bigalloc] ext4: change ext4_ext_map_blocks to allocate clusters instead of blocks
  2011-11-09 11:16 [PATCH 0/9 bigalloc] ext4: change unit of extent's ee_block and ee_len from block to cluster Robin Dong
  2011-11-09 11:17 ` [PATCH 1/9 bigalloc] ext4: get blocks from ext4_ext_get_actual_blocks Robin Dong
@ 2011-11-09 11:17 ` Robin Dong
  2011-11-09 11:17 ` [PATCH 3/9 bigalloc] ext4: change unit of ee_block of extent to cluster Robin Dong
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 10+ messages in thread
From: Robin Dong @ 2011-11-09 11:17 UTC (permalink / raw)
  To: linux-ext4; +Cc: Robin Dong

From: Robin Dong <sanbai@taobao.com>

We need to align to a cluster when users allocate just one block.

Signed-off-by: Robin Dong <sanbai@taobao.com>
---
 fs/ext4/extents.c |   45 ++++++++++++---------------------------------
 1 files changed, 12 insertions(+), 33 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 597ebcb..3430ddf 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3949,20 +3949,13 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	 * Okay, we need to do block allocation.
 	 */
 	map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
-	newex.ee_block = cpu_to_le32(map->m_lblk);
+	newex.ee_block = cpu_to_le32(map->m_lblk & ~(sbi->s_cluster_ratio-1));
 	cluster_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
 
-	/*
-	 * If we are doing bigalloc, check to see if the extent returned
-	 * by ext4_ext_find_extent() implies a cluster we can use.
-	 */
-	if (cluster_offset && ex &&
-	    get_implied_cluster_alloc(inode->i_sb, map, ex, path)) {
-		ar.len = allocated = map->m_len;
-		newblock = map->m_pblk;
-		map->m_flags |= EXT4_MAP_FROM_CLUSTER;
-		goto got_allocated_blocks;
-	}
+	if (ex)
+		BUG_ON((le32_to_cpu(ex->ee_block) +
+			EXT4_C2B(sbi, ext4_ext_get_actual_len(ex))) >
+			(map->m_lblk & ~(sbi->s_cluster_ratio-1)));
 
 	/* find neighbour allocated blocks */
 	ar.lleft = map->m_lblk;
@@ -3975,16 +3968,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	if (err)
 		goto out2;
 
-	/* Check if the extent after searching to the right implies a
-	 * cluster we can use. */
-	if ((sbi->s_cluster_ratio > 1) && ex2 &&
-	    get_implied_cluster_alloc(inode->i_sb, map, ex2, path)) {
-		ar.len = allocated = map->m_len;
-		newblock = map->m_pblk;
-		map->m_flags |= EXT4_MAP_FROM_CLUSTER;
-		goto got_allocated_blocks;
-	}
-
 	/*
 	 * See if request is beyond maximum number of blocks we can have in
 	 * a single extent. For an initialized extent this limit is
@@ -3999,7 +3982,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 		map->m_len = EXT_UNINIT_MAX_LEN;
 
 	/* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */
-	newex.ee_len = cpu_to_le16(map->m_len);
+	newex.ee_len = cpu_to_le16(EXT4_NUM_B2C(sbi, map->m_len));
 	err = ext4_ext_check_overlap(sbi, inode, &newex, path);
 	if (err)
 		allocated = ext4_ext_get_actual_len(&newex);
@@ -4036,14 +4019,11 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 		  ar.goal, newblock, allocated);
 	free_on_err = 1;
 	allocated_clusters = ar.len;
-	ar.len = EXT4_C2B(sbi, ar.len) - offset;
-	if (ar.len > allocated)
-		ar.len = allocated;
 
 got_allocated_blocks:
 	/* try to insert new extent into found leaf and return */
-	ext4_ext_store_pblock(&newex, newblock + offset);
-	newex.ee_len = cpu_to_le16(ar.len);
+	ext4_ext_store_pblock(&newex, newblock);
+	newex.ee_len = cpu_to_le16(allocated_clusters);
 	/* Mark uninitialized */
 	if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){
 		ext4_ext_mark_uninitialized(&newex);
@@ -4066,7 +4046,8 @@ got_allocated_blocks:
 			map->m_flags |= EXT4_MAP_UNINIT;
 	}
 
-	err = check_eofblocks_fl(handle, inode, map->m_lblk, path, ar.len);
+	err = check_eofblocks_fl(handle, inode, map->m_lblk, path,
+			EXT4_C2B(sbi, allocated_clusters));
 	if (!err)
 		err = ext4_ext_insert_extent(handle, inode, path,
 					     &newex, flags);
@@ -4086,8 +4067,6 @@ got_allocated_blocks:
 	/* previous routine could use block we allocated */
 	newblock = ext4_ext_pblock(&newex);
 	allocated = ext4_ext_get_actual_blocks(&newex, inode->i_sb);
-	if (allocated > map->m_len)
-		allocated = map->m_len;
 	map->m_flags |= EXT4_MAP_NEW;
 
 	/*
@@ -4174,7 +4153,7 @@ got_allocated_blocks:
 	 * when it is _not_ an uninitialized extent.
 	 */
 	if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) {
-		ext4_ext_put_in_cache(inode, map->m_lblk, allocated, newblock);
+		ext4_ext_put_in_cache(inode, ar.logical, allocated, newblock);
 		ext4_update_inode_fsync_trans(handle, inode, 1);
 	} else
 		ext4_update_inode_fsync_trans(handle, inode, 0);
@@ -4183,7 +4162,7 @@ out:
 		allocated = map->m_len;
 	ext4_ext_show_leaf(inode, path);
 	map->m_flags |= EXT4_MAP_MAPPED;
-	map->m_pblk = newblock;
+	map->m_pblk = newblock + offset;
 	map->m_len = allocated;
 out2:
 	if (path) {
-- 
1.7.3.2


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 3/9 bigalloc] ext4: change unit of ee_block of extent to cluster
  2011-11-09 11:16 [PATCH 0/9 bigalloc] ext4: change unit of extent's ee_block and ee_len from block to cluster Robin Dong
  2011-11-09 11:17 ` [PATCH 1/9 bigalloc] ext4: get blocks from ext4_ext_get_actual_blocks Robin Dong
  2011-11-09 11:17 ` [PATCH 2/9 bigalloc] ext4: change ext4_ext_map_blocks to allocate clusters instead of blocks Robin Dong
@ 2011-11-09 11:17 ` Robin Dong
  2011-11-09 11:17 ` [PATCH 4/9] ext4: remove unused functions and tags Robin Dong
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 10+ messages in thread
From: Robin Dong @ 2011-11-09 11:17 UTC (permalink / raw)
  To: linux-ext4; +Cc: Robin Dong

From: Robin Dong <sanbai@taobao.com>

Change the unit of ee_block (of extent) from block to cluster

Signed-off-by: Robin Dong <sanbai@taobao.com>
---
 fs/ext4/extents.c |  286 ++++++++++++++++++++++++++++++++---------------------
 1 files changed, 174 insertions(+), 112 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 3430ddf..4f764ee 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -140,7 +140,8 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
 		ex = path[depth].p_ext;
 		if (ex) {
 			ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
-			ext4_lblk_t ext_block = le32_to_cpu(ex->ee_block);
+			ext4_lblk_t ext_block = EXT4_INODE_C2B(inode,
+					le32_to_cpu(ex->ee_block));
 
 			if (block > ext_block)
 				return ext_pblk + (block - ext_block);
@@ -168,7 +169,8 @@ ext4_ext_new_meta_block(handle_t *handle, struct inode *inode,
 {
 	ext4_fsblk_t goal, newblock;
 
-	goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
+	goal = ext4_ext_find_goal(inode, path,
+			EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block)));
 	newblock = ext4_new_meta_blocks(handle, inode, goal, flags,
 					NULL, err);
 	return newblock;
@@ -411,11 +413,13 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
 	ext_debug("path:");
 	for (k = 0; k <= l; k++, path++) {
 		if (path->p_idx) {
-		  ext_debug("  %d->%llu", le32_to_cpu(path->p_idx->ei_block),
-			    ext4_idx_pblock(path->p_idx));
+			ext_debug("  %d->%llu", EXT4_INODE_C2B(inode,
+				le32_to_cpu(path->p_idx->ei_block)),
+				ext4_idx_pblock(path->p_idx));
 		} else if (path->p_ext) {
 			ext_debug("  %d:[%d]%d:%llu ",
-				  le32_to_cpu(path->p_ext->ee_block),
+				  EXT4_INODE_C2B(inode,
+					  le32_to_cpu(path->p_ext->ee_block)),
 				  ext4_ext_is_uninitialized(path->p_ext),
 				  ext4_ext_get_actual_blocks(path->p_ext,
 					  inode->i_sb),
@@ -442,7 +446,8 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
 	ext_debug("Displaying leaf extents for inode %lu\n", inode->i_ino);
 
 	for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
-		ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block),
+		ext_debug("%d:[%d]%d:%llu ",
+			  EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block)),
 			  ext4_ext_is_uninitialized(ex),
 			  ext4_ext_get_actual_blocks(ex, inode->i_sb),
 			  ext4_ext_pblock(ex));
@@ -461,7 +466,8 @@ static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path,
 		idx = path[level].p_idx;
 		while (idx <= EXT_MAX_INDEX(path[level].p_hdr)) {
 			ext_debug("%d: move %d:%llu in new index %llu\n", level,
-					le32_to_cpu(idx->ei_block),
+					EXT4_INODE_C2B(inode,
+						le32_to_cpu(idx->ei_block)),
 					ext4_idx_pblock(idx),
 					newblock);
 			idx++;
@@ -473,7 +479,8 @@ static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path,
 	ex = path[depth].p_ext;
 	while (ex <= EXT_MAX_EXTENT(path[depth].p_hdr)) {
 		ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n",
-				le32_to_cpu(ex->ee_block),
+				EXT4_INODE_C2B(inode,
+					le32_to_cpu(ex->ee_block)),
 				ext4_ext_pblock(ex),
 				ext4_ext_is_uninitialized(ex),
 				ext4_ext_get_actual_blocks(ex, inode->i_sb),
@@ -519,17 +526,19 @@ ext4_ext_binsearch_idx(struct inode *inode,
 	r = EXT_LAST_INDEX(eh);
 	while (l <= r) {
 		m = l + (r - l) / 2;
-		if (block < le32_to_cpu(m->ei_block))
+		if (block < EXT4_INODE_C2B(inode, le32_to_cpu(m->ei_block)))
 			r = m - 1;
 		else
 			l = m + 1;
-		ext_debug("%p(%u):%p(%u):%p(%u) ", l, le32_to_cpu(l->ei_block),
-				m, le32_to_cpu(m->ei_block),
-				r, le32_to_cpu(r->ei_block));
+		ext_debug("%p(%u):%p(%u):%p(%u) ",
+			l, EXT4_INODE_C2B(inode, le32_to_cpu(l->ei_block)),
+			m, EXT4_INODE_C2B(inode, le32_to_cpu(m->ei_block)),
+			r, EXT4_INODE_C2B(inode, le32_to_cpu(r->ei_block)));
 	}
 
 	path->p_idx = l - 1;
-	ext_debug("  -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block),
+	ext_debug("  -> %d->%lld ",
+		  EXT4_INODE_C2B(inode, le32_to_cpu(path->p_idx->ei_block)),
 		  ext4_idx_pblock(path->p_idx));
 
 #ifdef CHECK_BINSEARCH
@@ -545,12 +554,14 @@ ext4_ext_binsearch_idx(struct inode *inode,
 				       "first=0x%p\n", k,
 				       ix, EXT_FIRST_INDEX(eh));
 				printk(KERN_DEBUG "%u <= %u\n",
-				       le32_to_cpu(ix->ei_block),
-				       le32_to_cpu(ix[-1].ei_block));
+				       EXT4_INODE_C2B(inode,
+					       le32_to_cpu(ix->ei_block)),
+				       EXT4_INODE_C2B(inode,
+					       le32_to_cpu(ix[-1].ei_block)));
 			}
 			BUG_ON(k && le32_to_cpu(ix->ei_block)
 					   <= le32_to_cpu(ix[-1].ei_block));
-			if (block < le32_to_cpu(ix->ei_block))
+			if (block < EXT4_INODE_C2B(inode, le32_to_cpu(ix->ei_block)))
 				break;
 			chix = ix;
 		}
@@ -587,21 +598,22 @@ ext4_ext_binsearch(struct inode *inode,
 
 	while (l <= r) {
 		m = l + (r - l) / 2;
-		if (block < le32_to_cpu(m->ee_block))
+		if (block < EXT4_INODE_C2B(inode, le32_to_cpu(m->ee_block)))
 			r = m - 1;
 		else
 			l = m + 1;
-		ext_debug("%p(%u):%p(%u):%p(%u) ", l, le32_to_cpu(l->ee_block),
-				m, le32_to_cpu(m->ee_block),
-				r, le32_to_cpu(r->ee_block));
+		ext_debug("%p(%u):%p(%u):%p(%u) ",
+			l, EXT4_INODE_C2B(inode, le32_to_cpu(l->ee_block)),
+			m, EXT4_INODE_C2B(inode, le32_to_cpu(m->ee_block)),
+			r, EXT4_INODE_C2B(inode, le32_to_cpu(r->ee_block)));
 	}
 
 	path->p_ext = l - 1;
 	ext_debug("  -> %d:%llu:[%d]%d ",
-			le32_to_cpu(path->p_ext->ee_block),
-			ext4_ext_pblock(path->p_ext),
-			ext4_ext_is_uninitialized(path->p_ext),
-			ext4_ext_get_actual_blocks(path->p_ext, inode->i_sb));
+		EXT4_INODE_C2B(inode, le32_to_cpu(path->p_ext->ee_block)),
+		ext4_ext_pblock(path->p_ext),
+		ext4_ext_is_uninitialized(path->p_ext),
+		ext4_ext_get_actual_blocks(path->p_ext, inode->i_sb));
 
 #ifdef CHECK_BINSEARCH
 	{
@@ -612,7 +624,8 @@ ext4_ext_binsearch(struct inode *inode,
 		for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ex++) {
 			BUG_ON(k && le32_to_cpu(ex->ee_block)
 					  <= le32_to_cpu(ex[-1].ee_block));
-			if (block < le32_to_cpu(ex->ee_block))
+			if (block < EXT4_INODE_C2B(inode,
+						le32_to_cpu(ex->ee_block)))
 				break;
 			chex = ex;
 		}
@@ -737,10 +750,13 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
 	if (err)
 		return err;
 
+	/* variable "logical" is in unit of cluster */
 	if (unlikely(logical == le32_to_cpu(curp->p_idx->ei_block))) {
 		EXT4_ERROR_INODE(inode,
 				 "logical %d == ei_block %d!",
-				 logical, le32_to_cpu(curp->p_idx->ei_block));
+				 logical,
+				 EXT4_INODE_C2B(inode,
+					 le32_to_cpu(curp->p_idx->ei_block)));
 		return -EIO;
 	}
 
@@ -971,8 +987,9 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 		if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) !=
 					EXT_LAST_INDEX(path[i].p_hdr))) {
 			EXT4_ERROR_INODE(inode,
-					 "EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!",
-					 le32_to_cpu(path[i].p_ext->ee_block));
+				"EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!",
+				EXT4_INODE_C2B(inode,
+					le32_to_cpu(path[i].p_ext->ee_block)));
 			err = -EIO;
 			goto cleanup;
 		}
@@ -1112,7 +1129,8 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
 	neh = ext_inode_hdr(inode);
 	ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n",
 		  le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max),
-		  le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
+		  EXT4_INODE_C2B(inode,
+			  le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block)),
 		  ext4_idx_pblock(EXT_FIRST_INDEX(neh)));
 
 	neh->eh_depth = cpu_to_le16(path->p_depth + 1);
@@ -1158,7 +1176,8 @@ repeat:
 		/* refill path */
 		ext4_ext_drop_refs(path);
 		path = ext4_ext_find_extent(inode,
-				    (ext4_lblk_t)le32_to_cpu(newext->ee_block),
+				    (ext4_lblk_t)EXT4_INODE_C2B(inode,
+					    le32_to_cpu(newext->ee_block)),
 				    path);
 		if (IS_ERR(path))
 			err = PTR_ERR(path);
@@ -1172,7 +1191,8 @@ repeat:
 		/* refill path */
 		ext4_ext_drop_refs(path);
 		path = ext4_ext_find_extent(inode,
-				   (ext4_lblk_t)le32_to_cpu(newext->ee_block),
+				   (ext4_lblk_t)EXT4_INODE_C2B(inode,
+					   le32_to_cpu(newext->ee_block)),
 				    path);
 		if (IS_ERR(path)) {
 			err = PTR_ERR(path);
@@ -1225,11 +1245,13 @@ static int ext4_ext_search_left(struct inode *inode,
 
 	ex = path[depth].p_ext;
 	ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
-	if (*logical < le32_to_cpu(ex->ee_block)) {
+	if (*logical < EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))) {
 		if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
 			EXT4_ERROR_INODE(inode,
 					 "EXT_FIRST_EXTENT != ex *logical %d ee_block %d!",
-					 *logical, le32_to_cpu(ex->ee_block));
+					 *logical,
+					 EXT4_INODE_C2B(inode,
+						 le32_to_cpu(ex->ee_block)));
 			return -EIO;
 		}
 		while (--depth >= 0) {
@@ -1237,9 +1259,11 @@ static int ext4_ext_search_left(struct inode *inode,
 			if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
 				EXT4_ERROR_INODE(inode,
 				  "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!",
-				  ix != NULL ? ix->ei_block : 0,
+				  ix != NULL ? EXT4_INODE_C2B(
+					  inode, ix->ei_block) : 0,
 				  EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ?
-				    EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block : 0,
+				    EXT4_INODE_C2B(inode, EXT_FIRST_INDEX(
+					path[depth].p_hdr)->ei_block) : 0,
 				  depth);
 				return -EIO;
 			}
@@ -1247,14 +1271,19 @@ static int ext4_ext_search_left(struct inode *inode,
 		return 0;
 	}
 
-	if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
+	if (unlikely(*logical <
+		(EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block)) + ee_len))) {
 		EXT4_ERROR_INODE(inode,
 				 "logical %d < ee_block %d + ee_len %d!",
-				 *logical, le32_to_cpu(ex->ee_block), ee_len);
+				 *logical,
+				 EXT4_INODE_C2B(inode,
+					 le32_to_cpu(ex->ee_block)),
+				 ee_len);
 		return -EIO;
 	}
 
-	*logical = le32_to_cpu(ex->ee_block) + ee_len - 1;
+	*logical = EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))
+		+ ee_len - 1;
 	*phys = ext4_ext_pblock(ex) + ee_len - 1;
 	return 0;
 }
@@ -1295,7 +1324,7 @@ static int ext4_ext_search_right(struct inode *inode,
 
 	ex = path[depth].p_ext;
 	ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
-	if (*logical < le32_to_cpu(ex->ee_block)) {
+	if (*logical < EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))) {
 		if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
 			EXT4_ERROR_INODE(inode,
 					 "first_extent(path[%d].p_hdr) != ex",
@@ -1314,10 +1343,14 @@ static int ext4_ext_search_right(struct inode *inode,
 		goto found_extent;
 	}
 
-	if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
+	if (unlikely(*logical <
+		(EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block)) + ee_len))) {
 		EXT4_ERROR_INODE(inode,
 				 "logical %d < ee_block %d + ee_len %d!",
-				 *logical, le32_to_cpu(ex->ee_block), ee_len);
+				 *logical,
+				 EXT4_INODE_C2B(inode,
+					 le32_to_cpu(ex->ee_block)),
+				 ee_len);
 		return -EIO;
 	}
 
@@ -1368,7 +1401,7 @@ got_index:
 	}
 	ex = EXT_FIRST_EXTENT(eh);
 found_extent:
-	*logical = le32_to_cpu(ex->ee_block);
+	*logical = EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block));
 	*phys = ext4_ext_pblock(ex);
 	*ret_ex = ex;
 	if (bh)
@@ -1384,7 +1417,7 @@ found_extent:
  * with leaves.
  */
 static ext4_lblk_t
-ext4_ext_next_allocated_block(struct ext4_ext_path *path)
+ext4_ext_next_allocated_block(struct inode *inode, struct ext4_ext_path *path)
 {
 	int depth;
 
@@ -1397,14 +1430,16 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path)
 	while (depth >= 0) {
 		if (depth == path->p_depth) {
 			/* leaf */
-			if (path[depth].p_ext !=
-					EXT_LAST_EXTENT(path[depth].p_hdr))
-			  return le32_to_cpu(path[depth].p_ext[1].ee_block);
+			if (path[depth].p_ext != EXT_LAST_EXTENT(
+						path[depth].p_hdr))
+				return EXT4_INODE_C2B(inode, le32_to_cpu(
+						path[depth].p_ext[1].ee_block));
 		} else {
 			/* index */
-			if (path[depth].p_idx !=
-					EXT_LAST_INDEX(path[depth].p_hdr))
-			  return le32_to_cpu(path[depth].p_idx[1].ei_block);
+			if (path[depth].p_idx != EXT_LAST_INDEX(
+						path[depth].p_hdr))
+				return EXT4_INODE_C2B(inode, le32_to_cpu(
+						path[depth].p_idx[1].ei_block));
 		}
 		depth--;
 	}
@@ -1416,7 +1451,8 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path)
  * ext4_ext_next_leaf_block:
  * returns first allocated block from next leaf or EXT_MAX_BLOCKS
  */
-static ext4_lblk_t ext4_ext_next_leaf_block(struct ext4_ext_path *path)
+static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode,
+		struct ext4_ext_path *path)
 {
 	int depth;
 
@@ -1433,8 +1469,8 @@ static ext4_lblk_t ext4_ext_next_leaf_block(struct ext4_ext_path *path)
 	while (depth >= 0) {
 		if (path[depth].p_idx !=
 				EXT_LAST_INDEX(path[depth].p_hdr))
-			return (ext4_lblk_t)
-				le32_to_cpu(path[depth].p_idx[1].ei_block);
+			return (ext4_lblk_t) EXT4_INODE_C2B(inode,
+				le32_to_cpu(path[depth].p_idx[1].ei_block));
 		depth--;
 	}
 
@@ -1636,12 +1672,12 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
 	unsigned int depth, len1;
 	unsigned int ret = 0;
 
-	b1 = le32_to_cpu(newext->ee_block);
+	b1 = EXT4_INODE_C2B(inode, le32_to_cpu(newext->ee_block));
 	len1 = ext4_ext_get_actual_blocks(newext, inode->i_sb);
 	depth = ext_depth(inode);
 	if (!path[depth].p_ext)
 		goto out;
-	b2 = le32_to_cpu(path[depth].p_ext->ee_block);
+	b2 = EXT4_INODE_C2B(inode, le32_to_cpu(path[depth].p_ext->ee_block));
 	b2 &= ~(sbi->s_cluster_ratio - 1);
 
 	/*
@@ -1649,7 +1685,7 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
 	 * is before the requested block(s)
 	 */
 	if (b2 < b1) {
-		b2 = ext4_ext_next_allocated_block(path);
+		b2 = ext4_ext_next_allocated_block(inode, path);
 		if (b2 == EXT_MAX_BLOCKS)
 			goto out;
 		b2 &= ~(sbi->s_cluster_ratio - 1);
@@ -1707,7 +1743,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
 		ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n",
 			  ext4_ext_is_uninitialized(newext),
 			  ext4_ext_get_actual_blocks(newext, inode->i_sb),
-			  le32_to_cpu(ex->ee_block),
+			  EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block)),
 			  ext4_ext_is_uninitialized(ex),
 			  ext4_ext_get_actual_blocks(ex, inode->i_sb),
 			  ext4_ext_pblock(ex));
@@ -1740,7 +1776,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
 	fex = EXT_LAST_EXTENT(eh);
 	next = EXT_MAX_BLOCKS;
 	if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block))
-		next = ext4_ext_next_leaf_block(path);
+		next = ext4_ext_next_leaf_block(inode, path);
 	if (next != EXT_MAX_BLOCKS) {
 		ext_debug("next leaf block - %d\n", next);
 		BUG_ON(npath != NULL);
@@ -1781,7 +1817,8 @@ has_space:
 	if (!nearex) {
 		/* there is no extent in this leaf, create first one */
 		ext_debug("first extent in the leaf: %d:%llu:[%d]%d\n",
-				le32_to_cpu(newext->ee_block),
+				EXT4_INODE_C2B(inode,
+					le32_to_cpu(newext->ee_block)),
 				ext4_ext_pblock(newext),
 				ext4_ext_is_uninitialized(newext),
 				ext4_ext_get_actual_blocks(newext,
@@ -1796,7 +1833,8 @@ has_space:
 			len = len < 0 ? 0 : len;
 			ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, "
 					"move %d from 0x%p to 0x%p\n",
-					le32_to_cpu(newext->ee_block),
+					EXT4_INODE_C2B(inode,
+						le32_to_cpu(newext->ee_block)),
 					ext4_ext_pblock(newext),
 					ext4_ext_is_uninitialized(newext),
 					ext4_ext_get_actual_blocks(newext,
@@ -1811,7 +1849,8 @@ has_space:
 		len = len < 0 ? 0 : len;
 		ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, "
 				"move %d from 0x%p to 0x%p\n",
-				le32_to_cpu(newext->ee_block),
+				EXT4_INODE_C2B(inode,
+					le32_to_cpu(newext->ee_block)),
 				ext4_ext_pblock(newext),
 				ext4_ext_is_uninitialized(newext),
 				ext4_ext_get_actual_blocks(newext,
@@ -1883,7 +1922,7 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
 			break;
 		}
 		ex = path[depth].p_ext;
-		next = ext4_ext_next_allocated_block(path);
+		next = ext4_ext_next_allocated_block(inode, path);
 
 		exists = 0;
 		if (!ex) {
@@ -1891,26 +1930,29 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
 			 * all requested space */
 			start = block;
 			end = block + num;
-		} else if (le32_to_cpu(ex->ee_block) > block) {
+		} else if (EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))
+				> block) {
 			/* need to allocate space before found extent */
 			start = block;
-			end = le32_to_cpu(ex->ee_block);
+			end = EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block));
 			if (block + num < end)
 				end = block + num;
-		} else if (block >= le32_to_cpu(ex->ee_block)
+		} else if (block >=
+				EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))
 			+ ext4_ext_get_actual_blocks(ex, inode->i_sb)) {
 			/* need to allocate space after found extent */
 			start = block;
 			end = block + num;
 			if (end >= next)
 				end = next;
-		} else if (block >= le32_to_cpu(ex->ee_block)) {
+		} else if (block >= EXT4_INODE_C2B(inode,
+					le32_to_cpu(ex->ee_block))) {
 			/*
 			 * some part of requested space is covered
 			 * by found extent
 			 */
 			start = block;
-			end = le32_to_cpu(ex->ee_block)
+			end = EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))
 				+ ext4_ext_get_actual_blocks(ex, inode->i_sb);
 			if (block + num < end)
 				end = block + num;
@@ -1925,7 +1967,8 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
 			cbex.ec_len = EXT4_INODE_B2C(inode, end - start);
 			cbex.ec_start = 0;
 		} else {
-			cbex.ec_block = le32_to_cpu(ex->ee_block);
+			cbex.ec_block = EXT4_INODE_C2B(inode,
+					le32_to_cpu(ex->ee_block));
 			cbex.ec_len = ext4_ext_get_actual_len(ex);
 			cbex.ec_start = ext4_ext_pblock(ex);
 		}
@@ -2000,24 +2043,24 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
 		lblock = 0;
 		len = EXT_MAX_BLOCKS;
 		ext_debug("cache gap(whole file):");
-	} else if (block < le32_to_cpu(ex->ee_block)) {
+	} else if (block < EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))) {
 		lblock = block;
-		len = le32_to_cpu(ex->ee_block) - block;
+		len = EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block)) - block;
 		ext_debug("cache gap(before): %u [%u:%u]",
-				block,
-				le32_to_cpu(ex->ee_block),
-				 ext4_ext_get_actual_blocks(ex, inode->i_sb));
-	} else if (block >= le32_to_cpu(ex->ee_block)
+			block,
+			EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block)),
+			ext4_ext_get_actual_blocks(ex, inode->i_sb));
+	} else if (block >= EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))
 			+ ext4_ext_get_actual_blocks(ex, inode->i_sb)) {
 		ext4_lblk_t next;
-		lblock = le32_to_cpu(ex->ee_block)
+		lblock = EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))
 			+ ext4_ext_get_actual_blocks(ex, inode->i_sb);
 
-		next = ext4_ext_next_allocated_block(path);
+		next = ext4_ext_next_allocated_block(inode, path);
 		ext_debug("cache gap(after): [%u:%u] %u",
-				le32_to_cpu(ex->ee_block),
-				ext4_ext_get_actual_blocks(ex, inode->i_sb),
-				block);
+			EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block)),
+			ext4_ext_get_actual_blocks(ex, inode->i_sb),
+			block);
 		BUG_ON(next == lblock);
 		len = next - lblock;
 	} else {
@@ -2026,7 +2069,7 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
 	}
 
 	ext_debug(" -> %u:%lu\n", lblock, len);
-	ext4_ext_put_in_cache(inode, lblock, len, 0);
+	ext4_ext_put_in_cache(inode, EXT4_INODE_B2C(inode, lblock), len, 0);
 }
 
 /*
@@ -2062,11 +2105,14 @@ static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block,
 	if (cex->ec_len == 0)
 		goto errout;
 
-	if (in_range(block, cex->ec_block, cex->ec_len)) {
+	if (in_range(block, EXT4_C2B(sbi, cex->ec_block),
+				EXT4_C2B(sbi, cex->ec_len))) {
 		memcpy(ex, cex, sizeof(struct ext4_ext_cache));
 		ext_debug("%u cached by %u:%u:%llu\n",
 				block,
-				cex->ec_block, cex->ec_len, cex->ec_start);
+				EXT4_C2B(sbi, cex->ec_block),
+				EXT4_C2B(sbi, cex->ec_len),
+				EXT4_C2B(sbi, cex->ec_start));
 		ret = 1;
 	}
 errout:
@@ -2229,9 +2275,10 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
 	 */
 	flags |= EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER;
 
-	trace_ext4_remove_blocks(inode, cpu_to_le32(ex->ee_block),
-				 ext4_ext_pblock(ex), ee_len, from,
-				 to, *partial_cluster);
+	trace_ext4_remove_blocks(inode,
+			EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block)),
+			ext4_ext_pblock(ex), ee_len, from,
+			to, *partial_cluster);
 	/*
 	 * If we have a partial cluster, and it's different from the
 	 * cluster of the last block, we need to explicitly free the
@@ -2260,12 +2307,14 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
 		spin_unlock(&sbi->s_ext_stats_lock);
 	}
 #endif
-	if (from >= le32_to_cpu(ex->ee_block)
-	    && to == le32_to_cpu(ex->ee_block) + ee_len - 1) {
+	if (from >= EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))
+	    && to == EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))
+	    + ee_len - 1) {
 		/* tail removal */
 		ext4_lblk_t num;
 
-		num = le32_to_cpu(ex->ee_block) + ee_len - from;
+		num = EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))
+				+ ee_len - from;
 		pblk = ext4_ext_pblock(ex) + ee_len - num;
 		ext_debug("free last %u blocks starting %llu\n", num, pblk);
 		ext4_free_blocks(handle, inode, NULL, pblk, num, flags);
@@ -2282,8 +2331,9 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
 			*partial_cluster = EXT4_B2C(sbi, pblk);
 		else
 			*partial_cluster = 0;
-	} else if (from == le32_to_cpu(ex->ee_block)
-		   && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
+	} else if (from == EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))
+		   && to <= EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))
+		   + ee_len - 1) {
 		/* head removal */
 		ext4_lblk_t num;
 		ext4_fsblk_t start;
@@ -2297,7 +2347,10 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
 	} else {
 		printk(KERN_INFO "strange request: removal(2) "
 				"%u-%u from %u:%u\n",
-				from, to, le32_to_cpu(ex->ee_block), ee_len);
+				from, to,
+				EXT4_INODE_C2B(inode,
+					le32_to_cpu(ex->ee_block)),
+				ee_len);
 	}
 	return 0;
 }
@@ -2343,7 +2396,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 	/* find where to start removing */
 	ex = EXT_LAST_EXTENT(eh);
 
-	ex_ee_block = le32_to_cpu(ex->ee_block);
+	ex_ee_block = EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block));
 	ex_ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
 
 	trace_ext4_ext_rm_leaf(inode, start, ex_ee_block, ext4_ext_pblock(ex),
@@ -2370,7 +2423,8 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 		/* If this extent is beyond the end of the hole, skip it */
 		if (end <= ex_ee_block) {
 			ex--;
-			ex_ee_block = le32_to_cpu(ex->ee_block);
+			ex_ee_block = EXT4_INODE_C2B(inode,
+					le32_to_cpu(ex->ee_block));
 			ex_ee_len = ext4_ext_get_actual_blocks(ex,
 					inode->i_sb);
 			continue;
@@ -2493,7 +2547,8 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 			ext4_ext_store_pblock(ex, ext4_ext_pblock(ex) + (b-a));
 		}
 
-		ex->ee_block = cpu_to_le32(block);
+		BUG_ON(block & (sbi->s_cluster_ratio-1));
+		ex->ee_block = cpu_to_le32(EXT4_B2C(sbi, block));
 		ex->ee_len = cpu_to_le16(EXT4_B2C(sbi, num));
 		/*
 		 * Do not mark uninitialized if all the blocks in the
@@ -2531,7 +2586,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 		ext_debug("new extent: %u:%u:%llu\n", block, num,
 				ext4_ext_pblock(ex));
 		ex--;
-		ex_ee_block = le32_to_cpu(ex->ee_block);
+		ex_ee_block = EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block));
 		ex_ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
 	}
 
@@ -2862,7 +2917,7 @@ static int ext4_split_extent_at(handle_t *handle,
 
 	depth = ext_depth(inode);
 	ex = path[depth].p_ext;
-	ee_block = le32_to_cpu(ex->ee_block);
+	ee_block = EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block));
 	ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
 	newblock = split - ee_block + ext4_ext_pblock(ex);
 
@@ -2905,7 +2960,7 @@ static int ext4_split_extent_at(handle_t *handle,
 		goto fix_extent_len;
 
 	ex2 = &newex;
-	ex2->ee_block = cpu_to_le32(split);
+	ex2->ee_block = cpu_to_le32(EXT4_INODE_B2C(inode, split));
 	ex2->ee_len   = cpu_to_le16(
 			EXT4_INODE_B2C(inode, ee_len - (split - ee_block)));
 	ext4_ext_store_pblock(ex2, newblock);
@@ -2962,7 +3017,7 @@ static int ext4_split_extent(handle_t *handle,
 
 	depth = ext_depth(inode);
 	ex = path[depth].p_ext;
-	ee_block = le32_to_cpu(ex->ee_block);
+	ee_block = EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block));
 	ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
 	uninitialized = ext4_ext_is_uninitialized(ex);
 
@@ -3037,7 +3092,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 
 	depth = ext_depth(inode);
 	ex = path[depth].p_ext;
-	ee_block = le32_to_cpu(ex->ee_block);
+	ee_block = EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block));
 	ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
 	allocated = ee_len - (map->m_lblk - ee_block);
 
@@ -3078,8 +3133,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 		if (allocated <= EXT4_EXT_ZERO_LEN &&
 		    (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
 			/* case 3 */
-			zero_ex.ee_block =
-					 cpu_to_le32(map->m_lblk);
+			zero_ex.ee_block = cpu_to_le32(EXT4_INODE_B2C(inode,
+						map->m_lblk));
 			zero_ex.ee_len = cpu_to_le16(
 					EXT4_INODE_B2C(inode, allocated));
 			ext4_ext_store_pblock(&zero_ex,
@@ -3168,7 +3223,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
 	 */
 	depth = ext_depth(inode);
 	ex = path[depth].p_ext;
-	ee_block = le32_to_cpu(ex->ee_block);
+	ee_block = EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block));
 	ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
 
 	split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
@@ -3191,7 +3246,8 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
 
 	ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical"
 		"block %llu, max_blocks %u\n", inode->i_ino,
-		(unsigned long long)le32_to_cpu(ex->ee_block),
+		(unsigned long long)EXT4_INODE_C2B(inode,
+			le32_to_cpu(ex->ee_block)),
 		ext4_ext_get_actual_blocks(ex, inode->i_sb));
 
 	err = ext4_ext_get_access(handle, inode, path + depth);
@@ -3253,7 +3309,7 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
 	 * this turns out to be false, we can bail out from this
 	 * function immediately.
 	 */
-	if (lblk + len < le32_to_cpu(last_ex->ee_block) +
+	if (lblk + len < EXT4_INODE_C2B(inode, le32_to_cpu(last_ex->ee_block)) +
 	    ext4_ext_get_actual_blocks(last_ex, inode->i_sb))
 		return 0;
 	/*
@@ -3697,7 +3753,7 @@ static int get_implied_cluster_alloc(struct super_block *sb,
 		 *                  |================|
 		 */
 		if (map->m_lblk > ee_block) {
-			ext4_lblk_t next = ext4_ext_next_allocated_block(path);
+			ext4_lblk_t next = 0;//ext4_ext_next_allocated_block(path);
 			map->m_len = min(map->m_len, next - map->m_lblk);
 		}
 
@@ -3770,12 +3826,13 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 			if (sbi->s_cluster_ratio > 1)
 				map->m_flags |= EXT4_MAP_FROM_CLUSTER;
 			newblock = map->m_lblk
-				   - le32_to_cpu(newex.ee_block)
+				   - EXT4_C2B(sbi, le32_to_cpu(newex.ee_block))
 				   + ext4_ext_pblock(&newex);
 			/* number of remaining blocks in the extent */
 			allocated = ext4_ext_get_actual_blocks(&newex,
 				inode->i_sb) -
-				(map->m_lblk - le32_to_cpu(newex.ee_block));
+				(map->m_lblk - EXT4_C2B(sbi,
+						le32_to_cpu(newex.ee_block)));
 			goto out;
 		}
 	}
@@ -3806,7 +3863,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 
 	ex = path[depth].p_ext;
 	if (ex) {
-		ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
+		ext4_lblk_t ee_block = EXT4_C2B(sbi, le32_to_cpu(ex->ee_block));
 		ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
 		ext4_fsblk_t partial_cluster = 0;
 		unsigned int ee_len;
@@ -3833,7 +3890,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 				 * in the cache
 				 */
 				if (!ext4_ext_is_uninitialized(ex)) {
-					ext4_ext_put_in_cache(inode, ee_block,
+					ext4_ext_put_in_cache(inode,
+						EXT4_B2C(sbi, ee_block),
 						ee_len, ee_start);
 					goto out;
 				}
@@ -3895,7 +3953,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 				ex = path[depth].p_ext;
 				ee_len = ext4_ext_get_actual_blocks(ex,
 						inode->i_sb);
-				ee_block = le32_to_cpu(ex->ee_block);
+				ee_block = EXT4_C2B(sbi,
+						le32_to_cpu(ex->ee_block));
 				ee_start = ext4_ext_pblock(ex);
 
 			}
@@ -3949,11 +4008,12 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	 * Okay, we need to do block allocation.
 	 */
 	map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
-	newex.ee_block = cpu_to_le32(map->m_lblk & ~(sbi->s_cluster_ratio-1));
+	newex.ee_block = EXT4_B2C(sbi,
+			cpu_to_le32(map->m_lblk & ~(sbi->s_cluster_ratio-1)));
 	cluster_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
 
 	if (ex)
-		BUG_ON((le32_to_cpu(ex->ee_block) +
+		BUG_ON((EXT4_C2B(sbi, le32_to_cpu(ex->ee_block)) +
 			EXT4_C2B(sbi, ext4_ext_get_actual_len(ex))) >
 			(map->m_lblk & ~(sbi->s_cluster_ratio-1)));
 
@@ -4012,6 +4072,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 		ar.flags = 0;
 	if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE)
 		ar.flags |= EXT4_MB_HINT_NOPREALLOC;
+	printk(KERN_ERR "ar: %lu, %lu, %lu\n", ar.len, ar.goal, ar.logical);
 	newblock = ext4_mb_new_blocks(handle, &ar, &err);
 	if (!newblock)
 		goto out2;
@@ -4153,7 +4214,8 @@ got_allocated_blocks:
 	 * when it is _not_ an uninitialized extent.
 	 */
 	if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) {
-		ext4_ext_put_in_cache(inode, ar.logical, allocated, newblock);
+		ext4_ext_put_in_cache(inode, EXT4_B2C(sbi, ar.logical),
+				allocated, newblock);
 		ext4_update_inode_fsync_trans(handle, inode, 1);
 	} else
 		ext4_update_inode_fsync_trans(handle, inode, 0);
-- 
1.7.3.2


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 4/9] ext4: remove unused functions and tags
  2011-11-09 11:16 [PATCH 0/9 bigalloc] ext4: change unit of extent's ee_block and ee_len from block to cluster Robin Dong
                   ` (2 preceding siblings ...)
  2011-11-09 11:17 ` [PATCH 3/9 bigalloc] ext4: change unit of ee_block of extent to cluster Robin Dong
@ 2011-11-09 11:17 ` Robin Dong
  2011-11-09 11:17 ` [PATCH 5/9 bigalloc] ext4: zero out extra pages when users write one page Robin Dong
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 10+ messages in thread
From: Robin Dong @ 2011-11-09 11:17 UTC (permalink / raw)
  To: linux-ext4; +Cc: Robin Dong

From: Robin Dong <sanbai@taobao.com>

Signed-off-by: Robin Dong <sanbai@taobao.com>
---
 fs/ext4/extents.c |  109 -----------------------------------------------------
 1 files changed, 0 insertions(+), 109 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 4f764ee..ccf12a0 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3662,111 +3662,6 @@ out2:
 }
 
 /*
- * get_implied_cluster_alloc - check to see if the requested
- * allocation (in the map structure) overlaps with a cluster already
- * allocated in an extent.
- *	@sb	The filesystem superblock structure
- *	@map	The requested lblk->pblk mapping
- *	@ex	The extent structure which might contain an implied
- *			cluster allocation
- *
- * This function is called by ext4_ext_map_blocks() after we failed to
- * find blocks that were already in the inode's extent tree.  Hence,
- * we know that the beginning of the requested region cannot overlap
- * the extent from the inode's extent tree.  There are three cases we
- * want to catch.  The first is this case:
- *
- *		 |--- cluster # N--|
- *    |--- extent ---|	|---- requested region ---|
- *			|==========|
- *
- * The second case that we need to test for is this one:
- *
- *   |--------- cluster # N ----------------|
- *	   |--- requested region --|   |------- extent ----|
- *	   |=======================|
- *
- * The third case is when the requested region lies between two extents
- * within the same cluster:
- *          |------------- cluster # N-------------|
- * |----- ex -----|                  |---- ex_right ----|
- *                  |------ requested region ------|
- *                  |================|
- *
- * In each of the above cases, we need to set the map->m_pblk and
- * map->m_len so it corresponds to the return the extent labelled as
- * "|====|" from cluster #N, since it is already in use for data in
- * cluster EXT4_B2C(sbi, map->m_lblk).	We will then return 1 to
- * signal to ext4_ext_map_blocks() that map->m_pblk should be treated
- * as a new "allocated" block region.  Otherwise, we will return 0 and
- * ext4_ext_map_blocks() will then allocate one or more new clusters
- * by calling ext4_mb_new_blocks().
- */
-static int get_implied_cluster_alloc(struct super_block *sb,
-				     struct ext4_map_blocks *map,
-				     struct ext4_extent *ex,
-				     struct ext4_ext_path *path)
-{
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
-	ext4_lblk_t ex_cluster_start, ex_cluster_end;
-	ext4_lblk_t rr_cluster_start, rr_cluster_end;
-	ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
-	ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
-	unsigned int ee_len = ext4_ext_get_actual_blocks(ex, sb);
-
-	/* The extent passed in that we are trying to match */
-	ex_cluster_start = EXT4_B2C(sbi, ee_block);
-	ex_cluster_end = EXT4_B2C(sbi, ee_block + ee_len - 1);
-
-	/* The requested region passed into ext4_map_blocks() */
-	rr_cluster_start = EXT4_B2C(sbi, map->m_lblk);
-	rr_cluster_end = EXT4_B2C(sbi, map->m_lblk + map->m_len - 1);
-
-	if ((rr_cluster_start == ex_cluster_end) ||
-	    (rr_cluster_start == ex_cluster_start)) {
-		if (rr_cluster_start == ex_cluster_end)
-			ee_start += ee_len - 1;
-		map->m_pblk = (ee_start & ~(sbi->s_cluster_ratio - 1)) +
-			c_offset;
-		map->m_len = min(map->m_len,
-				 (unsigned) sbi->s_cluster_ratio - c_offset);
-		/*
-		 * Check for and handle this case:
-		 *
-		 *   |--------- cluster # N-------------|
-		 *		       |------- extent ----|
-		 *	   |--- requested region ---|
-		 *	   |===========|
-		 */
-
-		if (map->m_lblk < ee_block)
-			map->m_len = min(map->m_len, ee_block - map->m_lblk);
-
-		/*
-		 * Check for the case where there is already another allocated
-		 * block to the right of 'ex' but before the end of the cluster.
-		 *
-		 *          |------------- cluster # N-------------|
-		 * |----- ex -----|                  |---- ex_right ----|
-		 *                  |------ requested region ------|
-		 *                  |================|
-		 */
-		if (map->m_lblk > ee_block) {
-			ext4_lblk_t next = 0;//ext4_ext_next_allocated_block(path);
-			map->m_len = min(map->m_len, next - map->m_lblk);
-		}
-
-		trace_ext4_get_implied_cluster_alloc_exit(sb, map, 1);
-		return 1;
-	}
-
-	trace_ext4_get_implied_cluster_alloc_exit(sb, map, 0);
-	return 0;
-}
-
-
-/*
  * Block allocation/map/preallocation routine for extents based files
  *
  *
@@ -3798,7 +3693,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	unsigned int result = 0;
 	struct ext4_allocation_request ar;
 	ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
-	ext4_lblk_t cluster_offset;
 	struct ext4_map_blocks punch_map;
 
 	ext_debug("blocks %u/%u requested for inode %lu\n",
@@ -4010,7 +3904,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
 	newex.ee_block = EXT4_B2C(sbi,
 			cpu_to_le32(map->m_lblk & ~(sbi->s_cluster_ratio-1)));
-	cluster_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
 
 	if (ex)
 		BUG_ON((EXT4_C2B(sbi, le32_to_cpu(ex->ee_block)) +
@@ -4072,7 +3965,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 		ar.flags = 0;
 	if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE)
 		ar.flags |= EXT4_MB_HINT_NOPREALLOC;
-	printk(KERN_ERR "ar: %lu, %lu, %lu\n", ar.len, ar.goal, ar.logical);
 	newblock = ext4_mb_new_blocks(handle, &ar, &err);
 	if (!newblock)
 		goto out2;
@@ -4081,7 +3973,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	free_on_err = 1;
 	allocated_clusters = ar.len;
 
-got_allocated_blocks:
 	/* try to insert new extent into found leaf and return */
 	ext4_ext_store_pblock(&newex, newblock);
 	newex.ee_len = cpu_to_le16(allocated_clusters);
-- 
1.7.3.2


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 5/9 bigalloc] ext4: zero out extra pages when users write one page
  2011-11-09 11:16 [PATCH 0/9 bigalloc] ext4: change unit of extent's ee_block and ee_len from block to cluster Robin Dong
                   ` (3 preceding siblings ...)
  2011-11-09 11:17 ` [PATCH 4/9] ext4: remove unused functions and tags Robin Dong
@ 2011-11-09 11:17 ` Robin Dong
  2011-11-09 11:17 ` [PATCH 6/9 bigalloc] ext4: zero out extra pages when truncate file Robin Dong
                   ` (3 subsequent siblings)
  8 siblings, 0 replies; 10+ messages in thread
From: Robin Dong @ 2011-11-09 11:17 UTC (permalink / raw)
  To: linux-ext4; +Cc: Robin Dong

From: Robin Dong <sanbai@taobao.com>

When users write one page which is in the middle of a cluster, we need to zero
out the other pages around it.

Signed-off-by: Robin Dong <sanbai@taobao.com>
---
 fs/ext4/ext4.h  |   18 ++++
 fs/ext4/inode.c |  293 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 295 insertions(+), 16 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 1dea3e8..90ae8a2 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -675,6 +675,15 @@ struct move_extent {
 #define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1)
 #define EXT4_NSEC_MASK  (~0UL << EXT4_EPOCH_BITS)
 
+#define EXT4_MAX_CLUSTERSIZE 1048576
+#define EXT4_MAX_CTXT_PAGES (EXT4_MAX_CLUSTERSIZE / PAGE_CACHE_SIZE)
+
+/* tracking cluster write pages */
+struct ext4_write_cluster_ctxt {
+	unsigned long	w_num_pages;
+	struct page	*w_pages[EXT4_MAX_CTXT_PAGES];
+};
+
 /*
  * Extended fields will fit into an inode if the filesystem was formatted
  * with large inodes (-I 256 or larger) and there are not currently any EAs
@@ -1849,6 +1858,15 @@ extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
 extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
 
 /* inode.c */
+int walk_page_buffers(handle_t *handle, struct buffer_head *head,
+		unsigned from, unsigned to, int *partial,
+		int (*fn)(handle_t *handle, struct buffer_head *bh));
+int do_journal_get_write_access(handle_t *handle, struct buffer_head *bh);
+struct ext4_write_cluster_ctxt *ext4_alloc_write_cluster_ctxt(void);
+void ext4_free_write_cluster_ctxt(struct ext4_write_cluster_ctxt *ewcc);
+int ext4_zero_cluster_page(struct inode *inode, int index,
+		struct ext4_write_cluster_ctxt *ewcc, unsigned flags);
+
 struct buffer_head *ext4_getblk(handle_t *, struct inode *,
 						ext4_lblk_t, int, int *);
 struct buffer_head *ext4_bread(handle_t *, struct inode *,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9b83c3c..0ae546d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -38,6 +38,7 @@
 #include <linux/printk.h>
 #include <linux/slab.h>
 #include <linux/ratelimit.h>
+#include <linux/swap.h>
 
 #include "ext4_jbd2.h"
 #include "xattr.h"
@@ -49,6 +50,31 @@
 
 #define MPAGE_DA_EXTENT_TAIL 0x01
 
+static void ext4_write_cluster_add_page(struct ext4_write_cluster_ctxt *ewcc,
+		struct page *page)
+{
+	ewcc->w_pages[ewcc->w_num_pages] = page;
+	ewcc->w_num_pages++;
+}
+
+struct ext4_write_cluster_ctxt *ext4_alloc_write_cluster_ctxt(void)
+{
+	return kzalloc(sizeof(struct ext4_write_cluster_ctxt), GFP_NOFS);
+}
+
+void ext4_free_write_cluster_ctxt(struct ext4_write_cluster_ctxt *ewcc)
+{
+	int i;
+	for (i = 0; i < ewcc->w_num_pages; i++) {
+		if (ewcc->w_pages[i]) {
+			unlock_page(ewcc->w_pages[i]);
+			mark_page_accessed(ewcc->w_pages[i]);
+			page_cache_release(ewcc->w_pages[i]);
+		}
+	}
+	kfree(ewcc);
+}
+
 static inline int ext4_begin_ordered_truncate(struct inode *inode,
 					      loff_t new_size)
 {
@@ -656,7 +682,7 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
 	return NULL;
 }
 
-static int walk_page_buffers(handle_t *handle,
+int walk_page_buffers(handle_t *handle,
 			     struct buffer_head *head,
 			     unsigned from,
 			     unsigned to,
@@ -712,7 +738,7 @@ static int walk_page_buffers(handle_t *handle,
  * is elevated.  We'll still have enough credits for the tiny quotafile
  * write.
  */
-static int do_journal_get_write_access(handle_t *handle,
+int do_journal_get_write_access(handle_t *handle,
 				       struct buffer_head *bh)
 {
 	int dirty = buffer_dirty(bh);
@@ -738,15 +764,176 @@ static int do_journal_get_write_access(handle_t *handle,
 
 static int ext4_get_block_write(struct inode *inode, sector_t iblock,
 		   struct buffer_head *bh_result, int create);
+
+int ext4_cluster_write_begin(struct page *page, loff_t pos, unsigned len,
+		get_block_t *get_block)
+{
+	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+	unsigned to = from + len;
+	struct inode *inode = page->mapping->host;
+	unsigned block_start, block_end;
+	sector_t block;
+	int err = 0;
+	unsigned blocksize, bbits;
+	struct buffer_head *bh, *head, *wait[2], **wait_bh = wait;
+
+	BUG_ON(!PageLocked(page));
+	BUG_ON(from > PAGE_CACHE_SIZE);
+	BUG_ON(to > PAGE_CACHE_SIZE);
+	BUG_ON(from > to);
+
+	blocksize = 1 << inode->i_blkbits;
+	if (!page_has_buffers(page))
+		create_empty_buffers(page, blocksize, 0);
+	head = page_buffers(page);
+
+	bbits = inode->i_blkbits;
+	block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
+
+	for (bh = head, block_start = 0; bh != head || !block_start;
+	    block++, block_start = block_end, bh = bh->b_this_page) {
+		block_end = block_start + blocksize;
+		if (block_end <= from || block_start >= to) {
+			if (PageUptodate(page)) {
+				if (!buffer_uptodate(bh))
+					set_buffer_uptodate(bh);
+			}
+			continue;
+		}
+		if (buffer_new(bh))
+			clear_buffer_new(bh);
+		if (!buffer_mapped(bh)) {
+			WARN_ON(bh->b_size != blocksize);
+			err = get_block(inode, block, bh, 1);
+			if (err)
+				break;
+			unmap_underlying_metadata(bh->b_bdev,
+						bh->b_blocknr);
+			if (PageUptodate(page)) {
+				clear_buffer_new(bh);
+				set_buffer_uptodate(bh);
+				mark_buffer_dirty(bh);
+				continue;
+			}
+			if (block_end > to || block_start < from)
+				zero_user_segments(page,
+					to, block_end,
+					block_start, from);
+			continue;
+		}
+		if (PageUptodate(page)) {
+			if (!buffer_uptodate(bh))
+				set_buffer_uptodate(bh);
+			continue;
+		}
+		if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
+		    !buffer_unwritten(bh) &&
+		     (block_start < from || block_end > to)) {
+			ll_rw_block(READ, 1, &bh);
+			*wait_bh++ = bh;
+		}
+	}
+	/*
+	 * If we issued read requests - let them complete.
+	 */
+	while (wait_bh > wait) {
+		wait_on_buffer(*--wait_bh);
+		if (!buffer_uptodate(*wait_bh))
+			err = -EIO;
+	}
+	if (unlikely(err))
+		page_zero_new_buffers(page, from, to);
+	return err;
+}
+
+int ext4_zero_cluster_page(struct inode *inode, int index,
+		struct ext4_write_cluster_ctxt *ewcc, unsigned flags)
+{
+	int ret = 0;
+	struct page *page;
+
+	page = grab_cache_page_write_begin(inode->i_mapping, index, flags);
+	if (!page) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ext4_write_cluster_add_page(ewcc, page);
+
+	/* if page is already uptodate and has buffers, don't get_block again
+	 */
+	if (PageUptodate(page) && PagePrivate(page))
+		goto out;
+
+	zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+	SetPageUptodate(page);
+	if (ext4_should_dioread_nolock(inode))
+		ret = ext4_cluster_write_begin(page, index << PAGE_CACHE_SHIFT,
+				PAGE_CACHE_SIZE, ext4_get_block_write);
+	else
+		ret = ext4_cluster_write_begin(page, index << PAGE_CACHE_SHIFT,
+				PAGE_CACHE_SIZE, ext4_get_block);
+
+out:
+	return ret;
+}
+
+int ext4_prepare_cluster_left_pages(struct inode *inode, int index,
+		struct ext4_write_cluster_ctxt *ewcc, unsigned flags)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+	int ret = 0;
+	int block;
+	sector_t left_offset = index & (sbi->s_cluster_ratio - 1);
+	sector_t begin;
+
+	if (left_offset) {
+		begin = index - left_offset;
+		for (block = begin; block < index; block++) {
+			ret = ext4_zero_cluster_page(inode, block, ewcc, flags);
+			if (ret)
+				goto out;
+		}
+	}
+
+out:
+	return ret;
+}
+
+int ext4_prepare_cluster_right_pages(struct inode *inode, int index,
+		struct ext4_write_cluster_ctxt *ewcc, unsigned flags)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+	int ret = 0;
+	int block;
+	sector_t left_offset = index & (sbi->s_cluster_ratio - 1);
+	sector_t right_offset = sbi->s_cluster_ratio - left_offset - 1;
+	sector_t begin;
+
+	if (right_offset) {
+		begin = index + 1;
+		for (block = begin; block < index + right_offset + 1; block++) {
+			ret = ext4_zero_cluster_page(inode, block, ewcc, flags);
+			if (ret)
+				goto out;
+		}
+	}
+
+out:
+	return ret;
+}
+
 static int ext4_write_begin(struct file *file, struct address_space *mapping,
 			    loff_t pos, unsigned len, unsigned flags,
 			    struct page **pagep, void **fsdata)
 {
 	struct inode *inode = mapping->host;
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	int ret, needed_blocks;
 	handle_t *handle;
-	int retries = 0;
-	struct page *page;
+	int retries = 0, uninit = 0;
+	struct page *page = NULL;
+	struct ext4_write_cluster_ctxt *ewcc;
 	pgoff_t index;
 	unsigned from, to;
 
@@ -761,6 +948,12 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
 	to = from + len;
 
 retry:
+	ewcc = ext4_alloc_write_cluster_ctxt();
+	if (!ewcc) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
 	handle = ext4_journal_start(inode, needed_blocks);
 	if (IS_ERR(handle)) {
 		ret = PTR_ERR(handle);
@@ -771,27 +964,76 @@ retry:
 	 * started */
 	flags |= AOP_FLAG_NOFS;
 
+	if (sbi->s_cluster_ratio > 1) {
+		/* We need to know whether the block is allocated already
+		 */
+		struct ext4_map_blocks map;
+		map.m_lblk = index;
+		map.m_len = 1;
+		ret = ext4_map_blocks(handle, inode, &map, 0);
+		uninit = map.m_flags & EXT4_MAP_UNWRITTEN;
+		if (ret <= 0 || uninit) {
+			ret = ext4_prepare_cluster_left_pages(inode, index,
+					ewcc, flags);
+			if (ret)
+				goto err_out;
+		}
+	}
+
 	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page) {
-		ext4_journal_stop(handle);
 		ret = -ENOMEM;
-		goto out;
+		goto err_out;
 	}
+
 	*pagep = page;
 
-	if (ext4_should_dioread_nolock(inode))
-		ret = __block_write_begin(page, pos, len, ext4_get_block_write);
-	else
-		ret = __block_write_begin(page, pos, len, ext4_get_block);
+	ext4_write_cluster_add_page(ewcc, page);
+
+	/* if the block is already allocated by cluster, we should use
+	 * ext4_cluster_write_begin (it will not read buffer again)
+	 */
+	if (sbi->s_cluster_ratio > 1 && pos + len > inode->i_size) {
+		if (ext4_should_dioread_nolock(inode))
+			ret = ext4_cluster_write_begin(page, pos, len,
+					ext4_get_block_write);
+		else
+			ret = ext4_cluster_write_begin(page, pos, len,
+					ext4_get_block);
+	} else {
+		if (ext4_should_dioread_nolock(inode))
+			ret = __block_write_begin(page, pos, len,
+					ext4_get_block_write);
+		else
+			ret = __block_write_begin(page, pos, len,
+					ext4_get_block);
+	}
+
+	if (sbi->s_cluster_ratio > 1 && uninit) {
+		ret = ext4_prepare_cluster_right_pages(inode, index,
+				ewcc, flags);
+		if (ret)
+			goto err_out;
+	}
 
 	if (!ret && ext4_should_journal_data(inode)) {
-		ret = walk_page_buffers(handle, page_buffers(page),
+		int i;
+		unsigned long from, to;
+		for (i = 0; i < ewcc->w_num_pages; i++) {
+			page = ewcc->w_pages[i];
+			if (!page || !page_buffers(page))
+				continue;
+			from = page->index << PAGE_CACHE_SHIFT;
+			to = from + PAGE_CACHE_SIZE;
+			ret = walk_page_buffers(handle, page_buffers(page),
 				from, to, NULL, do_journal_get_write_access);
+			if (ret)
+				break;
+		}
 	}
 
 	if (ret) {
-		unlock_page(page);
-		page_cache_release(page);
+		ext4_free_write_cluster_ctxt(ewcc);
 		/*
 		 * __block_write_begin may have instantiated a few blocks
 		 * outside i_size.  Trim these off again. Don't need
@@ -819,8 +1061,15 @@ retry:
 
 	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
 		goto retry;
+
+	*fsdata = ewcc;
 out:
 	return ret;
+
+err_out:
+	ext4_free_write_cluster_ctxt(ewcc);
+	ext4_journal_stop(handle);
+	return ret;
 }
 
 /* For write_end() in data=journal mode */
@@ -837,11 +1086,24 @@ static int ext4_generic_write_end(struct file *file,
 				  loff_t pos, unsigned len, unsigned copied,
 				  struct page *page, void *fsdata)
 {
-	int i_size_changed = 0;
+	int i_size_changed = 0, i;
 	struct inode *inode = mapping->host;
+	struct ext4_write_cluster_ctxt *ewcc = fsdata;
 	handle_t *handle = ext4_journal_current_handle();
 
 	copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
+	for (i = 0; i < ewcc->w_num_pages; i++) {
+		unsigned long pos;
+		struct page *cluster_page;
+		cluster_page = ewcc->w_pages[i];
+		if (!cluster_page)
+			break;
+		if (cluster_page == page)
+			continue;
+		pos = cluster_page->index << PAGE_CACHE_SHIFT;
+		block_write_end(file, mapping, pos, PAGE_CACHE_SIZE,
+				PAGE_CACHE_SIZE, cluster_page, fsdata);
+	}
 
 	/*
 	 * No need to use i_size_read() here, the i_size
@@ -863,8 +1125,7 @@ static int ext4_generic_write_end(struct file *file,
 		ext4_update_i_disksize(inode, (pos + copied));
 		i_size_changed = 1;
 	}
-	unlock_page(page);
-	page_cache_release(page);
+	ext4_free_write_cluster_ctxt(ewcc);
 
 	/*
 	 * Don't mark the inode dirty under page lock. First, it unnecessarily
-- 
1.7.3.2


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 6/9 bigalloc] ext4: zero out extra pages when truncate file
  2011-11-09 11:16 [PATCH 0/9 bigalloc] ext4: change unit of extent's ee_block and ee_len from block to cluster Robin Dong
                   ` (4 preceding siblings ...)
  2011-11-09 11:17 ` [PATCH 5/9 bigalloc] ext4: zero out extra pages when users write one page Robin Dong
@ 2011-11-09 11:17 ` Robin Dong
  2011-11-09 11:17 ` [PATCH 7/9 bigalloc] ext4: allocate a cluster for a directory when it need spaces Robin Dong
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 10+ messages in thread
From: Robin Dong @ 2011-11-09 11:17 UTC (permalink / raw)
  To: linux-ext4; +Cc: Robin Dong

From: Robin Dong <sanbai@taobao.com>

When truncating a file to a larger size, we need to zero out the pages beyond
the old i_size.

Signed-off-by: Robin Dong <sanbai@taobao.com>
---
 fs/ext4/ext4.h     |    4 +-
 fs/ext4/extents.c  |   78 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 fs/ext4/inode.c    |   13 ++++----
 fs/ext4/ioctl.c    |    2 +-
 fs/ext4/super.c    |    2 +-
 fs/ext4/truncate.h |    2 +-
 6 files changed, 89 insertions(+), 12 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 90ae8a2..7d226af 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1886,7 +1886,7 @@ extern void ext4_dirty_inode(struct inode *, int);
 extern int ext4_change_inode_journal_flag(struct inode *, int);
 extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
 extern int ext4_can_truncate(struct inode *inode);
-extern void ext4_truncate(struct inode *);
+extern void ext4_truncate(struct inode *, loff_t oldsize);
 extern int ext4_punch_hole(struct file *file, loff_t offset, loff_t length);
 extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks);
 extern void ext4_set_inode_flags(struct inode *);
@@ -2267,7 +2267,7 @@ extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
 				       int chunk);
 extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 			       struct ext4_map_blocks *map, int flags);
-extern void ext4_ext_truncate(struct inode *);
+extern void ext4_ext_truncate(struct inode *, loff_t oldsize);
 extern int ext4_ext_punch_hole(struct file *file, loff_t offset,
 				loff_t length);
 extern void ext4_ext_init(struct super_block *);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index ccf12a0..f84c122 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4131,10 +4131,76 @@ out2:
 	return err ? err : result;
 }
 
-void ext4_ext_truncate(struct inode *inode)
+int ext4_ext_truncate_zero_pages(handle_t *handle, struct inode *inode,
+		loff_t old_size)
+{
+	struct super_block *sb = inode->i_sb;
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+	struct ext4_write_cluster_ctxt *ewcc = NULL;
+	struct page *page;
+	ext4_lblk_t last_block = ((old_size + sb->s_blocksize - 1)
+			>> EXT4_BLOCK_SIZE_BITS(sb)) - 1;
+	ext4_lblk_t left_offset = last_block & (sbi->s_cluster_ratio - 1);
+	ext4_lblk_t right_offset = sbi->s_cluster_ratio - left_offset - 1;
+	ext4_lblk_t begin, index;
+	unsigned long i;
+	int ret = 0;
+	unsigned from, to;
+
+	if (sbi->s_cluster_ratio <= 1)
+		goto out;
+
+	if (right_offset) {
+		struct ext4_map_blocks map;
+		map.m_lblk = last_block;
+		map.m_len = 1;
+		if (ext4_map_blocks(handle, inode, &map, 0) <= 0
+			|| map.m_flags & EXT4_MAP_UNWRITTEN)
+			goto out;
+
+		ewcc = ext4_alloc_write_cluster_ctxt();
+		if (!ewcc) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		begin = last_block + 1;
+		for (index = begin; index < last_block + right_offset + 1;
+				index++) {
+			ret = ext4_zero_cluster_page(inode, index, ewcc,
+				mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
+			if (ret)
+				goto out;
+		}
+
+		if (ext4_should_journal_data(inode)) {
+			for (i = 0; i < ewcc->w_num_pages; i++) {
+				page = ewcc->w_pages[i];
+				if (!page || !page_buffers(page))
+					continue;
+				from = page->index << PAGE_CACHE_SHIFT;
+				to = from + PAGE_CACHE_SIZE;
+				ret = walk_page_buffers(handle,
+					page_buffers(page), from, to, NULL,
+					do_journal_get_write_access);
+				if (ret)
+					goto out;
+			}
+		}
+	}
+
+out:
+	if (ewcc)
+		ext4_free_write_cluster_ctxt(ewcc);
+
+	return ret;
+}
+
+void ext4_ext_truncate(struct inode *inode, loff_t old_size)
 {
 	struct address_space *mapping = inode->i_mapping;
 	struct super_block *sb = inode->i_sb;
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	ext4_lblk_t last_block;
 	handle_t *handle;
 	int err = 0;
@@ -4156,6 +4222,9 @@ void ext4_ext_truncate(struct inode *inode)
 	if (inode->i_size & (sb->s_blocksize - 1))
 		ext4_block_truncate_page(handle, mapping, inode->i_size);
 
+	if (ext4_ext_truncate_zero_pages(handle, inode, old_size))
+		goto out_stop;
+
 	if (ext4_orphan_add(handle, inode))
 		goto out_stop;
 
@@ -4176,6 +4245,13 @@ void ext4_ext_truncate(struct inode *inode)
 
 	last_block = (inode->i_size + sb->s_blocksize - 1)
 			>> EXT4_BLOCK_SIZE_BITS(sb);
+
+	if (sbi->s_cluster_ratio > 1 &&
+			(last_block & (sbi->s_cluster_ratio - 1))) {
+		last_block = (last_block & ~(sbi->s_cluster_ratio - 1)) +
+			sbi->s_cluster_ratio;
+	}
+
 	err = ext4_ext_remove_space(inode, last_block);
 
 	/* In a multi-transaction truncate, we only make the final
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 0ae546d..be922ae 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -213,7 +213,7 @@ void ext4_evict_inode(struct inode *inode)
 		goto stop_handle;
 	}
 	if (inode->i_blocks)
-		ext4_truncate(inode);
+		ext4_truncate(inode, 0);
 
 	/*
 	 * ext4_ext_truncate() doesn't reserve any slop when it
@@ -3438,7 +3438,7 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
  * that's fine - as long as they are linked from the inode, the post-crash
  * ext4_truncate() run will find them and release them.
  */
-void ext4_truncate(struct inode *inode)
+void ext4_truncate(struct inode *inode, loff_t old_size)
 {
 	trace_ext4_truncate_enter(inode);
 
@@ -3451,7 +3451,7 @@ void ext4_truncate(struct inode *inode)
 		ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
 
 	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
-		ext4_ext_truncate(inode);
+		ext4_ext_truncate(inode, old_size);
 	else
 		ext4_ind_truncate(inode);
 
@@ -4218,11 +4218,12 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 	}
 
 	if (attr->ia_valid & ATTR_SIZE) {
-		if (attr->ia_size != i_size_read(inode)) {
+		loff_t old_size = i_size_read(inode);
+		if (attr->ia_size != old_size) {
 			truncate_setsize(inode, attr->ia_size);
-			ext4_truncate(inode);
+			ext4_truncate(inode, old_size);
 		} else if (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))
-			ext4_truncate(inode);
+			ext4_truncate(inode, 0);
 	}
 
 	if (!rc) {
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 4a5081a..6eb2f4f 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -100,7 +100,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 				goto flags_out;
 			}
 		} else if (oldflags & EXT4_EOFBLOCKS_FL)
-			ext4_truncate(inode);
+			ext4_truncate(inode, 0);
 
 		handle = ext4_journal_start(inode, 1);
 		if (IS_ERR(handle)) {
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 2cf4ae0..beea7a1 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2229,7 +2229,7 @@ static void ext4_orphan_cleanup(struct super_block *sb,
 				__func__, inode->i_ino, inode->i_size);
 			jbd_debug(2, "truncating inode %lu to %lld bytes\n",
 				  inode->i_ino, inode->i_size);
-			ext4_truncate(inode);
+			ext4_truncate(inode, 0);
 			nr_truncates++;
 		} else {
 			ext4_msg(sb, KERN_DEBUG,
diff --git a/fs/ext4/truncate.h b/fs/ext4/truncate.h
index 011ba66..2be0783 100644
--- a/fs/ext4/truncate.h
+++ b/fs/ext4/truncate.h
@@ -11,7 +11,7 @@
 static inline void ext4_truncate_failed_write(struct inode *inode)
 {
 	truncate_inode_pages(inode->i_mapping, inode->i_size);
-	ext4_truncate(inode);
+	ext4_truncate(inode, 0);
 }
 
 /*
-- 
1.7.3.2


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 7/9 bigalloc] ext4: allocate a cluster for a directory when it need spaces
  2011-11-09 11:16 [PATCH 0/9 bigalloc] ext4: change unit of extent's ee_block and ee_len from block to cluster Robin Dong
                   ` (5 preceding siblings ...)
  2011-11-09 11:17 ` [PATCH 6/9 bigalloc] ext4: zero out extra pages when truncate file Robin Dong
@ 2011-11-09 11:17 ` Robin Dong
  2011-11-09 11:17 ` [PATCH 8/9 bigalloc] ext4: align fallocate size to a whole cluster Robin Dong
  2011-11-09 11:17 ` [PATCH 9/9 bigalloc] ext4: make cluster works for mmap Robin Dong
  8 siblings, 0 replies; 10+ messages in thread
From: Robin Dong @ 2011-11-09 11:17 UTC (permalink / raw)
  To: linux-ext4; +Cc: Robin Dong

From: Robin Dong <sanbai@taobao.com>

Signed-off-by: Robin Dong <sanbai@taobao.com>
---
 fs/ext4/inode.c |    9 +++++++++
 fs/ext4/namei.c |   46 ++++++++++++++++++++++++++++++++--------------
 2 files changed, 41 insertions(+), 14 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index be922ae..69d0031 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -673,6 +673,15 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
 		return bh;
 	if (buffer_uptodate(bh))
 		return bh;
+	/* if the block has been allocated by cluster,
+	 * do not need to read it
+	 */
+	if ((EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) &&
+		((block << inode->i_sb->s_blocksize_bits) >= inode->i_size)) {
+		memset(bh->b_data, 0, inode->i_sb->s_blocksize);
+		set_buffer_uptodate(bh);
+		return bh;
+	}
 	ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh);
 	wait_on_buffer(bh);
 	if (buffer_uptodate(bh))
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 1c924fa..43f7dcb 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1811,10 +1811,12 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 {
 	handle_t *handle;
 	struct inode *inode;
-	struct buffer_head *dir_block = NULL;
+	struct buffer_head *first_block = NULL;
+	struct buffer_head **dir_block = NULL;
 	struct ext4_dir_entry_2 *de;
+	struct ext4_sb_info *sbi = EXT4_SB(dir->i_sb);
 	unsigned int blocksize = dir->i_sb->s_blocksize;
-	int err, retries = 0;
+	int i, err, retries = 0;
 
 	if (EXT4_DIR_LINK_MAX(dir))
 		return -EMLINK;
@@ -1824,6 +1826,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 retry:
 	handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
 					EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+					sbi->s_cluster_ratio +
 					EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
@@ -1840,14 +1843,24 @@ retry:
 	inode->i_op = &ext4_dir_inode_operations;
 	inode->i_fop = &ext4_dir_operations;
 	inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize;
-	dir_block = ext4_bread(handle, inode, 0, 1, &err);
+
+	dir_block = kzalloc(sizeof(struct buffer_head *) * EXT4_MAX_CTXT_PAGES,
+			GFP_NOFS);
 	if (!dir_block)
-		goto out_clear_inode;
-	BUFFER_TRACE(dir_block, "get_write_access");
-	err = ext4_journal_get_write_access(handle, dir_block);
-	if (err)
-		goto out_clear_inode;
-	de = (struct ext4_dir_entry_2 *) dir_block->b_data;
+		goto out_stop;
+
+	for (i = 0; i < sbi->s_cluster_ratio; i++) {
+		dir_block[i] = ext4_getblk(handle, inode, i, 1, &err);
+		if (!dir_block[i])
+			goto out_clear_inode;
+		memset(dir_block[i]->b_data, 0, inode->i_sb->s_blocksize);
+		set_buffer_uptodate(dir_block[i]);
+		err = ext4_journal_get_write_access(handle, dir_block[i]);
+		if (err)
+			goto out_clear_inode;
+	}
+	first_block = dir_block[0];
+	de = (struct ext4_dir_entry_2 *) first_block->b_data;
 	de->inode = cpu_to_le32(inode->i_ino);
 	de->name_len = 1;
 	de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len),
@@ -1862,10 +1875,13 @@ retry:
 	strcpy(de->name, "..");
 	ext4_set_de_type(dir->i_sb, de, S_IFDIR);
 	inode->i_nlink = 2;
-	BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
-	err = ext4_handle_dirty_metadata(handle, dir, dir_block);
-	if (err)
-		goto out_clear_inode;
+	BUFFER_TRACE(first_block, "call ext4_handle_dirty_metadata");
+
+	for (i = 0; i < sbi->s_cluster_ratio; i++) {
+		err = ext4_handle_dirty_metadata(handle, dir, dir_block[i]);
+		if (err)
+			goto out_clear_inode;
+	}
 	err = ext4_mark_inode_dirty(handle, inode);
 	if (!err)
 		err = ext4_add_entry(handle, dentry, inode);
@@ -1885,7 +1901,9 @@ out_clear_inode:
 	d_instantiate(dentry, inode);
 	unlock_new_inode(inode);
 out_stop:
-	brelse(dir_block);
+	for (i = 0; i < sbi->s_cluster_ratio; i++)
+		brelse(dir_block[i]);
+	kfree(dir_block);
 	ext4_journal_stop(handle);
 	if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
 		goto retry;
-- 
1.7.3.2


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 8/9 bigalloc] ext4: align fallocate size to a whole cluster
  2011-11-09 11:16 [PATCH 0/9 bigalloc] ext4: change unit of extent's ee_block and ee_len from block to cluster Robin Dong
                   ` (6 preceding siblings ...)
  2011-11-09 11:17 ` [PATCH 7/9 bigalloc] ext4: allocate a cluster for a directory when it need spaces Robin Dong
@ 2011-11-09 11:17 ` Robin Dong
  2011-11-09 11:17 ` [PATCH 9/9 bigalloc] ext4: make cluster works for mmap Robin Dong
  8 siblings, 0 replies; 10+ messages in thread
From: Robin Dong @ 2011-11-09 11:17 UTC (permalink / raw)
  To: linux-ext4; +Cc: Robin Dong

From: Robin Dong <sanbai@taobao.com>

Signed-off-by: Robin Dong <sanbai@taobao.com>
---
 fs/ext4/extents.c |   34 ++++++++++++++++++++++++++++++++--
 1 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index f84c122..7e3d4c8 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3533,8 +3533,11 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 			struct ext4_ext_path *path, int flags,
 			unsigned int allocated, ext4_fsblk_t newblock)
 {
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+	struct ext4_map_blocks convert_map;
 	int ret = 0;
 	int err = 0;
+	int offset;
 	ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
 
 	ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical"
@@ -3598,8 +3601,14 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 	}
 
 	/* buffered write, writepage time, convert*/
-	ret = ext4_ext_convert_to_initialized(handle, inode, map, path);
+	offset = map->m_lblk & (sbi->s_cluster_ratio - 1);
+	convert_map.m_len =
+		EXT4_C2B(sbi, EXT4_NUM_B2C(sbi, offset + map->m_len));
+	convert_map.m_lblk = map->m_lblk - offset;
+	ret = ext4_ext_convert_to_initialized(handle, inode,
+			&convert_map, path);
 	if (ret >= 0) {
+		ret = map->m_len;
 		ext4_update_inode_fsync_trans(handle, inode, 1);
 		err = check_eofblocks_fl(handle, inode, map->m_lblk, path,
 					 map->m_len);
@@ -4318,8 +4327,9 @@ static void ext4_falloc_update_inode(struct inode *inode,
 long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 {
 	struct inode *inode = file->f_path.dentry->d_inode;
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	handle_t *handle;
-	loff_t new_size;
+	loff_t new_size, old_size;
 	unsigned int max_blocks;
 	int ret = 0;
 	int ret2 = 0;
@@ -4349,6 +4359,8 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 	 */
 	max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
 		- map.m_lblk;
+
+	old_size = i_size_read(inode);
 	/*
 	 * credits to insert 1 extent into extent tree
 	 */
@@ -4403,6 +4415,24 @@ retry:
 		goto retry;
 	}
 	mutex_unlock(&inode->i_mutex);
+
+	/* if the fallocate expand the file size, we need to zeroout
+	 * extra pages in cluster */
+	if (len + offset > old_size) {
+		credits = ext4_chunk_trans_blocks(inode, sbi->s_cluster_ratio);
+		handle = ext4_journal_start(inode, credits);
+		if (IS_ERR(handle)) {
+			ret = PTR_ERR(handle);
+			goto out;
+		}
+		ext4_ext_truncate_zero_pages(handle, inode, old_size);
+		if (IS_SYNC(inode))
+			ext4_handle_sync(handle);
+		ext4_mark_inode_dirty(handle, inode);
+		ext4_journal_stop(handle);
+	}
+
+out:
 	trace_ext4_fallocate_exit(inode, offset, max_blocks,
 				ret > 0 ? ret2 : ret);
 	return ret > 0 ? ret2 : ret;
-- 
1.7.3.2


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 9/9 bigalloc] ext4: make cluster works for mmap
  2011-11-09 11:16 [PATCH 0/9 bigalloc] ext4: change unit of extent's ee_block and ee_len from block to cluster Robin Dong
                   ` (7 preceding siblings ...)
  2011-11-09 11:17 ` [PATCH 8/9 bigalloc] ext4: align fallocate size to a whole cluster Robin Dong
@ 2011-11-09 11:17 ` Robin Dong
  8 siblings, 0 replies; 10+ messages in thread
From: Robin Dong @ 2011-11-09 11:17 UTC (permalink / raw)
  To: linux-ext4; +Cc: Robin Dong

From: Robin Dong <sanbai@taobao.com>

When users write a page in an mmap region, we need to zero out the other
pages around it.

Signed-off-by: Robin Dong <sanbai@taobao.com>
---
 fs/ext4/inode.c |   69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 68 insertions(+), 1 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 69d0031..4a25767 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4649,13 +4649,17 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 	struct page *page = vmf->page;
 	loff_t size;
 	unsigned long len;
-	int ret;
+	int ret, i, uninit = 0;
 	struct file *file = vma->vm_file;
 	struct inode *inode = file->f_path.dentry->d_inode;
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	struct address_space *mapping = inode->i_mapping;
+	struct ext4_write_cluster_ctxt *ewcc = NULL;
 	handle_t *handle;
 	get_block_t *get_block;
 	int retries = 0;
+	unsigned int flags = AOP_FLAG_NOFS;
+	unsigned long from, to;
 
 	/*
 	 * This check is racy but catches the common case. We rely on
@@ -4712,7 +4716,47 @@ retry_alloc:
 		ret = VM_FAULT_SIGBUS;
 		goto out;
 	}
+
+	ewcc = ext4_alloc_write_cluster_ctxt();
+	if (!ewcc) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	if (sbi->s_cluster_ratio > 1) {
+		/* We need to know whether the block is allocated already
+		 */
+		struct ext4_map_blocks map;
+		map.m_lblk = page->index;
+		map.m_len = 1;
+		ret = ext4_map_blocks(handle, inode, &map, 0);
+		uninit = map.m_flags & EXT4_MAP_UNWRITTEN;
+		if (ret <= 0 || uninit) {
+			ret = ext4_prepare_cluster_left_pages(inode,
+					page->index, ewcc, flags);
+			if (ret)
+				goto err_out;
+		}
+	}
+
 	ret = __block_page_mkwrite(vma, vmf, get_block);
+	if (ret)
+		goto err_out;
+
+	if (sbi->s_cluster_ratio > 1 && uninit) {
+		ret = ext4_prepare_cluster_right_pages(inode, page->index,
+				ewcc, flags);
+		if (ret)
+			goto err_out;
+		for (i = 0; i < ewcc->w_num_pages; i++) {
+			if (!ewcc->w_pages[i] ||
+					!page_buffers(ewcc->w_pages[i]))
+				break;
+			block_commit_write(ewcc->w_pages[i],
+					0, PAGE_CACHE_SIZE);
+		}
+	}
+
 	if (!ret && ext4_should_journal_data(inode)) {
 		if (walk_page_buffers(handle, page_buffers(page), 0,
 			  PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) {
@@ -4720,13 +4764,36 @@ retry_alloc:
 			ret = VM_FAULT_SIGBUS;
 			goto out;
 		}
+
+		for (i = 0; i < ewcc->w_num_pages; i++) {
+			page = ewcc->w_pages[i];
+			if (!page || !page_buffers(page))
+				continue;
+			from = page->index << PAGE_CACHE_SHIFT;
+			to = from + PAGE_CACHE_SIZE;
+			ret = walk_page_buffers(handle, page_buffers(page),
+				from, to, NULL, do_journal_get_write_access);
+			if (ret) {
+				ret = VM_FAULT_SIGBUS;
+				goto out;
+			}
+		}
 		ext4_set_inode_state(inode, EXT4_STATE_JDATA);
 	}
+
+err_out:
+	if (ewcc) {
+		ext4_free_write_cluster_ctxt(ewcc);
+		ewcc = NULL;
+	}
 	ext4_journal_stop(handle);
 	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
 		goto retry_alloc;
 out_ret:
 	ret = block_page_mkwrite_return(ret);
+
 out:
+	if (ewcc)
+		ext4_free_write_cluster_ctxt(ewcc);
 	return ret;
 }
-- 
1.7.3.2


^ permalink raw reply related	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2011-11-09 11:17 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-11-09 11:16 [PATCH 0/9 bigalloc] ext4: change unit of extent's ee_block and ee_len from block to cluster Robin Dong
2011-11-09 11:17 ` [PATCH 1/9 bigalloc] ext4: get blocks from ext4_ext_get_actual_blocks Robin Dong
2011-11-09 11:17 ` [PATCH 2/9 bigalloc] ext4: change ext4_ext_map_blocks to allocate clusters instead of blocks Robin Dong
2011-11-09 11:17 ` [PATCH 3/9 bigalloc] ext4: change unit of ee_block of extent to cluster Robin Dong
2011-11-09 11:17 ` [PATCH 4/9] ext4: remove unused functions and tags Robin Dong
2011-11-09 11:17 ` [PATCH 5/9 bigalloc] ext4: zero out extra pages when users write one page Robin Dong
2011-11-09 11:17 ` [PATCH 6/9 bigalloc] ext4: zero out extra pages when truncate file Robin Dong
2011-11-09 11:17 ` [PATCH 7/9 bigalloc] ext4: allocate a cluster for a directory when it need spaces Robin Dong
2011-11-09 11:17 ` [PATCH 8/9 bigalloc] ext4: align fallocate size to a whole cluster Robin Dong
2011-11-09 11:17 ` [PATCH 9/9 bigalloc] ext4: make cluster works for mmap Robin Dong

This is a public inbox; see mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).