linux-ext4.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/3] ext4: migrate cleanup
@ 2011-09-17 13:32 Dmitry Monakhov
  2011-09-17 13:32 ` [PATCH 2/3] ext4: fix quota accounting during migration Dmitry Monakhov
                   ` (3 more replies)
  0 siblings, 4 replies; 9+ messages in thread
From: Dmitry Monakhov @ 2011-09-17 13:32 UTC (permalink / raw)
  To: linux-ext4; +Cc: aneesh.kumar, tytso, Dmitry Monakhov

This patch cleanup code a bit, actual logic not changed
- Move current block pointer to migrate_structure, let's all
  walk info will be in one structure.
- Get rid of usless null ind-block ptr checks, caller already
  does that check.

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
---
 fs/ext4/migrate.c |  101 ++++++++++++++++++-----------------------------------
 1 files changed, 34 insertions(+), 67 deletions(-)

diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index b57b98f..e263d78 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -21,13 +21,14 @@
  * The contiguous blocks details which can be
  * represented by a single extent
  */
-struct list_blocks_struct {
-	ext4_lblk_t first_block, last_block;
+struct migrate_struct {
+	ext4_lblk_t first_block, last_block, curr_block;
 	ext4_fsblk_t first_pblock, last_pblock;
+
 };
 
 static int finish_range(handle_t *handle, struct inode *inode,
-				struct list_blocks_struct *lb)
+				struct migrate_struct *lb)
 
 {
 	int retval = 0, needed;
@@ -87,8 +88,7 @@ err_out:
 }
 
 static int update_extent_range(handle_t *handle, struct inode *inode,
-				ext4_fsblk_t pblock, ext4_lblk_t blk_num,
-				struct list_blocks_struct *lb)
+				ext4_fsblk_t pblock, struct migrate_struct *lb)
 {
 	int retval;
 	/*
@@ -96,9 +96,10 @@ static int update_extent_range(handle_t *handle, struct inode *inode,
 	 */
 	if (lb->first_pblock &&
 		(lb->last_pblock+1 == pblock) &&
-		(lb->last_block+1 == blk_num)) {
+		(lb->last_block+1 == lb->curr_block)) {
 		lb->last_pblock = pblock;
-		lb->last_block = blk_num;
+		lb->last_block = lb->curr_block;
+		lb->curr_block++;
 		return 0;
 	}
 	/*
@@ -106,64 +107,47 @@ static int update_extent_range(handle_t *handle, struct inode *inode,
 	 */
 	retval = finish_range(handle, inode, lb);
 	lb->first_pblock = lb->last_pblock = pblock;
-	lb->first_block = lb->last_block = blk_num;
-
+	lb->first_block = lb->last_block = lb->curr_block;
+	lb->curr_block++;
 	return retval;
 }
 
 static int update_ind_extent_range(handle_t *handle, struct inode *inode,
-				   ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
-				   struct list_blocks_struct *lb)
+				ext4_fsblk_t pblock, struct migrate_struct *lb)
 {
 	struct buffer_head *bh;
 	__le32 *i_data;
 	int i, retval = 0;
-	ext4_lblk_t blk_count = *blk_nump;
 	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
 
-	if (!pblock) {
-		/* Only update the file block number */
-		*blk_nump += max_entries;
-		return 0;
-	}
-
 	bh = sb_bread(inode->i_sb, pblock);
 	if (!bh)
 		return -EIO;
 
 	i_data = (__le32 *)bh->b_data;
-	for (i = 0; i < max_entries; i++, blk_count++) {
+	for (i = 0; i < max_entries; i++) {
 		if (i_data[i]) {
 			retval = update_extent_range(handle, inode,
-						le32_to_cpu(i_data[i]),
-						blk_count, lb);
+						le32_to_cpu(i_data[i]), lb);
 			if (retval)
 				break;
+		} else {
+			lb->curr_block++;
 		}
 	}
-
-	/* Update the file block number */
-	*blk_nump = blk_count;
 	put_bh(bh);
 	return retval;
 
 }
 
 static int update_dind_extent_range(handle_t *handle, struct inode *inode,
-				    ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
-				    struct list_blocks_struct *lb)
+				ext4_fsblk_t pblock, struct migrate_struct *lb)
 {
 	struct buffer_head *bh;
 	__le32 *i_data;
 	int i, retval = 0;
-	ext4_lblk_t blk_count = *blk_nump;
 	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
 
-	if (!pblock) {
-		/* Only update the file block number */
-		*blk_nump += max_entries * max_entries;
-		return 0;
-	}
 	bh = sb_bread(inode->i_sb, pblock);
 	if (!bh)
 		return -EIO;
@@ -172,38 +156,27 @@ static int update_dind_extent_range(handle_t *handle, struct inode *inode,
 	for (i = 0; i < max_entries; i++) {
 		if (i_data[i]) {
 			retval = update_ind_extent_range(handle, inode,
-						le32_to_cpu(i_data[i]),
-						&blk_count, lb);
+						le32_to_cpu(i_data[i]), lb);
 			if (retval)
 				break;
 		} else {
 			/* Only update the file block number */
-			blk_count += max_entries;
+			lb->curr_block += max_entries;
 		}
 	}
-
-	/* Update the file block number */
-	*blk_nump = blk_count;
 	put_bh(bh);
 	return retval;
 
 }
 
 static int update_tind_extent_range(handle_t *handle, struct inode *inode,
-				     ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
-				     struct list_blocks_struct *lb)
+				ext4_fsblk_t pblock, struct migrate_struct *lb)
 {
 	struct buffer_head *bh;
 	__le32 *i_data;
 	int i, retval = 0;
-	ext4_lblk_t blk_count = *blk_nump;
 	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
 
-	if (!pblock) {
-		/* Only update the file block number */
-		*blk_nump += max_entries * max_entries * max_entries;
-		return 0;
-	}
 	bh = sb_bread(inode->i_sb, pblock);
 	if (!bh)
 		return -EIO;
@@ -211,17 +184,15 @@ static int update_tind_extent_range(handle_t *handle, struct inode *inode,
 	i_data = (__le32 *)bh->b_data;
 	for (i = 0; i < max_entries; i++) {
 		if (i_data[i]) {
-			retval = update_dind_extent_range(handle, inode,
-						le32_to_cpu(i_data[i]),
-						&blk_count, lb);
+			retval = update_ind_extent_range(handle, inode,
+						le32_to_cpu(i_data[i]), lb);
 			if (retval)
 				break;
-		} else
+		} else {
 			/* Only update the file block number */
-			blk_count += max_entries * max_entries;
+			lb->curr_block += max_entries * max_entries;
+		}
 	}
-	/* Update the file block number */
-	*blk_nump = blk_count;
 	put_bh(bh);
 	return retval;
 
@@ -462,10 +433,9 @@ int ext4_ext_migrate(struct inode *inode)
 	handle_t *handle;
 	int retval = 0, i;
 	__le32 *i_data;
-	ext4_lblk_t blk_count = 0;
 	struct ext4_inode_info *ei;
 	struct inode *tmp_inode = NULL;
-	struct list_blocks_struct lb;
+	struct migrate_struct lb;
 	unsigned long max_entries;
 	__u32 goal;
 
@@ -551,35 +521,32 @@ int ext4_ext_migrate(struct inode *inode)
 
 	/* 32 bit block address 4 bytes */
 	max_entries = inode->i_sb->s_blocksize >> 2;
-	for (i = 0; i < EXT4_NDIR_BLOCKS; i++, blk_count++) {
+	for (i = 0; i < EXT4_NDIR_BLOCKS; i++) {
 		if (i_data[i]) {
 			retval = update_extent_range(handle, tmp_inode,
-						le32_to_cpu(i_data[i]),
-						blk_count, &lb);
+						le32_to_cpu(i_data[i]), &lb);
 			if (retval)
 				goto err_out;
-		}
+		} else
+			lb.curr_block++;
 	}
 	if (i_data[EXT4_IND_BLOCK]) {
 		retval = update_ind_extent_range(handle, tmp_inode,
-					le32_to_cpu(i_data[EXT4_IND_BLOCK]),
-					&blk_count, &lb);
+				le32_to_cpu(i_data[EXT4_IND_BLOCK]), &lb);
 			if (retval)
 				goto err_out;
 	} else
-		blk_count +=  max_entries;
+		lb.curr_block += max_entries;
 	if (i_data[EXT4_DIND_BLOCK]) {
 		retval = update_dind_extent_range(handle, tmp_inode,
-					le32_to_cpu(i_data[EXT4_DIND_BLOCK]),
-					&blk_count, &lb);
+				le32_to_cpu(i_data[EXT4_DIND_BLOCK]), &lb);
 			if (retval)
 				goto err_out;
 	} else
-		blk_count += max_entries * max_entries;
+		lb.curr_block += max_entries * max_entries;
 	if (i_data[EXT4_TIND_BLOCK]) {
 		retval = update_tind_extent_range(handle, tmp_inode,
-					le32_to_cpu(i_data[EXT4_TIND_BLOCK]),
-					&blk_count, &lb);
+				le32_to_cpu(i_data[EXT4_TIND_BLOCK]), &lb);
 			if (retval)
 				goto err_out;
 	}
-- 
1.7.2.3


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/3] ext4: fix quota accounting during migration
  2011-09-17 13:32 [PATCH 1/3] ext4: migrate cleanup Dmitry Monakhov
@ 2011-09-17 13:32 ` Dmitry Monakhov
  2011-10-29 13:09   ` Ted Ts'o
  2011-09-17 13:32 ` [PATCH 3/3] ext4: fix block swap procedure on migration V2 Dmitry Monakhov
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 9+ messages in thread
From: Dmitry Monakhov @ 2011-09-17 13:32 UTC (permalink / raw)
  To: linux-ext4; +Cc: aneesh.kumar, tytso, Dmitry Monakhov

tmp_inode should have same uid/gid as  original inode.
Otherwise new metadata blocks will be accounted in to wrong
quota-id which result in quota leak after indirect blocks swap.

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
---
 fs/ext4/ext4.h    |    3 ++-
 fs/ext4/ialloc.c  |    9 ++++++---
 fs/ext4/migrate.c |    7 +++++--
 fs/ext4/namei.c   |    8 ++++----
 4 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index e717dfd..3aa2f7c 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1775,7 +1775,8 @@ extern int ext4fs_dirhash(const char *name, int len, struct
 
 /* ialloc.c */
 extern struct inode *ext4_new_inode(handle_t *, struct inode *, int,
-				    const struct qstr *qstr, __u32 goal);
+				const struct qstr *qstr, __u32 goal,
+				uid_t *owner);
 extern void ext4_free_inode(handle_t *, struct inode *);
 extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
 extern unsigned long ext4_count_free_inodes(struct super_block *);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 9c63f27..9bfa93e 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -802,7 +802,7 @@ err_ret:
  * group to find a free inode.
  */
 struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
-			     const struct qstr *qstr, __u32 goal)
+			const struct qstr *qstr, __u32 goal, uid_t *owner)
 {
 	struct super_block *sb;
 	struct buffer_head *inode_bitmap_bh = NULL;
@@ -987,8 +987,11 @@ got:
 		flex_group = ext4_flex_group(sbi, group);
 		atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes);
 	}
-
-	if (test_opt(sb, GRPID)) {
+	if (owner) {
+		inode->i_mode = mode;
+		inode->i_uid = owner[0];
+		inode->i_gid = owner[1];
+	} else if (test_opt(sb, GRPID)) {
 		inode->i_mode = mode;
 		inode->i_uid = current_fsuid();
 		inode->i_gid = dir->i_gid;
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index e263d78..74f2900 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -438,6 +438,7 @@ int ext4_ext_migrate(struct inode *inode)
 	struct migrate_struct lb;
 	unsigned long max_entries;
 	__u32 goal;
+	uid_t owner[2];
 
 	/*
 	 * If the filesystem does not support extents, or the inode
@@ -465,10 +466,12 @@ int ext4_ext_migrate(struct inode *inode)
 	}
 	goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) *
 		EXT4_INODES_PER_GROUP(inode->i_sb)) + 1;
+	owner[0] = inode->i_uid;
+	owner[1] = inode->i_gid;
 	tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
-				   S_IFREG, NULL, goal);
+				S_IFREG, NULL, goal, owner);
 	if (IS_ERR(tmp_inode)) {
-		retval = -ENOMEM;
+		retval = PTR_ERR(inode);
 		ext4_journal_stop(handle);
 		return retval;
 	}
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index f8068c7..d5f50de 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1755,7 +1755,7 @@ retry:
 	if (IS_DIRSYNC(dir))
 		ext4_handle_sync(handle);
 
-	inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0);
+	inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL);
 	err = PTR_ERR(inode);
 	if (!IS_ERR(inode)) {
 		inode->i_op = &ext4_file_inode_operations;
@@ -1791,7 +1791,7 @@ retry:
 	if (IS_DIRSYNC(dir))
 		ext4_handle_sync(handle);
 
-	inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0);
+	inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL);
 	err = PTR_ERR(inode);
 	if (!IS_ERR(inode)) {
 		init_special_inode(inode, inode->i_mode, rdev);
@@ -1831,7 +1831,7 @@ retry:
 		ext4_handle_sync(handle);
 
 	inode = ext4_new_inode(handle, dir, S_IFDIR | mode,
-			       &dentry->d_name, 0);
+				&dentry->d_name, 0, NULL);
 	err = PTR_ERR(inode);
 	if (IS_ERR(inode))
 		goto out_stop;
@@ -2278,7 +2278,7 @@ retry:
 		ext4_handle_sync(handle);
 
 	inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO,
-			       &dentry->d_name, 0);
+				&dentry->d_name, 0, NULL);
 	err = PTR_ERR(inode);
 	if (IS_ERR(inode))
 		goto out_stop;
-- 
1.7.2.3


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 3/3] ext4: fix block swap procedure on migration V2
  2011-09-17 13:32 [PATCH 1/3] ext4: migrate cleanup Dmitry Monakhov
  2011-09-17 13:32 ` [PATCH 2/3] ext4: fix quota accounting during migration Dmitry Monakhov
@ 2011-09-17 13:32 ` Dmitry Monakhov
  2011-09-19 16:29   ` Andreas Dilger
  2011-10-28 20:34 ` Ping Dmitry Monakhov
  2011-10-29 12:31 ` [PATCH 1/3] ext4: migrate cleanup Ted Ts'o
  3 siblings, 1 reply; 9+ messages in thread
From: Dmitry Monakhov @ 2011-09-17 13:32 UTC (permalink / raw)
  To: linux-ext4; +Cc: aneesh.kumar, tytso, Dmitry Monakhov

Currently if system halted in the midle of migration procedure
tmp_inode will be truncated on next mount during orphan list cleanup
as a regular inode, but in fact it is the special one. Temporal inode
refers to original's inode leaf blocks, but does not own it.
And we should free only index blocks, but not a leaf one.
We have to somehow flag migration inode as a special one, and then
cleanup it accordingly. The flag in question should survive
trough reboots.
This patch introduce new inode flag for this purpose. Yes I know
that may seems not optimal to use didicated flag for such a rare case,
and if you know a better way, or a good flag that we can share please
say tell me. Also we don't need didicated (migration only) cleanup
functions. It is reasonable just skip leaf blocks for inodes with
migration flag enabled inode inside ext4_truncate(). So tmp_inode will
be cleaned automatically on last iput()

- Introduce new inode flag for temporal inodes
- Move cleanup logic to truncate.
- Remove duplicated code. We no longer need it.

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
---
 fs/ext4/ext4.h     |    3 +
 fs/ext4/extents.c  |   10 ++-
 fs/ext4/indirect.c |    4 +
 fs/ext4/migrate.c  |  247 +++++-----------------------------------------------
 4 files changed, 35 insertions(+), 229 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 3aa2f7c..8a6f612 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -350,6 +350,7 @@ struct flex_groups {
 #define EXT4_EXTENTS_FL			0x00080000 /* Inode uses extents */
 #define EXT4_EA_INODE_FL	        0x00200000 /* Inode used for large EA */
 #define EXT4_EOFBLOCKS_FL		0x00400000 /* Blocks allocated beyond EOF */
+#define EXT4_MIGRATE_FL			0x10000000 /* Inode used for data migration */
 #define EXT4_RESERVED_FL		0x80000000 /* reserved for ext4 lib */
 
 #define EXT4_FL_USER_VISIBLE		0x004BDFFF /* User visible flags */
@@ -407,6 +408,7 @@ enum {
 	EXT4_INODE_EXTENTS	= 19,	/* Inode uses extents */
 	EXT4_INODE_EA_INODE	= 21,	/* Inode used for large EA */
 	EXT4_INODE_EOFBLOCKS	= 22,	/* Blocks allocated beyond EOF */
+	EXT4_INODE_MIGRATE	= 28,	/* Inode used for data migration */
 	EXT4_INODE_RESERVED	= 31,	/* reserved for ext4 lib */
 };
 
@@ -453,6 +455,7 @@ static inline void ext4_check_flag_values(void)
 	CHECK_FLAG_VALUE(EXTENTS);
 	CHECK_FLAG_VALUE(EA_INODE);
 	CHECK_FLAG_VALUE(EOFBLOCKS);
+	CHECK_FLAG_VALUE(MIGRATE);
 	CHECK_FLAG_VALUE(RESERVED);
 }
 
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 57cf568..0ac5a63 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2416,10 +2416,12 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 		if (err)
 			goto out;
 
-		err = ext4_remove_blocks(handle, inode, ex, a, b);
-		if (err)
-			goto out;
-
+		/* Migration inode does not own it's leaf blocks */
+		if (!ext4_test_inode_flag(inode, EXT4_INODE_MIGRATE)) {
+			err = ext4_remove_blocks(handle, inode, ex, a, b);
+			if (err)
+				goto out;
+		}
 		if (num == 0) {
 			/* this extent is removed; mark slot entirely unused */
 			ext4_ext_store_pblock(ex, 0);
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 0962642..4581d0e 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -1147,6 +1147,10 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
 					       for current block */
 	int err = 0;
 
+	/* Migration inode does not own it's data blocks */
+	if (ext4_test_inode_flag(inode, EXT4_INODE_MIGRATE))
+		return;
+
 	if (this_bh) {				/* For indirect block */
 		BUFFER_TRACE(this_bh, "get_write_access");
 		err = ext4_journal_get_write_access(handle, this_bh);
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 74f2900..a75e40d 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -24,6 +24,7 @@
 struct migrate_struct {
 	ext4_lblk_t first_block, last_block, curr_block;
 	ext4_fsblk_t first_pblock, last_pblock;
+	blkcnt_t ind_blocks;
 
 };
 
@@ -135,6 +136,7 @@ static int update_ind_extent_range(handle_t *handle, struct inode *inode,
 			lb->curr_block++;
 		}
 	}
+	lb->ind_blocks++;
 	put_bh(bh);
 	return retval;
 
@@ -164,6 +166,7 @@ static int update_dind_extent_range(handle_t *handle, struct inode *inode,
 			lb->curr_block += max_entries;
 		}
 	}
+	lb->ind_blocks++;
 	put_bh(bh);
 	return retval;
 
@@ -193,144 +196,30 @@ static int update_tind_extent_range(handle_t *handle, struct inode *inode,
 			lb->curr_block += max_entries * max_entries;
 		}
 	}
+	lb->ind_blocks++;
 	put_bh(bh);
 	return retval;
 
 }
 
-static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode)
-{
-	int retval = 0, needed;
-
-	if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1))
-		return 0;
-	/*
-	 * We are freeing a blocks. During this we touch
-	 * superblock, group descriptor and block bitmap.
-	 * So allocate a credit of 3. We may update
-	 * quota (user and group).
-	 */
-	needed = 3 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
-
-	if (ext4_journal_extend(handle, needed) != 0)
-		retval = ext4_journal_restart(handle, needed);
-
-	return retval;
-}
-
-static int free_dind_blocks(handle_t *handle,
-				struct inode *inode, __le32 i_data)
-{
-	int i;
-	__le32 *tmp_idata;
-	struct buffer_head *bh;
-	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
-
-	bh = sb_bread(inode->i_sb, le32_to_cpu(i_data));
-	if (!bh)
-		return -EIO;
-
-	tmp_idata = (__le32 *)bh->b_data;
-	for (i = 0; i < max_entries; i++) {
-		if (tmp_idata[i]) {
-			extend_credit_for_blkdel(handle, inode);
-			ext4_free_blocks(handle, inode, NULL,
-					 le32_to_cpu(tmp_idata[i]), 1,
-					 EXT4_FREE_BLOCKS_METADATA |
-					 EXT4_FREE_BLOCKS_FORGET);
-		}
-	}
-	put_bh(bh);
-	extend_credit_for_blkdel(handle, inode);
-	ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1,
-			 EXT4_FREE_BLOCKS_METADATA |
-			 EXT4_FREE_BLOCKS_FORGET);
-	return 0;
-}
-
-static int free_tind_blocks(handle_t *handle,
-				struct inode *inode, __le32 i_data)
-{
-	int i, retval = 0;
-	__le32 *tmp_idata;
-	struct buffer_head *bh;
-	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
-
-	bh = sb_bread(inode->i_sb, le32_to_cpu(i_data));
-	if (!bh)
-		return -EIO;
-
-	tmp_idata = (__le32 *)bh->b_data;
-	for (i = 0; i < max_entries; i++) {
-		if (tmp_idata[i]) {
-			retval = free_dind_blocks(handle,
-					inode, tmp_idata[i]);
-			if (retval) {
-				put_bh(bh);
-				return retval;
-			}
-		}
-	}
-	put_bh(bh);
-	extend_credit_for_blkdel(handle, inode);
-	ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1,
-			 EXT4_FREE_BLOCKS_METADATA |
-			 EXT4_FREE_BLOCKS_FORGET);
-	return 0;
-}
-
-static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data)
-{
-	int retval;
-
-	/* ei->i_data[EXT4_IND_BLOCK] */
-	if (i_data[0]) {
-		extend_credit_for_blkdel(handle, inode);
-		ext4_free_blocks(handle, inode, NULL,
-				le32_to_cpu(i_data[0]), 1,
-				 EXT4_FREE_BLOCKS_METADATA |
-				 EXT4_FREE_BLOCKS_FORGET);
-	}
-
-	/* ei->i_data[EXT4_DIND_BLOCK] */
-	if (i_data[1]) {
-		retval = free_dind_blocks(handle, inode, i_data[1]);
-		if (retval)
-			return retval;
-	}
-
-	/* ei->i_data[EXT4_TIND_BLOCK] */
-	if (i_data[2]) {
-		retval = free_tind_blocks(handle, inode, i_data[2]);
-		if (retval)
-			return retval;
-	}
-	return 0;
-}
-
 static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
-						struct inode *tmp_inode)
+				struct inode *tmp_inode,
+				struct migrate_struct *mreq)
 {
 	int retval;
-	__le32	i_data[3];
+	__le32  i_data[EXT4_N_BLOCKS];
 	struct ext4_inode_info *ei = EXT4_I(inode);
 	struct ext4_inode_info *tmp_ei = EXT4_I(tmp_inode);
-
 	/*
-	 * One credit accounted for writing the
-	 * i_data field of the original inode
+	 * Two credits accounted for writing the
+	 * i_data field of the two inodes
 	 */
-	retval = ext4_journal_extend(handle, 1);
-	if (retval) {
+	if (ext4_journal_extend(handle, 2) != 0) {
 		retval = ext4_journal_restart(handle, 1);
 		if (retval)
 			goto err_out;
 	}
-
-	i_data[0] = ei->i_data[EXT4_IND_BLOCK];
-	i_data[1] = ei->i_data[EXT4_DIND_BLOCK];
-	i_data[2] = ei->i_data[EXT4_TIND_BLOCK];
-
+	memcpy(i_data, ei->i_data, sizeof(ei->i_data));
 	down_write(&EXT4_I(inode)->i_data_sem);
 	/*
 	 * if EXT4_STATE_EXT_MIGRATE is cleared a block allocation
@@ -345,89 +234,31 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
 		ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
 	/*
 	 * We have the extent map build with the tmp inode.
-	 * Now copy the i_data across
+	 * Now swap i_data maps
 	 */
 	ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS);
 	memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data));
-
+	ext4_clear_inode_flag(tmp_inode, EXT4_INODE_EXTENTS);
+	memcpy(tmp_ei->i_data, i_data, sizeof(ei->i_data));
 	/*
 	 * Update i_blocks with the new blocks that got
 	 * allocated while adding extents for extent index
 	 * blocks.
 	 *
-	 * While converting to extents we need not
-	 * update the orignal inode i_blocks for extent blocks
-	 * via quota APIs. The quota update happened via tmp_inode already.
+	 * While converting to extents new meta_data blocks was accounted for
+	 * tmp_inode, swap counter info with original inode.
 	 */
+	mreq->ind_blocks <<= (inode->i_blkbits - 9);
 	spin_lock(&inode->i_lock);
-	inode->i_blocks += tmp_inode->i_blocks;
+	inode->i_blocks += tmp_inode->i_blocks - mreq->ind_blocks;
 	spin_unlock(&inode->i_lock);
+	tmp_inode->i_blocks = mreq->ind_blocks;
 	up_write(&EXT4_I(inode)->i_data_sem);
-
-	/*
-	 * We mark the inode dirty after, because we decrement the
-	 * i_blocks when freeing the indirect meta-data blocks
-	 */
-	retval = free_ind_block(handle, inode, i_data);
 	ext4_mark_inode_dirty(handle, inode);
-
 err_out:
 	return retval;
 }
 
-static int free_ext_idx(handle_t *handle, struct inode *inode,
-					struct ext4_extent_idx *ix)
-{
-	int i, retval = 0;
-	ext4_fsblk_t block;
-	struct buffer_head *bh;
-	struct ext4_extent_header *eh;
-
-	block = ext4_idx_pblock(ix);
-	bh = sb_bread(inode->i_sb, block);
-	if (!bh)
-		return -EIO;
-
-	eh = (struct ext4_extent_header *)bh->b_data;
-	if (eh->eh_depth != 0) {
-		ix = EXT_FIRST_INDEX(eh);
-		for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) {
-			retval = free_ext_idx(handle, inode, ix);
-			if (retval)
-				break;
-		}
-	}
-	put_bh(bh);
-	extend_credit_for_blkdel(handle, inode);
-	ext4_free_blocks(handle, inode, NULL, block, 1,
-			 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
-	return retval;
-}
-
-/*
- * Free the extent meta data blocks only
- */
-static int free_ext_block(handle_t *handle, struct inode *inode)
-{
-	int i, retval = 0;
-	struct ext4_inode_info *ei = EXT4_I(inode);
-	struct ext4_extent_header *eh = (struct ext4_extent_header *)ei->i_data;
-	struct ext4_extent_idx *ix;
-	if (eh->eh_depth == 0)
-		/*
-		 * No extra blocks allocated for extent meta data
-		 */
-		return 0;
-	ix = EXT_FIRST_INDEX(eh);
-	for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) {
-		retval = free_ext_idx(handle, inode, ix);
-		if (retval)
-			return retval;
-	}
-	return retval;
-
-}
-
 int ext4_ext_migrate(struct inode *inode)
 {
 	handle_t *handle;
@@ -481,7 +312,7 @@ int ext4_ext_migrate(struct inode *inode)
 	 * when we drop inode reference.
 	 */
 	tmp_inode->i_nlink = 0;
-
+	ext4_set_inode_flag(tmp_inode, EXT4_INODE_MIGRATE);
 	ext4_ext_tree_init(handle, tmp_inode);
 	ext4_orphan_add(handle, tmp_inode);
 	ext4_journal_stop(handle);
@@ -557,44 +388,10 @@ int ext4_ext_migrate(struct inode *inode)
 	 * Build the last extent
 	 */
 	retval = finish_range(handle, tmp_inode, &lb);
+	if (!retval)
+		retval = ext4_ext_swap_inode_data(handle, inode, tmp_inode, &lb);
 err_out:
-	if (retval)
-		/*
-		 * Failure case delete the extent information with the
-		 * tmp_inode
-		 */
-		free_ext_block(handle, tmp_inode);
-	else {
-		retval = ext4_ext_swap_inode_data(handle, inode, tmp_inode);
-		if (retval)
-			/*
-			 * if we fail to swap inode data free the extent
-			 * details of the tmp inode
-			 */
-			free_ext_block(handle, tmp_inode);
-	}
 
-	/* We mark the tmp_inode dirty via ext4_ext_tree_init. */
-	if (ext4_journal_extend(handle, 1) != 0)
-		ext4_journal_restart(handle, 1);
-
-	/*
-	 * Mark the tmp_inode as of size zero
-	 */
-	i_size_write(tmp_inode, 0);
-
-	/*
-	 * set the  i_blocks count to zero
-	 * so that the ext4_delete_inode does the
-	 * right job
-	 *
-	 * We don't need to take the i_lock because
-	 * the inode is not visible to user space.
-	 */
-	tmp_inode->i_blocks = 0;

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 3/3] ext4: fix block swap procedure on migration V2
  2011-09-17 13:32 ` [PATCH 3/3] ext4: fix block swap procedure on migration V2 Dmitry Monakhov
@ 2011-09-19 16:29   ` Andreas Dilger
  2011-09-19 16:45     ` Dmitry Monakhov
  0 siblings, 1 reply; 9+ messages in thread
From: Andreas Dilger @ 2011-09-19 16:29 UTC (permalink / raw)
  To: Dmitry Monakhov; +Cc: linux-ext4, aneesh.kumar, tytso

On 2011-09-17, at 7:32 AM, Dmitry Monakhov wrote:
> Currently if system halted in the midle of migration procedure
> tmp_inode will be truncated on next mount during orphan list cleanup
> as a regular inode, but in fact it is the special one. Temporal inode
> refers to original's inode leaf blocks, but does not own it.
> And we should free only index blocks, but not a leaf one.
> We have to somehow flag migration inode as a special one, and then
> cleanup it accordingly. The flag in question should survive
> trough reboots.

I think this is potentially the wrong approach for extent migration.
We don't want the kernel to automatically delete these inodes, which
is what should happen if they are on the orphan list, since it means
the blocks will be marked unused by the bitmap, and further corruption
will result.

It would potentially be better to just leave the inode off the orphan
list, and in the _extremely_ rare case that there is a crash during
inode migration the inode is leaked until e2fsck is run.  The migrate
will happen at most once for any filesystem, so the loss of a single
inode per crash will not be a serious issue IMHO.

> This patch introduce new inode flag for this purpose. Yes I know
> that may seems not optimal to use didicated flag for such a rare case,
> and if you know a better way, or a good flag that we can share please
> say tell me. Also we don't need didicated (migration only) cleanup
> functions. It is reasonable just skip leaf blocks for inodes with
> migration flag enabled inode inside ext4_truncate(). So tmp_inode will
> be cleaned automatically on last iput()
> 
> - Introduce new inode flag for temporal inodes
> - Move cleanup logic to truncate.
> - Remove duplicated code. We no longer need it.
> 
> Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
> ---
> fs/ext4/ext4.h     |    3 +
> fs/ext4/extents.c  |   10 ++-
> fs/ext4/indirect.c |    4 +
> fs/ext4/migrate.c  |  247 +++++-----------------------------------------------
> 4 files changed, 35 insertions(+), 229 deletions(-)
> 
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 3aa2f7c..8a6f612 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -350,6 +350,7 @@ struct flex_groups {
> #define EXT4_EXTENTS_FL			0x00080000 /* Inode uses extents */
> #define EXT4_EA_INODE_FL	        0x00200000 /* Inode used for large EA */
> #define EXT4_EOFBLOCKS_FL		0x00400000 /* Blocks allocated beyond EOF */
> +#define EXT4_MIGRATE_FL			0x10000000 /* Inode used for data migration */
> #define EXT4_RESERVED_FL		0x80000000 /* reserved for ext4 lib */
> 
> #define EXT4_FL_USER_VISIBLE		0x004BDFFF /* User visible flags */
> @@ -407,6 +408,7 @@ enum {
> 	EXT4_INODE_EXTENTS	= 19,	/* Inode uses extents */
> 	EXT4_INODE_EA_INODE	= 21,	/* Inode used for large EA */
> 	EXT4_INODE_EOFBLOCKS	= 22,	/* Blocks allocated beyond EOF */
> +	EXT4_INODE_MIGRATE	= 28,	/* Inode used for data migration */
> 	EXT4_INODE_RESERVED	= 31,	/* reserved for ext4 lib */
> };
> 
> @@ -453,6 +455,7 @@ static inline void ext4_check_flag_values(void)
> 	CHECK_FLAG_VALUE(EXTENTS);
> 	CHECK_FLAG_VALUE(EA_INODE);
> 	CHECK_FLAG_VALUE(EOFBLOCKS);
> +	CHECK_FLAG_VALUE(MIGRATE);
> 	CHECK_FLAG_VALUE(RESERVED);
> }
> 
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index 57cf568..0ac5a63 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -2416,10 +2416,12 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
> 		if (err)
> 			goto out;
> 
> -		err = ext4_remove_blocks(handle, inode, ex, a, b);
> -		if (err)
> -			goto out;
> -
> +		/* Migration inode does not own it's leaf blocks */
> +		if (!ext4_test_inode_flag(inode, EXT4_INODE_MIGRATE)) {
> +			err = ext4_remove_blocks(handle, inode, ex, a, b);
> +			if (err)
> +				goto out;
> +		}
> 		if (num == 0) {
> 			/* this extent is removed; mark slot entirely unused */
> 			ext4_ext_store_pblock(ex, 0);
> diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
> index 0962642..4581d0e 100644
> --- a/fs/ext4/indirect.c
> +++ b/fs/ext4/indirect.c
> @@ -1147,6 +1147,10 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
> 					       for current block */
> 	int err = 0;
> 
> +	/* Migration inode does not own it's data blocks */
> +	if (ext4_test_inode_flag(inode, EXT4_INODE_MIGRATE))
> +		return;
> +
> 	if (this_bh) {				/* For indirect block */
> 		BUFFER_TRACE(this_bh, "get_write_access");
> 		err = ext4_journal_get_write_access(handle, this_bh);
> diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
> index 74f2900..a75e40d 100644
> --- a/fs/ext4/migrate.c
> +++ b/fs/ext4/migrate.c
> @@ -24,6 +24,7 @@
> struct migrate_struct {
> 	ext4_lblk_t first_block, last_block, curr_block;
> 	ext4_fsblk_t first_pblock, last_pblock;
> +	blkcnt_t ind_blocks;
> 
> };
> 
> @@ -135,6 +136,7 @@ static int update_ind_extent_range(handle_t *handle, struct inode *inode,
> 			lb->curr_block++;
> 		}
> 	}
> +	lb->ind_blocks++;
> 	put_bh(bh);
> 	return retval;
> 
> @@ -164,6 +166,7 @@ static int update_dind_extent_range(handle_t *handle, struct inode *inode,
> 			lb->curr_block += max_entries;
> 		}
> 	}
> +	lb->ind_blocks++;
> 	put_bh(bh);
> 	return retval;
> 
> @@ -193,144 +196,30 @@ static int update_tind_extent_range(handle_t *handle, struct inode *inode,
> 			lb->curr_block += max_entries * max_entries;
> 		}
> 	}
> +	lb->ind_blocks++;
> 	put_bh(bh);
> 	return retval;
> 
> }
> 
> -static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode)
> -{
> -	int retval = 0, needed;
> -
> -	if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1))
> -		return 0;
> -	/*
> -	 * We are freeing a blocks. During this we touch
> -	 * superblock, group descriptor and block bitmap.
> -	 * So allocate a credit of 3. We may update
> -	 * quota (user and group).
> -	 */
> -	needed = 3 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
> -
> -	if (ext4_journal_extend(handle, needed) != 0)
> -		retval = ext4_journal_restart(handle, needed);
> -
> -	return retval;
> -}
> -
> -static int free_dind_blocks(handle_t *handle,
> -				struct inode *inode, __le32 i_data)
> -{
> -	int i;
> -	__le32 *tmp_idata;
> -	struct buffer_head *bh;
> -	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
> -
> -	bh = sb_bread(inode->i_sb, le32_to_cpu(i_data));
> -	if (!bh)
> -		return -EIO;
> -
> -	tmp_idata = (__le32 *)bh->b_data;
> -	for (i = 0; i < max_entries; i++) {
> -		if (tmp_idata[i]) {
> -			extend_credit_for_blkdel(handle, inode);
> -			ext4_free_blocks(handle, inode, NULL,
> -					 le32_to_cpu(tmp_idata[i]), 1,
> -					 EXT4_FREE_BLOCKS_METADATA |
> -					 EXT4_FREE_BLOCKS_FORGET);
> -		}
> -	}
> -	put_bh(bh);
> -	extend_credit_for_blkdel(handle, inode);
> -	ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1,
> -			 EXT4_FREE_BLOCKS_METADATA |
> -			 EXT4_FREE_BLOCKS_FORGET);
> -	return 0;
> -}
> -
> -static int free_tind_blocks(handle_t *handle,
> -				struct inode *inode, __le32 i_data)
> -{
> -	int i, retval = 0;
> -	__le32 *tmp_idata;
> -	struct buffer_head *bh;
> -	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
> -
> -	bh = sb_bread(inode->i_sb, le32_to_cpu(i_data));
> -	if (!bh)
> -		return -EIO;
> -
> -	tmp_idata = (__le32 *)bh->b_data;
> -	for (i = 0; i < max_entries; i++) {
> -		if (tmp_idata[i]) {
> -			retval = free_dind_blocks(handle,
> -					inode, tmp_idata[i]);
> -			if (retval) {
> -				put_bh(bh);
> -				return retval;
> -			}
> -		}
> -	}
> -	put_bh(bh);
> -	extend_credit_for_blkdel(handle, inode);
> -	ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1,
> -			 EXT4_FREE_BLOCKS_METADATA |
> -			 EXT4_FREE_BLOCKS_FORGET);
> -	return 0;
> -}
> -
> -static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data)
> -{
> -	int retval;
> -
> -	/* ei->i_data[EXT4_IND_BLOCK] */
> -	if (i_data[0]) {
> -		extend_credit_for_blkdel(handle, inode);
> -		ext4_free_blocks(handle, inode, NULL,
> -				le32_to_cpu(i_data[0]), 1,
> -				 EXT4_FREE_BLOCKS_METADATA |
> -				 EXT4_FREE_BLOCKS_FORGET);
> -	}
> -
> -	/* ei->i_data[EXT4_DIND_BLOCK] */
> -	if (i_data[1]) {
> -		retval = free_dind_blocks(handle, inode, i_data[1]);
> -		if (retval)
> -			return retval;
> -	}
> -
> -	/* ei->i_data[EXT4_TIND_BLOCK] */
> -	if (i_data[2]) {
> -		retval = free_tind_blocks(handle, inode, i_data[2]);
> -		if (retval)
> -			return retval;
> -	}
> -	return 0;
> -}
> -
> static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
> -						struct inode *tmp_inode)
> +				struct inode *tmp_inode,
> +				struct migrate_struct *mreq)
> {
> 	int retval;
> -	__le32	i_data[3];
> +	__le32  i_data[EXT4_N_BLOCKS];
> 	struct ext4_inode_info *ei = EXT4_I(inode);
> 	struct ext4_inode_info *tmp_ei = EXT4_I(tmp_inode);
> -
> 	/*
> -	 * One credit accounted for writing the
> -	 * i_data field of the original inode
> +	 * Two credits accounted for writing the
> +	 * i_data field of the two inodes
> 	 */
> -	retval = ext4_journal_extend(handle, 1);
> -	if (retval) {
> +	if (ext4_journal_extend(handle, 2) != 0) {
> 		retval = ext4_journal_restart(handle, 1);
> 		if (retval)
> 			goto err_out;
> 	}
> -
> -	i_data[0] = ei->i_data[EXT4_IND_BLOCK];
> -	i_data[1] = ei->i_data[EXT4_DIND_BLOCK];
> -	i_data[2] = ei->i_data[EXT4_TIND_BLOCK];
> -
> +	memcpy(i_data, ei->i_data, sizeof(ei->i_data));
> 	down_write(&EXT4_I(inode)->i_data_sem);
> 	/*
> 	 * if EXT4_STATE_EXT_MIGRATE is cleared a block allocation
> @@ -345,89 +234,31 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
> 		ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
> 	/*
> 	 * We have the extent map build with the tmp inode.
> -	 * Now copy the i_data across
> +	 * Now swap i_data maps
> 	 */
> 	ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS);
> 	memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data));
> -
> +	ext4_clear_inode_flag(tmp_inode, EXT4_INODE_EXTENTS);
> +	memcpy(tmp_ei->i_data, i_data, sizeof(ei->i_data));
> 	/*
> 	 * Update i_blocks with the new blocks that got
> 	 * allocated while adding extents for extent index
> 	 * blocks.
> 	 *
> -	 * While converting to extents we need not
> -	 * update the orignal inode i_blocks for extent blocks
> -	 * via quota APIs. The quota update happened via tmp_inode already.
> +	 * While converting to extents new meta_data blocks was accounted for
> +	 * tmp_inode, swap counter info with original inode.
> 	 */
> +	mreq->ind_blocks <<= (inode->i_blkbits - 9);
> 	spin_lock(&inode->i_lock);
> -	inode->i_blocks += tmp_inode->i_blocks;
> +	inode->i_blocks += tmp_inode->i_blocks - mreq->ind_blocks;
> 	spin_unlock(&inode->i_lock);
> +	tmp_inode->i_blocks = mreq->ind_blocks;
> 	up_write(&EXT4_I(inode)->i_data_sem);
> -
> -	/*
> -	 * We mark the inode dirty after, because we decrement the
> -	 * i_blocks when freeing the indirect meta-data blocks
> -	 */
> -	retval = free_ind_block(handle, inode, i_data);
> 	ext4_mark_inode_dirty(handle, inode);
> -
> err_out:
> 	return retval;
> }
> 
> -static int free_ext_idx(handle_t *handle, struct inode *inode,
> -					struct ext4_extent_idx *ix)
> -{
> -	int i, retval = 0;
> -	ext4_fsblk_t block;
> -	struct buffer_head *bh;
> -	struct ext4_extent_header *eh;
> -
> -	block = ext4_idx_pblock(ix);
> -	bh = sb_bread(inode->i_sb, block);
> -	if (!bh)
> -		return -EIO;
> -
> -	eh = (struct ext4_extent_header *)bh->b_data;
> -	if (eh->eh_depth != 0) {
> -		ix = EXT_FIRST_INDEX(eh);
> -		for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) {
> -			retval = free_ext_idx(handle, inode, ix);
> -			if (retval)
> -				break;
> -		}
> -	}
> -	put_bh(bh);
> -	extend_credit_for_blkdel(handle, inode);
> -	ext4_free_blocks(handle, inode, NULL, block, 1,
> -			 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
> -	return retval;
> -}
> -
> -/*
> - * Free the extent meta data blocks only
> - */
> -static int free_ext_block(handle_t *handle, struct inode *inode)
> -{
> -	int i, retval = 0;
> -	struct ext4_inode_info *ei = EXT4_I(inode);
> -	struct ext4_extent_header *eh = (struct ext4_extent_header *)ei->i_data;
> -	struct ext4_extent_idx *ix;
> -	if (eh->eh_depth == 0)
> -		/*
> -		 * No extra blocks allocated for extent meta data
> -		 */
> -		return 0;
> -	ix = EXT_FIRST_INDEX(eh);
> -	for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) {
> -		retval = free_ext_idx(handle, inode, ix);
> -		if (retval)
> -			return retval;
> -	}
> -	return retval;
> -
> -}
> -
> int ext4_ext_migrate(struct inode *inode)
> {
> 	handle_t *handle;
> @@ -481,7 +312,7 @@ int ext4_ext_migrate(struct inode *inode)
> 	 * when we drop inode reference.
> 	 */
> 	tmp_inode->i_nlink = 0;
> -
> +	ext4_set_inode_flag(tmp_inode, EXT4_INODE_MIGRATE);
> 	ext4_ext_tree_init(handle, tmp_inode);
> 	ext4_orphan_add(handle, tmp_inode);
> 	ext4_journal_stop(handle);
> @@ -557,44 +388,10 @@ int ext4_ext_migrate(struct inode *inode)
> 	 * Build the last extent
> 	 */
> 	retval = finish_range(handle, tmp_inode, &lb);
> +	if (!retval)
> +		retval = ext4_ext_swap_inode_data(handle, inode, tmp_inode, &lb);
> err_out:
> -	if (retval)
> -		/*
> -		 * Failure case delete the extent information with the
> -		 * tmp_inode
> -		 */
> -		free_ext_block(handle, tmp_inode);
> -	else {
> -		retval = ext4_ext_swap_inode_data(handle, inode, tmp_inode);
> -		if (retval)
> -			/*
> -			 * if we fail to swap inode data free the extent
> -			 * details of the tmp inode
> -			 */
> -			free_ext_block(handle, tmp_inode);
> -	}
> 
> -	/* We mark the tmp_inode dirty via ext4_ext_tree_init. */
> -	if (ext4_journal_extend(handle, 1) != 0)
> -		ext4_journal_restart(handle, 1);
> -
> -	/*
> -	 * Mark the tmp_inode as of size zero
> -	 */
> -	i_size_write(tmp_inode, 0);
> -
> -	/*
> -	 * set the  i_blocks count to zero
> -	 * so that the ext4_delete_inode does the
> -	 * right job
> -	 *
> -	 * We don't need to take the i_lock because
> -	 * the inode is not visible to user space.
> -	 */
> -	tmp_inode->i_blocks = 0;
> -
> -	/* Reset the extent details */
> -	ext4_ext_tree_init(handle, tmp_inode);
> 	ext4_journal_stop(handle);
> out:
> 	unlock_new_inode(tmp_inode);
> -- 
> 1.7.2.3
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


Cheers, Andreas






^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 3/3] ext4: fix block swap procedure on migration V2
  2011-09-19 16:29   ` Andreas Dilger
@ 2011-09-19 16:45     ` Dmitry Monakhov
  2011-10-29 12:54       ` Ted Ts'o
  0 siblings, 1 reply; 9+ messages in thread
From: Dmitry Monakhov @ 2011-09-19 16:45 UTC (permalink / raw)
  To: Andreas Dilger; +Cc: linux-ext4, aneesh.kumar, tytso

On Mon, 19 Sep 2011 10:29:24 -0600, Andreas Dilger <adilger@dilger.ca> wrote:
> On 2011-09-17, at 7:32 AM, Dmitry Monakhov wrote:
> > Currently if system halted in the midle of migration procedure
> > tmp_inode will be truncated on next mount during orphan list cleanup
> > as a regular inode, but in fact it is the special one. Temporal inode
> > refers to original's inode leaf blocks, but does not own it.
> > And we should free only index blocks, but not a leaf one.
> > We have to somehow flag migration inode as a special one, and then
> > cleanup it accordingly. The flag in question should survive
> > trough reboots.
> 
> I think this is potentially the wrong approach for extent migration.
> We don't want the kernel to automatically delete these inodes, which
> is what should happen if they are on the orphan list, since it means
> the blocks will be marked unused by the bitmap, and further corruption
> will result.
> 
> It would potentially be better to just leave the inode off the orphan
> list, and in the _extremely_ rare case that there is a crash during
> inode migration the inode is leaked until e2fsck is run.  The migrate
> will happen at most once for any filesystem, so the loss of a single
> inode per crash will not be a serious issue IMHO.
But we still need to tell e2fsck that it is not just an average inode,
and it should be cleaned up without touching it's data blocks. Otherwise
fsck will complain about blocks are referenced by multiple inodes, which
is really scary message. So IMHO we still need persistent flag. 
> 
> > This patch introduce new inode flag for this purpose. Yes I know
> > that may seems not optimal to use didicated flag for such a rare case,
> > and if you know a better way, or a good flag that we can share please
> > say tell me. Also we don't need didicated (migration only) cleanup
> > functions. It is reasonable just skip leaf blocks for inodes with
> > migration flag enabled inode inside ext4_truncate(). So tmp_inode will
> > be cleaned automatically on last iput()
> > 
> > - Introduce new inode flag for temporal inodes
> > - Move cleanup logic to truncate.
> > - Remove duplicated code. We no longer need it.
> > 
> > Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
> > ---
> > fs/ext4/ext4.h     |    3 +
> > fs/ext4/extents.c  |   10 ++-
> > fs/ext4/indirect.c |    4 +
> > fs/ext4/migrate.c  |  247 +++++-----------------------------------------------
> > 4 files changed, 35 insertions(+), 229 deletions(-)
> > 
> > diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> > index 3aa2f7c..8a6f612 100644
> > --- a/fs/ext4/ext4.h
> > +++ b/fs/ext4/ext4.h
> > @@ -350,6 +350,7 @@ struct flex_groups {
> > #define EXT4_EXTENTS_FL			0x00080000 /* Inode uses extents */
> > #define EXT4_EA_INODE_FL	        0x00200000 /* Inode used for large EA */
> > #define EXT4_EOFBLOCKS_FL		0x00400000 /* Blocks allocated beyond EOF */
> > +#define EXT4_MIGRATE_FL			0x10000000 /* Inode used for data migration */
> > #define EXT4_RESERVED_FL		0x80000000 /* reserved for ext4 lib */
> > 
> > #define EXT4_FL_USER_VISIBLE		0x004BDFFF /* User visible flags */
> > @@ -407,6 +408,7 @@ enum {
> > 	EXT4_INODE_EXTENTS	= 19,	/* Inode uses extents */
> > 	EXT4_INODE_EA_INODE	= 21,	/* Inode used for large EA */
> > 	EXT4_INODE_EOFBLOCKS	= 22,	/* Blocks allocated beyond EOF */
> > +	EXT4_INODE_MIGRATE	= 28,	/* Inode used for data migration */
> > 	EXT4_INODE_RESERVED	= 31,	/* reserved for ext4 lib */
> > };
> > 
> > @@ -453,6 +455,7 @@ static inline void ext4_check_flag_values(void)
> > 	CHECK_FLAG_VALUE(EXTENTS);
> > 	CHECK_FLAG_VALUE(EA_INODE);
> > 	CHECK_FLAG_VALUE(EOFBLOCKS);
> > +	CHECK_FLAG_VALUE(MIGRATE);
> > 	CHECK_FLAG_VALUE(RESERVED);
> > }
> > 
> > diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> > index 57cf568..0ac5a63 100644
> > --- a/fs/ext4/extents.c
> > +++ b/fs/ext4/extents.c
> > @@ -2416,10 +2416,12 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
> > 		if (err)
> > 			goto out;
> > 
> > -		err = ext4_remove_blocks(handle, inode, ex, a, b);
> > -		if (err)
> > -			goto out;
> > -
> > +		/* Migration inode does not own it's leaf blocks */
> > +		if (!ext4_test_inode_flag(inode, EXT4_INODE_MIGRATE)) {
> > +			err = ext4_remove_blocks(handle, inode, ex, a, b);
> > +			if (err)
> > +				goto out;
> > +		}
> > 		if (num == 0) {
> > 			/* this extent is removed; mark slot entirely unused */
> > 			ext4_ext_store_pblock(ex, 0);
> > diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
> > index 0962642..4581d0e 100644
> > --- a/fs/ext4/indirect.c
> > +++ b/fs/ext4/indirect.c
> > @@ -1147,6 +1147,10 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
> > 					       for current block */
> > 	int err = 0;
> > 
> > +	/* Migration inode does not own it's data blocks */
> > +	if (ext4_test_inode_flag(inode, EXT4_INODE_MIGRATE))
> > +		return;
> > +
> > 	if (this_bh) {				/* For indirect block */
> > 		BUFFER_TRACE(this_bh, "get_write_access");
> > 		err = ext4_journal_get_write_access(handle, this_bh);
> > diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
> > index 74f2900..a75e40d 100644
> > --- a/fs/ext4/migrate.c
> > +++ b/fs/ext4/migrate.c
> > @@ -24,6 +24,7 @@
> > struct migrate_struct {
> > 	ext4_lblk_t first_block, last_block, curr_block;
> > 	ext4_fsblk_t first_pblock, last_pblock;
> > +	blkcnt_t ind_blocks;
> > 
> > };
> > 
> > @@ -135,6 +136,7 @@ static int update_ind_extent_range(handle_t *handle, struct inode *inode,
> > 			lb->curr_block++;
> > 		}
> > 	}
> > +	lb->ind_blocks++;
> > 	put_bh(bh);
> > 	return retval;
> > 
> > @@ -164,6 +166,7 @@ static int update_dind_extent_range(handle_t *handle, struct inode *inode,
> > 			lb->curr_block += max_entries;
> > 		}
> > 	}
> > +	lb->ind_blocks++;
> > 	put_bh(bh);
> > 	return retval;
> > 
> > @@ -193,144 +196,30 @@ static int update_tind_extent_range(handle_t *handle, struct inode *inode,
> > 			lb->curr_block += max_entries * max_entries;
> > 		}
> > 	}
> > +	lb->ind_blocks++;
> > 	put_bh(bh);
> > 	return retval;
> > 
> > }
> > 
> > -static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode)
> > -{
> > -	int retval = 0, needed;
> > -
> > -	if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1))
> > -		return 0;
> > -	/*
> > -	 * We are freeing a blocks. During this we touch
> > -	 * superblock, group descriptor and block bitmap.
> > -	 * So allocate a credit of 3. We may update
> > -	 * quota (user and group).
> > -	 */
> > -	needed = 3 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
> > -
> > -	if (ext4_journal_extend(handle, needed) != 0)
> > -		retval = ext4_journal_restart(handle, needed);
> > -
> > -	return retval;
> > -}
> > -
> > -static int free_dind_blocks(handle_t *handle,
> > -				struct inode *inode, __le32 i_data)
> > -{
> > -	int i;
> > -	__le32 *tmp_idata;
> > -	struct buffer_head *bh;
> > -	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
> > -
> > -	bh = sb_bread(inode->i_sb, le32_to_cpu(i_data));
> > -	if (!bh)
> > -		return -EIO;
> > -
> > -	tmp_idata = (__le32 *)bh->b_data;
> > -	for (i = 0; i < max_entries; i++) {
> > -		if (tmp_idata[i]) {
> > -			extend_credit_for_blkdel(handle, inode);
> > -			ext4_free_blocks(handle, inode, NULL,
> > -					 le32_to_cpu(tmp_idata[i]), 1,
> > -					 EXT4_FREE_BLOCKS_METADATA |
> > -					 EXT4_FREE_BLOCKS_FORGET);
> > -		}
> > -	}
> > -	put_bh(bh);
> > -	extend_credit_for_blkdel(handle, inode);
> > -	ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1,
> > -			 EXT4_FREE_BLOCKS_METADATA |
> > -			 EXT4_FREE_BLOCKS_FORGET);
> > -	return 0;
> > -}
> > -
> > -static int free_tind_blocks(handle_t *handle,
> > -				struct inode *inode, __le32 i_data)
> > -{
> > -	int i, retval = 0;
> > -	__le32 *tmp_idata;
> > -	struct buffer_head *bh;
> > -	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
> > -
> > -	bh = sb_bread(inode->i_sb, le32_to_cpu(i_data));
> > -	if (!bh)
> > -		return -EIO;
> > -
> > -	tmp_idata = (__le32 *)bh->b_data;
> > -	for (i = 0; i < max_entries; i++) {
> > -		if (tmp_idata[i]) {
> > -			retval = free_dind_blocks(handle,
> > -					inode, tmp_idata[i]);
> > -			if (retval) {
> > -				put_bh(bh);
> > -				return retval;
> > -			}
> > -		}
> > -	}
> > -	put_bh(bh);
> > -	extend_credit_for_blkdel(handle, inode);
> > -	ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1,
> > -			 EXT4_FREE_BLOCKS_METADATA |
> > -			 EXT4_FREE_BLOCKS_FORGET);
> > -	return 0;
> > -}
> > -
> > -static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data)
> > -{
> > -	int retval;
> > -
> > -	/* ei->i_data[EXT4_IND_BLOCK] */
> > -	if (i_data[0]) {
> > -		extend_credit_for_blkdel(handle, inode);
> > -		ext4_free_blocks(handle, inode, NULL,
> > -				le32_to_cpu(i_data[0]), 1,
> > -				 EXT4_FREE_BLOCKS_METADATA |
> > -				 EXT4_FREE_BLOCKS_FORGET);
> > -	}
> > -
> > -	/* ei->i_data[EXT4_DIND_BLOCK] */
> > -	if (i_data[1]) {
> > -		retval = free_dind_blocks(handle, inode, i_data[1]);
> > -		if (retval)
> > -			return retval;
> > -	}
> > -
> > -	/* ei->i_data[EXT4_TIND_BLOCK] */
> > -	if (i_data[2]) {
> > -		retval = free_tind_blocks(handle, inode, i_data[2]);
> > -		if (retval)
> > -			return retval;
> > -	}
> > -	return 0;
> > -}
> > -
> > static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
> > -						struct inode *tmp_inode)
> > +				struct inode *tmp_inode,
> > +				struct migrate_struct *mreq)
> > {
> > 	int retval;
> > -	__le32	i_data[3];
> > +	__le32  i_data[EXT4_N_BLOCKS];
> > 	struct ext4_inode_info *ei = EXT4_I(inode);
> > 	struct ext4_inode_info *tmp_ei = EXT4_I(tmp_inode);
> > -
> > 	/*
> > -	 * One credit accounted for writing the
> > -	 * i_data field of the original inode
> > +	 * Two credits accounted for writing the
> > +	 * i_data field of the two inodes
> > 	 */
> > -	retval = ext4_journal_extend(handle, 1);
> > -	if (retval) {
> > +	if (ext4_journal_extend(handle, 2) != 0) {
> > 		retval = ext4_journal_restart(handle, 1);
> > 		if (retval)
> > 			goto err_out;
> > 	}
> > -
> > -	i_data[0] = ei->i_data[EXT4_IND_BLOCK];
> > -	i_data[1] = ei->i_data[EXT4_DIND_BLOCK];
> > -	i_data[2] = ei->i_data[EXT4_TIND_BLOCK];
> > -
> > +	memcpy(i_data, ei->i_data, sizeof(ei->i_data));
> > 	down_write(&EXT4_I(inode)->i_data_sem);
> > 	/*
> > 	 * if EXT4_STATE_EXT_MIGRATE is cleared a block allocation
> > @@ -345,89 +234,31 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
> > 		ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
> > 	/*
> > 	 * We have the extent map build with the tmp inode.
> > -	 * Now copy the i_data across
> > +	 * Now swap i_data maps
> > 	 */
> > 	ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS);
> > 	memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data));
> > -
> > +	ext4_clear_inode_flag(tmp_inode, EXT4_INODE_EXTENTS);
> > +	memcpy(tmp_ei->i_data, i_data, sizeof(ei->i_data));
> > 	/*
> > 	 * Update i_blocks with the new blocks that got
> > 	 * allocated while adding extents for extent index
> > 	 * blocks.
> > 	 *
> > -	 * While converting to extents we need not
> > -	 * update the orignal inode i_blocks for extent blocks
> > -	 * via quota APIs. The quota update happened via tmp_inode already.
> > +	 * While converting to extents new meta_data blocks was accounted for
> > +	 * tmp_inode, swap counter info with original inode.
> > 	 */
> > +	mreq->ind_blocks <<= (inode->i_blkbits - 9);
> > 	spin_lock(&inode->i_lock);
> > -	inode->i_blocks += tmp_inode->i_blocks;
> > +	inode->i_blocks += tmp_inode->i_blocks - mreq->ind_blocks;
> > 	spin_unlock(&inode->i_lock);
> > +	tmp_inode->i_blocks = mreq->ind_blocks;
> > 	up_write(&EXT4_I(inode)->i_data_sem);
> > -
> > -	/*
> > -	 * We mark the inode dirty after, because we decrement the
> > -	 * i_blocks when freeing the indirect meta-data blocks
> > -	 */
> > -	retval = free_ind_block(handle, inode, i_data);
> > 	ext4_mark_inode_dirty(handle, inode);
> > -
> > err_out:
> > 	return retval;
> > }
> > 
> > -static int free_ext_idx(handle_t *handle, struct inode *inode,
> > -					struct ext4_extent_idx *ix)
> > -{
> > -	int i, retval = 0;
> > -	ext4_fsblk_t block;
> > -	struct buffer_head *bh;
> > -	struct ext4_extent_header *eh;
> > -
> > -	block = ext4_idx_pblock(ix);
> > -	bh = sb_bread(inode->i_sb, block);
> > -	if (!bh)
> > -		return -EIO;
> > -
> > -	eh = (struct ext4_extent_header *)bh->b_data;
> > -	if (eh->eh_depth != 0) {
> > -		ix = EXT_FIRST_INDEX(eh);
> > -		for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) {
> > -			retval = free_ext_idx(handle, inode, ix);
> > -			if (retval)
> > -				break;
> > -		}
> > -	}
> > -	put_bh(bh);
> > -	extend_credit_for_blkdel(handle, inode);
> > -	ext4_free_blocks(handle, inode, NULL, block, 1,
> > -			 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
> > -	return retval;
> > -}
> > -
> > -/*
> > - * Free the extent meta data blocks only
> > - */
> > -static int free_ext_block(handle_t *handle, struct inode *inode)
> > -{
> > -	int i, retval = 0;
> > -	struct ext4_inode_info *ei = EXT4_I(inode);
> > -	struct ext4_extent_header *eh = (struct ext4_extent_header *)ei->i_data;
> > -	struct ext4_extent_idx *ix;
> > -	if (eh->eh_depth == 0)
> > -		/*
> > -		 * No extra blocks allocated for extent meta data
> > -		 */
> > -		return 0;
> > -	ix = EXT_FIRST_INDEX(eh);
> > -	for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) {
> > -		retval = free_ext_idx(handle, inode, ix);
> > -		if (retval)
> > -			return retval;
> > -	}
> > -	return retval;
> > -
> > -}
> > -
> > int ext4_ext_migrate(struct inode *inode)
> > {
> > 	handle_t *handle;
> > @@ -481,7 +312,7 @@ int ext4_ext_migrate(struct inode *inode)
> > 	 * when we drop inode reference.
> > 	 */
> > 	tmp_inode->i_nlink = 0;
> > -
> > +	ext4_set_inode_flag(tmp_inode, EXT4_INODE_MIGRATE);
> > 	ext4_ext_tree_init(handle, tmp_inode);
> > 	ext4_orphan_add(handle, tmp_inode);
> > 	ext4_journal_stop(handle);
> > @@ -557,44 +388,10 @@ int ext4_ext_migrate(struct inode *inode)
> > 	 * Build the last extent
> > 	 */
> > 	retval = finish_range(handle, tmp_inode, &lb);
> > +	if (!retval)
> > +		retval = ext4_ext_swap_inode_data(handle, inode, tmp_inode, &lb);
> > err_out:
> > -	if (retval)
> > -		/*
> > -		 * Failure case delete the extent information with the
> > -		 * tmp_inode
> > -		 */
> > -		free_ext_block(handle, tmp_inode);
> > -	else {
> > -		retval = ext4_ext_swap_inode_data(handle, inode, tmp_inode);
> > -		if (retval)
> > -			/*
> > -			 * if we fail to swap inode data free the extent
> > -			 * details of the tmp inode
> > -			 */
> > -			free_ext_block(handle, tmp_inode);
> > -	}
> > 
> > -	/* We mark the tmp_inode dirty via ext4_ext_tree_init. */
> > -	if (ext4_journal_extend(handle, 1) != 0)
> > -		ext4_journal_restart(handle, 1);
> > -
> > -	/*
> > -	 * Mark the tmp_inode as of size zero
> > -	 */
> > -	i_size_write(tmp_inode, 0);
> > -
> > -	/*
> > -	 * set the  i_blocks count to zero
> > -	 * so that the ext4_delete_inode does the
> > -	 * right job
> > -	 *
> > -	 * We don't need to take the i_lock because
> > -	 * the inode is not visible to user space.
> > -	 */
> > -	tmp_inode->i_blocks = 0;
> > -
> > -	/* Reset the extent details */
> > -	ext4_ext_tree_init(handle, tmp_inode);
> > 	ext4_journal_stop(handle);
> > out:
> > 	unlock_new_inode(tmp_inode);
> > -- 
> > 1.7.2.3
> > 
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> 
> Cheers, Andreas
> 
> 
> 
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: Ping...
  2011-09-17 13:32 [PATCH 1/3] ext4: migrate cleanup Dmitry Monakhov
  2011-09-17 13:32 ` [PATCH 2/3] ext4: fix quota accounting during migration Dmitry Monakhov
  2011-09-17 13:32 ` [PATCH 3/3] ext4: fix block swap procedure on migration V2 Dmitry Monakhov
@ 2011-10-28 20:34 ` Dmitry Monakhov
  2011-10-29 12:31 ` [PATCH 1/3] ext4: migrate cleanup Ted Ts'o
  3 siblings, 0 replies; 9+ messages in thread
From: Dmitry Monakhov @ 2011-10-28 20:34 UTC (permalink / raw)
  To: tytso; +Cc: aneesh.kumar, linux-ext4, adilger

On Sat, 17 Sep 2011 17:32:57 +0400, Dmitry Monakhov <dmonakhov@openvz.org> wrote:
Can you please take a look at this three patches.
IMHO first two patches are simple and clean.
Last one may be good start point for discussion.
I do understand Andreas's point, and we may warn that fsck
is necessary, but still. We need MIGRATE flag to mark
temporal inode which not owns data blocks.
> This patch cleanup code a bit, actual logic not changed
> - Move current block pointer to migrate_structure, let's all
>   walk info will be in one structure.
> - Get rid of usless null ind-block ptr checks, caller already
>   does that check.
> 
> Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
> ---
>  fs/ext4/migrate.c |  101 ++++++++++++++++++-----------------------------------
>  1 files changed, 34 insertions(+), 67 deletions(-)
> 
> diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
> index b57b98f..e263d78 100644
> --- a/fs/ext4/migrate.c
> +++ b/fs/ext4/migrate.c
> @@ -21,13 +21,14 @@
>   * The contiguous blocks details which can be
>   * represented by a single extent
>   */
> -struct list_blocks_struct {
> -	ext4_lblk_t first_block, last_block;
> +struct migrate_struct {
> +	ext4_lblk_t first_block, last_block, curr_block;
>  	ext4_fsblk_t first_pblock, last_pblock;
> +
>  };
>  
>  static int finish_range(handle_t *handle, struct inode *inode,
> -				struct list_blocks_struct *lb)
> +				struct migrate_struct *lb)
>  
>  {
>  	int retval = 0, needed;
> @@ -87,8 +88,7 @@ err_out:
>  }
>  
>  static int update_extent_range(handle_t *handle, struct inode *inode,
> -				ext4_fsblk_t pblock, ext4_lblk_t blk_num,
> -				struct list_blocks_struct *lb)
> +				ext4_fsblk_t pblock, struct migrate_struct *lb)
>  {
>  	int retval;
>  	/*
> @@ -96,9 +96,10 @@ static int update_extent_range(handle_t *handle, struct inode *inode,
>  	 */
>  	if (lb->first_pblock &&
>  		(lb->last_pblock+1 == pblock) &&
> -		(lb->last_block+1 == blk_num)) {
> +		(lb->last_block+1 == lb->curr_block)) {
>  		lb->last_pblock = pblock;
> -		lb->last_block = blk_num;
> +		lb->last_block = lb->curr_block;
> +		lb->curr_block++;
>  		return 0;
>  	}
>  	/*
> @@ -106,64 +107,47 @@ static int update_extent_range(handle_t *handle, struct inode *inode,
>  	 */
>  	retval = finish_range(handle, inode, lb);
>  	lb->first_pblock = lb->last_pblock = pblock;
> -	lb->first_block = lb->last_block = blk_num;
> -
> +	lb->first_block = lb->last_block = lb->curr_block;
> +	lb->curr_block++;
>  	return retval;
>  }
>  
>  static int update_ind_extent_range(handle_t *handle, struct inode *inode,
> -				   ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
> -				   struct list_blocks_struct *lb)
> +				ext4_fsblk_t pblock, struct migrate_struct *lb)
>  {
>  	struct buffer_head *bh;
>  	__le32 *i_data;
>  	int i, retval = 0;
> -	ext4_lblk_t blk_count = *blk_nump;
>  	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
>  
> -	if (!pblock) {
> -		/* Only update the file block number */
> -		*blk_nump += max_entries;
> -		return 0;
> -	}
> -
>  	bh = sb_bread(inode->i_sb, pblock);
>  	if (!bh)
>  		return -EIO;
>  
>  	i_data = (__le32 *)bh->b_data;
> -	for (i = 0; i < max_entries; i++, blk_count++) {
> +	for (i = 0; i < max_entries; i++) {
>  		if (i_data[i]) {
>  			retval = update_extent_range(handle, inode,
> -						le32_to_cpu(i_data[i]),
> -						blk_count, lb);
> +						le32_to_cpu(i_data[i]), lb);
>  			if (retval)
>  				break;
> +		} else {
> +			lb->curr_block++;
>  		}
>  	}
> -
> -	/* Update the file block number */
> -	*blk_nump = blk_count;
>  	put_bh(bh);
>  	return retval;
>  
>  }
>  
>  static int update_dind_extent_range(handle_t *handle, struct inode *inode,
> -				    ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
> -				    struct list_blocks_struct *lb)
> +				ext4_fsblk_t pblock, struct migrate_struct *lb)
>  {
>  	struct buffer_head *bh;
>  	__le32 *i_data;
>  	int i, retval = 0;
> -	ext4_lblk_t blk_count = *blk_nump;
>  	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
>  
> -	if (!pblock) {
> -		/* Only update the file block number */
> -		*blk_nump += max_entries * max_entries;
> -		return 0;
> -	}
>  	bh = sb_bread(inode->i_sb, pblock);
>  	if (!bh)
>  		return -EIO;
> @@ -172,38 +156,27 @@ static int update_dind_extent_range(handle_t *handle, struct inode *inode,
>  	for (i = 0; i < max_entries; i++) {
>  		if (i_data[i]) {
>  			retval = update_ind_extent_range(handle, inode,
> -						le32_to_cpu(i_data[i]),
> -						&blk_count, lb);
> +						le32_to_cpu(i_data[i]), lb);
>  			if (retval)
>  				break;
>  		} else {
>  			/* Only update the file block number */
> -			blk_count += max_entries;
> +			lb->curr_block += max_entries;
>  		}
>  	}
> -
> -	/* Update the file block number */
> -	*blk_nump = blk_count;
>  	put_bh(bh);
>  	return retval;
>  
>  }
>  
>  static int update_tind_extent_range(handle_t *handle, struct inode *inode,
> -				     ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
> -				     struct list_blocks_struct *lb)
> +				ext4_fsblk_t pblock, struct migrate_struct *lb)
>  {
>  	struct buffer_head *bh;
>  	__le32 *i_data;
>  	int i, retval = 0;
> -	ext4_lblk_t blk_count = *blk_nump;
>  	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
>  
> -	if (!pblock) {
> -		/* Only update the file block number */
> -		*blk_nump += max_entries * max_entries * max_entries;
> -		return 0;
> -	}
>  	bh = sb_bread(inode->i_sb, pblock);
>  	if (!bh)
>  		return -EIO;
> @@ -211,17 +184,15 @@ static int update_tind_extent_range(handle_t *handle, struct inode *inode,
>  	i_data = (__le32 *)bh->b_data;
>  	for (i = 0; i < max_entries; i++) {
>  		if (i_data[i]) {
> -			retval = update_dind_extent_range(handle, inode,
> -						le32_to_cpu(i_data[i]),
> -						&blk_count, lb);
> +			retval = update_ind_extent_range(handle, inode,
> +						le32_to_cpu(i_data[i]), lb);
>  			if (retval)
>  				break;
> -		} else
> +		} else {
>  			/* Only update the file block number */
> -			blk_count += max_entries * max_entries;
> +			lb->curr_block += max_entries * max_entries;
> +		}
>  	}
> -	/* Update the file block number */
> -	*blk_nump = blk_count;
>  	put_bh(bh);
>  	return retval;
>  
> @@ -462,10 +433,9 @@ int ext4_ext_migrate(struct inode *inode)
>  	handle_t *handle;
>  	int retval = 0, i;
>  	__le32 *i_data;
> -	ext4_lblk_t blk_count = 0;
>  	struct ext4_inode_info *ei;
>  	struct inode *tmp_inode = NULL;
> -	struct list_blocks_struct lb;
> +	struct migrate_struct lb;
>  	unsigned long max_entries;
>  	__u32 goal;
>  
> @@ -551,35 +521,32 @@ int ext4_ext_migrate(struct inode *inode)
>  
>  	/* 32 bit block address 4 bytes */
>  	max_entries = inode->i_sb->s_blocksize >> 2;
> -	for (i = 0; i < EXT4_NDIR_BLOCKS; i++, blk_count++) {
> +	for (i = 0; i < EXT4_NDIR_BLOCKS; i++) {
>  		if (i_data[i]) {
>  			retval = update_extent_range(handle, tmp_inode,
> -						le32_to_cpu(i_data[i]),
> -						blk_count, &lb);
> +						le32_to_cpu(i_data[i]), &lb);
>  			if (retval)
>  				goto err_out;
> -		}
> +		} else
> +			lb.curr_block++;
>  	}
>  	if (i_data[EXT4_IND_BLOCK]) {
>  		retval = update_ind_extent_range(handle, tmp_inode,
> -					le32_to_cpu(i_data[EXT4_IND_BLOCK]),
> -					&blk_count, &lb);
> +				le32_to_cpu(i_data[EXT4_IND_BLOCK]), &lb);
>  			if (retval)
>  				goto err_out;
>  	} else
> -		blk_count +=  max_entries;
> +		lb.curr_block += max_entries;
>  	if (i_data[EXT4_DIND_BLOCK]) {
>  		retval = update_dind_extent_range(handle, tmp_inode,
> -					le32_to_cpu(i_data[EXT4_DIND_BLOCK]),
> -					&blk_count, &lb);
> +				le32_to_cpu(i_data[EXT4_DIND_BLOCK]), &lb);
>  			if (retval)
>  				goto err_out;
>  	} else
> -		blk_count += max_entries * max_entries;
> +		lb.curr_block += max_entries * max_entries;
>  	if (i_data[EXT4_TIND_BLOCK]) {
>  		retval = update_tind_extent_range(handle, tmp_inode,
> -					le32_to_cpu(i_data[EXT4_TIND_BLOCK]),
> -					&blk_count, &lb);
> +				le32_to_cpu(i_data[EXT4_TIND_BLOCK]), &lb);
>  			if (retval)
>  				goto err_out;
>  	}
> -- 
> 1.7.2.3
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/3] ext4: migrate cleanup
  2011-09-17 13:32 [PATCH 1/3] ext4: migrate cleanup Dmitry Monakhov
                   ` (2 preceding siblings ...)
  2011-10-28 20:34 ` Ping Dmitry Monakhov
@ 2011-10-29 12:31 ` Ted Ts'o
  3 siblings, 0 replies; 9+ messages in thread
From: Ted Ts'o @ 2011-10-29 12:31 UTC (permalink / raw)
  To: Dmitry Monakhov; +Cc: linux-ext4, aneesh.kumar

On Sat, Sep 17, 2011 at 05:32:57PM +0400, Dmitry Monakhov wrote:
> This patch cleanup code a bit, actual logic not changed
> - Move current block pointer to migrate_structure, let's all
>   walk info will be in one structure.
> - Get rid of usless null ind-block ptr checks, caller already
>   does that check.
> 
> Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>

Applied, with one fixup:

> @@ -211,17 +184,15 @@ static int update_tind_extent_range(handle_t *handle, struct inode *inode,
>  	i_data = (__le32 *)bh->b_data;
>  	for (i = 0; i < max_entries; i++) {
>  		if (i_data[i]) {
> -			retval = update_dind_extent_range(handle, inode,
> -						le32_to_cpu(i_data[i]),
> -						&blk_count, lb);
> +			retval = update_ind_extent_range(handle, inode,
> +						le32_to_cpu(i_data[i]), lb);

Surely this should remain a call to update_dind_extent_range(); I
assume it was a cut-and-paste editing error.

					- Ted

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 3/3] ext4: fix block swap procedure on migration V2
  2011-09-19 16:45     ` Dmitry Monakhov
@ 2011-10-29 12:54       ` Ted Ts'o
  0 siblings, 0 replies; 9+ messages in thread
From: Ted Ts'o @ 2011-10-29 12:54 UTC (permalink / raw)
  To: Dmitry Monakhov; +Cc: Andreas Dilger, linux-ext4, aneesh.kumar

On Mon, Sep 19, 2011 at 08:45:32PM +0400, Dmitry Monakhov wrote:
> > It would potentially be better to just leave the inode off the orphan
> > list, and in the _extremely_ rare case that there is a crash during
> > inode migration the inode is leaked until e2fsck is run.  The migrate
> > will happen at most once for any filesystem, so the loss of a single
> > inode per crash will not be a serious issue IMHO.
>
> But we still need to tell e2fsck that it is not just an average inode,
> and it should be cleaned up without touching it's data blocks. Otherwise
> fsck will complain about blocks are referenced by multiple inodes, which
> is really scary message. So IMHO we still need persistent flag. 

The simplest way to solve this is to keep the n_links count on the
inode to be zero.  That will prevent e2fsck from considering the inode
as being in use, so it will simply not consider the blocks owned by
the temporary inode.  What this would mean is that we will leak the
temporary inode as well as the newly allocated index blocks until the
next e2fsck, but that's not a disaster, and it is a very rare case.

If we wanted to optimize things further, we could also add support to
__ext4_get_inode_loc(), ext4_mark_inode_dirty(), and
ext4_write_inode() so that if the inode number is some magic number,
such as inode number 1 (which as the bad block inode we never touch
directly), that it is to be considered an in-memory inode only and so
we don't even bother writing it to do disk or journaling to it.  That
way the migration code can use an in-memory inode which is allocated
by ext4_ext_migrate(), and whose only existence is a pointer in that
kernel stack frame to an in-memory inode, which has no existence on
disk.

With this optimized approach, the only thing we will leak is one,
maybe two extent tree index blocks *if* we happen to be migrating an
inode during the time of a system crash.  We could even avoid that by
adding support for blocks which are allocated in memory, but not (yet)
pushed out to disk, which we may need for some of the write path
improvements.  But if we don't get to that right away, I think that's
fine....

						- Ted

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/3] ext4: fix quota accounting during migration
  2011-09-17 13:32 ` [PATCH 2/3] ext4: fix quota accounting during migration Dmitry Monakhov
@ 2011-10-29 13:09   ` Ted Ts'o
  0 siblings, 0 replies; 9+ messages in thread
From: Ted Ts'o @ 2011-10-29 13:09 UTC (permalink / raw)
  To: Dmitry Monakhov; +Cc: linux-ext4, aneesh.kumar

On Sat, Sep 17, 2011 at 05:32:58PM +0400, Dmitry Monakhov wrote:
> tmp_inode should have same uid/gid as  original inode.
> Otherwise new metadata blocks will be accounted in to wrong
> quota-id which result in quota leak after indirect blocks swap.
> 
> Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>

Thanks, applied.

					- Ted

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2011-10-29 18:12 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-09-17 13:32 [PATCH 1/3] ext4: migrate cleanup Dmitry Monakhov
2011-09-17 13:32 ` [PATCH 2/3] ext4: fix quota accounting during migration Dmitry Monakhov
2011-10-29 13:09   ` Ted Ts'o
2011-09-17 13:32 ` [PATCH 3/3] ext4: fix block swap procedure on migration V2 Dmitry Monakhov
2011-09-19 16:29   ` Andreas Dilger
2011-09-19 16:45     ` Dmitry Monakhov
2011-10-29 12:54       ` Ted Ts'o
2011-10-28 20:34 ` Ping Dmitry Monakhov
2011-10-29 12:31 ` [PATCH 1/3] ext4: migrate cleanup Ted Ts'o

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).