All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC][PATCH 8/9]ext4: Add the EXT4_IOC_MOVE_VICTIM ioctl
@ 2008-10-24 10:10 Akira Fujita
  0 siblings, 0 replies; only message in thread
From: Akira Fujita @ 2008-10-24 10:10 UTC (permalink / raw)
  To: linux-ext4, Theodore Tso, Mingming Cao; +Cc: linux-fsdevel

ext4: online defrag -- Add the EXT4_IOC_MOVE_VICTIM ioctl.

From: Akira Fujita <a-fujita@rs.jp.nec.com>

The EXT4_IOC_MOVE_VICTIM ioctl moves the victim extents into another block
group, so that contiguous free space is created in the target block group.
This ioctl is used only in force defrag mode (-f).

Signed-off-by: Akira Fujita <a-fujita@rs.jp.nec.com>
Signed-off-by: Takashi Sato <t-sato@yk.jp.nec.com>
---
 fs/ext4/balloc.c       |    1 +
 fs/ext4/defrag.c       |  264 ++++++++++++++++++++++++++++++++++++++++++-----
 fs/ext4/ext4.h         |   20 ++++-
 fs/ext4/ext4_extents.h |    3 +
 fs/ext4/extents.c      |   52 ++++++++--
 fs/ext4/ioctl.c        |   16 +++-
 fs/ext4/mballoc.c      |    5 +
 fs/ext4/mballoc.h      |    1 +
 8 files changed, 322 insertions(+), 40 deletions(-)

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index b9821be..e791484 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -705,6 +705,7 @@ static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode,
 	ar.goal = goal;
 	ar.len = *count;
 	ar.logical = iblock;
+	ar.excepted_group = -1;

 	if (S_ISREG(inode->i_mode) && !(flags & EXT4_META_BLOCK))
 		/* enable in-core preallocation for data block allocation */
diff --git a/fs/ext4/defrag.c b/fs/ext4/defrag.c
index 67030bc..20cb847 100644
--- a/fs/ext4/defrag.c
+++ b/fs/ext4/defrag.c
@@ -204,6 +204,92 @@ out:
 }

 /**
+ * ext4_defrag_move_victim - Create free space for defrag
+ *
+ * @target_filp:	target file
+ * @ext_info:		victim inode number, goal and extents to move
+ *
+ * Returns 0 on success, otherwise an error value (-EINVAL when entries
+ * exceeds DEFRAG_MAX_ENT, assumed to be the size of ext[] - TODO confirm).
+ */
+int
+ext4_defrag_move_victim(struct file *target_filp,
+			struct ext4_extents_info *ext_info)
+{
+	struct inode *org_inode = target_filp->f_dentry->d_inode;
+	struct super_block *sb = org_inode->i_sb;
+	struct file victim_file;
+	struct dentry victim_dent;
+	struct inode *victim_inode;
+	struct ext4_extent_data ext;
+	ext4_fsblk_t goal = ext_info->goal;
+	ext4_group_t group;
+	ext4_grpblk_t grp_off;
+	int ret = 0, i;	/* ret stays 0 when no extent is listed */
+
+	/* Setup dummy extent data */
+	ext.len = 0;
+
+	if (ext_info->entries > DEFRAG_MAX_ENT)	/* user-supplied count */
+		return -EINVAL;
+
+	/* Get the inode of the victim file */
+	victim_inode = ext4_iget(sb, ext_info->ino);
+	if (IS_ERR(victim_inode))
+		return PTR_ERR(victim_inode);
+
+	/*
+	 * The victim's permissions may have changed since they were checked
+	 * before; fail through "out" so the inode reference is released.
+	 */
+	if (!capable(CAP_DAC_OVERRIDE) &&
+	    ((victim_inode->i_mode & S_IRUSR) != S_IRUSR ||
+	     current->fsuid != victim_inode->i_uid)) {
+		ret = -EACCES;
+		goto out;
+	}
+
+	/* Setup file for the victim file */
+	victim_dent.d_inode = victim_inode;
+	victim_file.f_dentry = &victim_dent;
+	victim_file.f_mapping = victim_inode->i_mapping;
+
+	/*
+	 * If goal has not been set in the user space, aim at the first block
+	 * of the block group following the one that holds ext[0].start.
+	 */
+	if (goal == -1) {
+		ext4_get_group_no_and_offset(victim_inode->i_sb,
+				ext_info->ext[0].start, &group, &grp_off);
+		goal = ext4_group_first_block_no(sb, group + 1);
+	}
+
+	for (i = 0; i < ext_info->entries; i++) {
+		/* Move original blocks to another block group */
+		ret = ext4_defrag(&victim_file, ext_info->ext[i].block,
+			ext_info->ext[i].len, goal, DEFRAG_FORCE_VICTIM, &ext);
+		if (ret < 0) {
+			printk(KERN_ERR "ext4 defrag: "
+				"Moving victim file failed. ino [%llu]\n",
+				ext_info->ino);
+			goto out;
+		}
+
+		/* Sync journal blocks */
+		ret = ext4_force_commit(sb);
+		if (ret) {
+			printk(KERN_ERR "ext4 defrag: "
+				"ext4_force_commit failed(%d)\n", ret);
+			goto out;
+		}
+	}
+
+out:
+	iput(victim_inode);
+	return ret;
+}
+
+/**
  * ext4_defrag_merge_across_blocks - Merge extents across leaf block
  *
  * @handle:		journal handle
@@ -213,6 +299,7 @@ out:
  * @start_ext:		first new extent to be merged
  * @new_ext:		middle of new extent to be merged
  * @end_ext:		last new extent to be merged
+ * @phase:		phase of the force defrag mode
  *
  * This function returns 0 if succeed, otherwise returns error value.
  */
@@ -220,14 +307,20 @@ static int
 ext4_defrag_merge_across_blocks(handle_t *handle, struct inode *org_inode,
 		struct ext4_extent *o_start, struct ext4_extent *o_end,
 		struct ext4_extent *start_ext, struct ext4_extent *new_ext,
-		struct ext4_extent *end_ext)
+		struct ext4_extent *end_ext, int phase)
 {
 	struct ext4_ext_path *org_path = NULL;
 	ext4_lblk_t eblock = 0;
 	int new_flag = 0;
 	int end_flag = 0;
+	int defrag_flag;
 	int err;

+	if (phase == DEFRAG_FORCE_VICTIM)
+		defrag_flag = 1;
+	else
+		defrag_flag = 0;
+
 	if (le16_to_cpu(start_ext->ee_len) &&
 		le16_to_cpu(new_ext->ee_len) &&
 		le16_to_cpu(end_ext->ee_len)) {
@@ -304,8 +397,8 @@ ext4_defrag_merge_across_blocks(handle_t *handle, struct inode *org_inode,
 			org_path = NULL;
 			goto out;
 		}
-		err = ext4_ext_insert_extent(handle, org_inode,
-					org_path, new_ext);
+		err = ext4_ext_insert_extent_defrag(handle, org_inode,
+					org_path, new_ext, defrag_flag);
 		if (err)
 			goto out;
 	}
@@ -318,8 +411,8 @@ ext4_defrag_merge_across_blocks(handle_t *handle, struct inode *org_inode,
 			org_path = NULL;
 			goto out;
 		}
-		err = ext4_ext_insert_extent(handle, org_inode,
-					org_path, end_ext);
+		err = ext4_ext_insert_extent_defrag(handle, org_inode,
+					org_path, end_ext, defrag_flag);
 		if (err)
 			goto out;
 	}
@@ -397,6 +490,7 @@ ext4_defrag_merge_inside_block(struct ext4_extent *o_start,
  * @new_ext:	middle of new extent to be merged
  * @end_ext:	last new extent to be merged
  * @replaced:	the number of blocks which will be replaced with new_ext
+ * @phase:	phase of the force defrag mode
  *
  * This function returns 0 if succeed, otherwise returns error value.
  */
@@ -405,7 +499,7 @@ ext4_defrag_merge_extents(handle_t *handle, struct inode *org_inode,
 		struct ext4_ext_path *org_path,
 		struct ext4_extent *o_start, struct ext4_extent *o_end,
 		struct ext4_extent *start_ext, struct ext4_extent *new_ext,
-		struct ext4_extent *end_ext, ext4_fsblk_t replaced)
+		struct ext4_extent *end_ext, ext4_fsblk_t replaced, int phase)
 {
 	struct  ext4_extent_header *eh;
 	unsigned need_slots, slots_range;
@@ -443,7 +537,7 @@ ext4_defrag_merge_extents(handle_t *handle, struct inode *org_inode,

 		ret = ext4_defrag_merge_across_blocks(handle, org_inode,
 					o_start, o_end, start_ext, new_ext,
-					end_ext);
+					end_ext, phase);
 		if (ret < 0)
 			return ret;
 	} else {
@@ -476,13 +570,14 @@ ext4_defrag_merge_extents(handle_t *handle, struct inode *org_inode,
  * @org_path:		path indicates first extent to be defraged
  * @dext:		destination extent
  * @from:		start offset on the target file
+ * @phase:		phase of the force defrag mode
  *
  * This function returns 0 if succeed, otherwise returns error value.
  */
 static int
 ext4_defrag_leaf_block(handle_t *handle, struct inode *org_inode,
 		struct ext4_ext_path *org_path, struct ext4_extent *dext,
-		ext4_lblk_t *from)
+		ext4_lblk_t *from, int phase)
 {
 	struct ext4_extent *oext, *o_start = NULL, *o_end = NULL, *prev_ext;
 	struct ext4_extent new_ext, start_ext, end_ext;
@@ -583,7 +678,7 @@ ext4_defrag_leaf_block(handle_t *handle, struct inode *org_inode,
 				+ le16_to_cpu(oext->ee_len) - 1) {
 			ret = ext4_defrag_merge_extents(handle, org_inode,
 					org_path, o_start, o_end, &start_ext,
-					&new_ext, &end_ext, replaced);
+					&new_ext, &end_ext, replaced, phase);
 			if (ret < 0)
 				return ret;

@@ -635,6 +730,7 @@ ext4_defrag_leaf_block(handle_t *handle, struct inode *org_inode,
  * @from:		block offset of org_inode
  * @dest_off:		block offset of dest_inode
  * @count:		block count to be replaced
+ * @phase:              phase of the force defrag mode
  *
  * This function returns 0 if succeed, otherwise returns error value.
  * Replace extents for blocks from "from" to "from + count - 1".
@@ -642,7 +738,7 @@ ext4_defrag_leaf_block(handle_t *handle, struct inode *org_inode,
 static int
 ext4_defrag_replace_branches(handle_t *handle, struct inode *org_inode,
 			struct inode *dest_inode, ext4_lblk_t from,
-			ext4_lblk_t dest_off, ext4_lblk_t count)
+			ext4_lblk_t dest_off, ext4_lblk_t count, int phase)
 {
 	struct ext4_ext_path *org_path = NULL;
 	struct ext4_ext_path *dest_path = NULL;
@@ -703,7 +799,7 @@ ext4_defrag_replace_branches(handle_t *handle, struct inode *org_inode,

 		/* Loop for the original extent blocks */
 		err = ext4_defrag_leaf_block(handle, org_inode,
-						org_path, dext, &from);
+						org_path, dext, &from, phase);
 		if (err < 0)
 			goto out;

@@ -713,7 +809,7 @@ ext4_defrag_replace_branches(handle_t *handle, struct inode *org_inode,
 		 * e.g. ext4_defrag_merge_extents()
 		 */
 		err = ext4_defrag_leaf_block(handle, dest_inode,
-					dest_path, swap_ext, &dest_off);
+					dest_path, swap_ext, &dest_off, -1);
 		if (err < 0)
 			goto out;

@@ -809,6 +905,7 @@ out:
  * @req_blocks:		contiguous blocks count we need
  * @iblock:		target file offset
  * @goal:		goal offset
+ * @phase:              phase of the force defrag mode
  *
  */
 static void
@@ -817,8 +914,22 @@ ext4_defrag_fill_ar(struct inode *org_inode, struct inode *dest_inode,
 			struct ext4_ext_path *org_path,
 			struct ext4_ext_path *dest_path,
 			ext4_fsblk_t req_blocks, ext4_lblk_t iblock,
-			ext4_fsblk_t goal)
+			ext4_fsblk_t goal, int phase)
 {
+	ext4_group_t org_grp_no;
+	ext4_grpblk_t org_blk_off;
+	int org_depth = ext_depth(org_inode);
+
+	if (phase == DEFRAG_FORCE_VICTIM) {
+		ext4_get_group_no_and_offset(org_inode->i_sb,
+				ext_pblock(org_path[org_depth].p_ext),
+				&org_grp_no, &org_blk_off);
+		ar->excepted_group = org_grp_no;
+	} else {
+		/* Allocate contiguous blocks to any block group */
+		ar->excepted_group = -1;
+	}
+
 	ar->inode = dest_inode;
 	ar->len = req_blocks;
 	ar->logical = iblock;
@@ -882,6 +993,61 @@ ext4_defrag_alloc_blocks(handle_t *handle, struct inode *org_inode,
 }

 /**
+ * ext4_defrag_check_phase
+ *	- Validate a block allocation against the force defrag phase
+ *
+ * @ar:			allocation request for multiple block allocation
+ * @dest_grp_no:	block group num of the allocated blocks
+ * @goal_grp_no:	block group num of the destination of block allocation
+ * @alloc_total:	sum total of the allocated blocks
+ * @req_blocks:		contiguous blocks count we need
+ * @phase:		phase of the force defrag mode
+ *
+ * Returns 0 if the allocation is acceptable, otherwise -ENOSPC or -EIO.
+ */
+static int
+ext4_defrag_check_phase(struct ext4_allocation_request *ar,
+			ext4_group_t dest_grp_no, ext4_group_t goal_grp_no,
+			ext4_fsblk_t alloc_total, ext4_lblk_t req_blocks,
+			int phase)
+{
+	if (phase == DEFRAG_FORCE_TRY) {
+		/*
+		 * Short allocation: -ENOSPC triggers the VICTIM phase.
+		 */
+		if (ar->len != req_blocks)
+			return -ENOSPC;
+		return 0;
+	}
+
+	if (phase == DEFRAG_FORCE_VICTIM) {
+		/* The victim must not land in the group it is leaving. */
+		if (dest_grp_no != ar->excepted_group)
+			return 0;
+		printk(KERN_ERR "ext4 defrag: Failed to allocate"
+				" victim file to other block group\n");
+		return -ENOSPC;
+	}
+
+	if (phase == DEFRAG_FORCE_GATHER) {
+		/*
+		 * The space freed by moving the victim extents must still
+		 * be available: the blocks have to come from the goal group
+		 * and cover the whole request, otherwise fail.
+		 */
+		if (dest_grp_no == goal_grp_no && alloc_total == req_blocks)
+			return 0;
+		printk(KERN_ERR "ext4 defrag: The target contiguous"
+				" free area has been already used"
+				" by other process\n");
+		return -EIO;
+	}
+
+	/* Any other phase value has nothing to verify. */
+	return 0;
+}
+
+/**
  * ext4_defrag_partial - Defrag a file per page
  *
  * @tmp_inode:			temporary inode
@@ -890,13 +1056,15 @@ ext4_defrag_alloc_blocks(handle_t *handle, struct inode *org_inode,
  * @dest_blk_offset:		block index on temporary file
  * @data_offset_in_page:	block index where data swapping starts
  * @block_len_in_page:		the number of blocks to be swapped
+ * @phase:			phase of the force defrag mode
  *
  * This function returns 0 if succeed, otherwise returns error value.
  */
 static int
 ext4_defrag_partial(struct inode *tmp_inode, struct file *filp,
 			pgoff_t org_page_offset, ext4_lblk_t dest_blk_offset,
-			int data_offset_in_page, int block_len_in_page)
+			int data_offset_in_page, int block_len_in_page,
+			int phase)
 {
 	struct inode *org_inode = filp->f_dentry->d_inode;
 	struct address_space *mapping = org_inode->i_mapping;
@@ -979,7 +1147,7 @@ ext4_defrag_partial(struct inode *tmp_inode, struct file *filp,
 	try_to_release_page(page, 0);
 	ret = ext4_defrag_replace_branches(handle, org_inode, tmp_inode,
 						org_blk_offset, dest_blk_offset,
-						block_len_in_page);
+						block_len_in_page, phase);
 	if (ret < 0)
 		goto out;

@@ -1030,6 +1198,7 @@ out:
  * @tar_end:		the last block number of the allocated blocks
  * @sum_tmp:		the extents count  in the allocated blocks
  * @goal:		block offset for allocation
+ * @phase:		phase of the force defrag mode
  *
  * This function returns the values as below.
  *	0 (improved)
@@ -1039,7 +1208,7 @@ out:
 static int
 ext4_defrag_comp_ext_count(struct inode *org_inode,
 			struct ext4_ext_path *org_path, ext4_lblk_t tar_end,
-			int sum_tmp, ext4_fsblk_t goal)
+			int sum_tmp, ext4_fsblk_t goal, int phase)
 {
 	struct ext4_extent *ext = NULL;
 	int depth = ext_depth(org_inode);
@@ -1066,7 +1235,8 @@ ext4_defrag_comp_ext_count(struct inode *org_inode,
 			if (sum_org == sum_tmp && !goal) {
 				/* Not improved */
 				ret = 1;
-			} else if (sum_org < sum_tmp) {
+			} else if (sum_org < sum_tmp &&
+					phase != DEFRAG_FORCE_VICTIM) {
 				/* Fragment increased */
 				ret = -ENOSPC;
 				printk(KERN_ERR "ext4 defrag: "
@@ -1095,6 +1265,7 @@ ext4_defrag_comp_ext_count(struct inode *org_inode,
  * @req_blocks:		the number of blocks to allocate
  * @iblock:		file related offset
  * @goal:		block offset for allocation
+ * @phase:		phase of the force defrag mode
  *
  * This function returns the value as below:
  *	0 (succeed)
@@ -1105,7 +1276,7 @@ static int
 ext4_defrag_new_extent_tree(struct inode *org_inode, struct inode *tmp_inode,
 			struct ext4_ext_path *org_path, ext4_lblk_t req_start,
 			ext4_lblk_t req_blocks, ext4_lblk_t iblock,
-			ext4_fsblk_t goal)
+			ext4_fsblk_t goal, int phase)
 {
 	handle_t *handle;
 	struct ext4_sb_info *sbi = EXT4_SB(org_inode->i_sb);
@@ -1117,6 +1288,8 @@ ext4_defrag_new_extent_tree(struct inode *org_inode, struct inode *tmp_inode,
 	ext4_fsblk_t newblock = 0;
 	ext4_lblk_t req_end = req_start + req_blocks - 1;
 	ext4_lblk_t rest_blocks = 0;
+	ext4_group_t dest_group_no, goal_group_no;
+	ext4_grpblk_t dest_blk_off, goal_blk_off;
 	int sum_tmp = 0;
 	int metadata = 1;
 	int ret;
@@ -1133,7 +1306,7 @@ ext4_defrag_new_extent_tree(struct inode *org_inode, struct inode *tmp_inode,

 	/* Fill struct ext4_allocation_request with necessary info */
 	ext4_defrag_fill_ar(org_inode, tmp_inode, &ar, org_path,
-				dest_path, req_blocks, iblock, goal);
+				dest_path, req_blocks, iblock, goal, phase);

 	handle = ext4_journal_start(tmp_inode, 0);
 	if (IS_ERR(handle)) {
@@ -1141,6 +1314,9 @@ ext4_defrag_new_extent_tree(struct inode *org_inode, struct inode *tmp_inode,
 		goto out2;
 	}

+	ext4_get_group_no_and_offset(tmp_inode->i_sb, goal,
+				&goal_group_no, &goal_blk_off);
+
 	while (alloc_total != req_blocks) {
 		/* Allocate blocks */
 		ret = ext4_defrag_alloc_blocks(handle, org_inode, tmp_inode,
@@ -1150,9 +1326,21 @@ ext4_defrag_new_extent_tree(struct inode *org_inode, struct inode *tmp_inode,
 		/* Claimed blocks are already reserved */
 		EXT4_I(ar.inode)->i_delalloc_reserved_flag = 1;

+		ext4_get_group_no_and_offset(tmp_inode->i_sb, newblock,
+					&dest_group_no, &dest_blk_off);
+
 		alloc_total += ar.len;
 		rest_blocks = req_blocks - alloc_total;

+		/* These checks are done only in force mode */
+		if (phase) {
+			ret = ext4_defrag_check_phase(&ar, dest_group_no,
+					goal_group_no, alloc_total,
+					req_blocks, phase);
+			if (ret < 0)
+				goto out;
+		}
+
 		newex.ee_block = cpu_to_le32(alloc_total - ar.len);
 		ext4_ext_store_pblock(&newex, newblock);
 		newex.ee_len = cpu_to_le16(ar.len);
@@ -1162,13 +1350,14 @@ ext4_defrag_new_extent_tree(struct inode *org_inode, struct inode *tmp_inode,
 		if (ret < 0)
 			goto out;

-		ar.goal = newblock + ar.len;
+		if (!phase)
+			ar.goal = newblock + ar.len;
 		ar.len = req_blocks - alloc_total;
 		sum_tmp++;
 	}

 	ret = ext4_defrag_comp_ext_count(org_inode, org_path, req_end,
-					sum_tmp, goal);
+					sum_tmp, goal, phase);

 out:
 	if (ret < 0 && ar.len)
@@ -1195,14 +1384,16 @@ out2:
  * ext4_defrag_check - Check the environment whether a defrag can be done
  *
  * @org_inode:		original inode
+ * @ext:		extent to be moved (only defrag force mode)
  * @defrag_size:	size of defrag in blocks
  * @goal:		pointer to block offset for allocation
+ * @phase:		phase of the force defrag mode
  *
  * This function returns 0 if succeed, otherwise returns error value.
  */
 static int
-ext4_defrag_check(struct inode *org_inode, ext4_lblk_t defrag_size,
-		ext4_fsblk_t *goal)
+ext4_defrag_check(struct inode *org_inode, struct ext4_extent_data *ext,
+		ext4_lblk_t defrag_size, ext4_fsblk_t *goal, int phase)
 {

 	/* Ext4 online defrag supports only extent based file */
@@ -1223,6 +1414,19 @@ ext4_defrag_check(struct inode *org_inode, ext4_lblk_t defrag_size,
 		return -EOPNOTSUPP;
 	}

+	if (phase == DEFRAG_FORCE_GATHER) {
+		if (!ext->len || ext->len < defrag_size) {
+			printk(KERN_ERR "ext4_defrag: "
+				"Invalid extent length\n");
+			return -EINVAL;
+		}
+		/*
+		 * Set the goal offset to the start offset of the contiguous
+		 * free space that was made by moving the victim inode.
+		 */
+		*goal = ext->start;
+	}
+
 	return 0;
 }

@@ -1304,13 +1508,16 @@ out:
  * @block_start:	starting offset to defrag in blocks
  * @defrag_size:	size of defrag in blocks
  * @goal:		block offset for allocation
+ * @phase:		phase of the force defrag mode
+ * @ext:		extent to be moved (only defrag force mode)
  *
  * This function returns the number of blocks if succeed, otherwise
  * returns error value.
  */
 int
 ext4_defrag(struct file *filp, ext4_lblk_t block_start,
-		ext4_lblk_t defrag_size, ext4_fsblk_t goal)
+		ext4_lblk_t defrag_size, ext4_fsblk_t goal, int phase,
+		struct ext4_extent_data *ext)
 {
 	struct inode *org_inode = filp->f_dentry->d_inode, *tmp_inode = NULL;
 	struct ext4_ext_path *org_path = NULL, *holecheck_path = NULL;
@@ -1325,7 +1532,7 @@ ext4_defrag(struct file *filp, ext4_lblk_t block_start,
 	int block_len_in_page;

 	/* Check the filesystem environment whether defrag can be done */
-	ret = ext4_defrag_check(org_inode, defrag_size, &goal);
+	ret = ext4_defrag_check(org_inode, ext, defrag_size, &goal, phase);
 	if (ret < 0)
 		return ret;

@@ -1442,11 +1649,11 @@ ext4_defrag(struct file *filp, ext4_lblk_t block_start,

 		ret = ext4_defrag_new_extent_tree(org_inode, tmp_inode,
 					org_path, seq_start, seq_blocks,
-					block_start, goal);
+					block_start, goal, phase);

 		if (ret < 0) {
 			break;
-		} else if (ret == 1) {
+		} else if (ret == 1 && (!goal || (goal && !phase))) {
 			ret = 0;
 			seq_start = le32_to_cpu(ext_cur->ee_block);
 			goto CLEANUP;
@@ -1491,7 +1698,8 @@ ext4_defrag(struct file *filp, ext4_lblk_t block_start,
 						org_page_offset,
 						dest_block_offset,
 						data_offset_in_page,
-						block_len_in_page);
+						block_len_in_page,
+						phase);
 			if (ret < 0)
 				goto out;

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index c72703f..e0b09dc 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -97,6 +97,11 @@ struct ext4_allocation_request {
 	unsigned long len;
 	/* flags. see above EXT4_MB_HINT_* */
 	unsigned long flags;
+	/*
+	 * for ext4 online defrag:
+	 * the block group excluded from allocation (-1: exclude none)
+	 */
+	long long excepted_group;
 };

 /*
@@ -306,6 +311,7 @@ struct ext4_new_group_data {
 #define EXT4_IOC_SUPER_INFO		_IOR('f', 16, struct ext4_super_block)
 #define EXT4_IOC_FREE_BLOCKS_INFO	_IOWR('f', 17, struct ext4_extents_info)
 #define EXT4_IOC_FIEMAP_INO		_IOWR('f', 18, struct fiemap_ino)
+#define EXT4_IOC_MOVE_VICTIM		_IOW('f', 19, struct ext4_extents_info)

 /*
  * ioctl commands in 32 bit emulation
@@ -328,8 +334,15 @@ struct ext4_new_group_data {
  *
  * DEFRAG_MAX_ENT:	the maximum number of extents for exchanging between
  *			kernel-space and user-space per an ioctl
+ * DEFRAG_FORCE_TRY:	check whether we have free space fragmentation or not
+ * DEFRAG_FORCE_VICTIM:	move victim extents to make sufficient space
+ * DEFRAG_FORCE_GATHER:	move the target file into the free space made in the
+ *			DEFRAG_FORCE_VICTIM phase
  */
 #define DEFRAG_MAX_ENT		32
+#define DEFRAG_FORCE_TRY	1
+#define DEFRAG_FORCE_VICTIM	2
+#define DEFRAG_FORCE_GATHER	3

 struct ext4_extent_data {
 	ext4_lblk_t block;		/* start logical block number */
@@ -341,6 +354,8 @@ struct ext4_ext_defrag_data {
 	ext4_lblk_t start_offset;	/* start offset to defrag in blocks */
 	ext4_lblk_t defrag_size;	/* size of defrag in blocks */
 	ext4_fsblk_t goal;		/* block offset for allocation */
+	int flag;			/* free space mode flag */
+	struct ext4_extent_data ext;
 };

 struct ext4_extents_info {
@@ -1190,8 +1205,11 @@ extern void ext4_inode_table_set(struct super_block *sb,
 extern int ext4_ext_journal_restart(handle_t *handle, int needed);
 /* defrag.c */
 extern int ext4_defrag(struct file *filp, ext4_lblk_t block_start,
-			ext4_lblk_t defrag_size, ext4_fsblk_t goal);
+			ext4_lblk_t defrag_size, ext4_fsblk_t goal,
+			int flag, struct ext4_extent_data *ext);
 extern int ext4_defrag_fiemap_ino(struct file *filp, unsigned long arg);
+extern int ext4_defrag_move_victim(struct file *target_filp,
+				struct ext4_extents_info *ext_info);

 static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
 {
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 39e043b..9b9dee9 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -250,6 +250,9 @@ extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex);
 extern ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
 					struct ext4_ext_path *path,
 					ext4_lblk_t block);
+extern int ext4_ext_insert_extent_defrag(handle_t *handle, struct inode *inode,
+					struct ext4_ext_path *path,
+					struct ext4_extent *newext, int defrag);

 #endif /* _EXT4_EXTENTS */

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index dde234f..a17f53c 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -186,11 +186,17 @@ ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
 static ext4_fsblk_t
 ext4_ext_new_meta_block(handle_t *handle, struct inode *inode,
 			struct ext4_ext_path *path,
-			struct ext4_extent *ex, int *err)
+			struct ext4_extent *ex, int *err,
+			ext4_fsblk_t defrag_goal)
 {
 	ext4_fsblk_t goal, newblock;

-	goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
+	if (defrag_goal)
+		goal = defrag_goal;
+	else
+		goal = ext4_ext_find_goal(inode, path,
+					le32_to_cpu(ex->ee_block));
+
 	newblock = ext4_new_meta_block(handle, inode, goal, err);
 	return newblock;
 }
@@ -675,7 +681,8 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
  */
 static int ext4_ext_split(handle_t *handle, struct inode *inode,
 				struct ext4_ext_path *path,
-				struct ext4_extent *newext, int at)
+				struct ext4_extent *newext, int at,
+				ext4_fsblk_t defrag_goal)
 {
 	struct buffer_head *bh = NULL;
 	int depth = ext_depth(inode);
@@ -726,7 +733,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 	ext_debug("allocate %d blocks for indexes/leaf\n", depth - at);
 	for (a = 0; a < depth - at; a++) {
 		newblock = ext4_ext_new_meta_block(handle, inode, path,
-						   newext, &err);
+						   newext, &err, defrag_goal);
 		if (newblock == 0)
 			goto cleanup;
 		ablocks[a] = newblock;
@@ -913,7 +920,8 @@ cleanup:
  */
 static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
 					struct ext4_ext_path *path,
-					struct ext4_extent *newext)
+					struct ext4_extent *newext,
+					ext4_fsblk_t defrag_goal)
 {
 	struct ext4_ext_path *curp = path;
 	struct ext4_extent_header *neh;
@@ -922,7 +930,8 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
 	ext4_fsblk_t newblock;
 	int err = 0;

-	newblock = ext4_ext_new_meta_block(handle, inode, path, newext, &err);
+	newblock = ext4_ext_new_meta_block(handle, inode, path,
+					   newext, &err, defrag_goal);
 	if (newblock == 0)
 		return err;

@@ -998,7 +1007,8 @@ out:
  */
 static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
 					struct ext4_ext_path *path,
-					struct ext4_extent *newext)
+					struct ext4_extent *newext,
+					ext4_fsblk_t defrag_goal)
 {
 	struct ext4_ext_path *curp;
 	int depth, i, err = 0;
@@ -1018,7 +1028,8 @@ repeat:
 	if (EXT_HAS_FREE_INDEX(curp)) {
 		/* if we found index with free entry, then use that
 		 * entry: create all needed subtree and add new leaf */
-		err = ext4_ext_split(handle, inode, path, newext, i);
+		err = ext4_ext_split(handle, inode, path, newext, i,
+					defrag_goal);
 		if (err)
 			goto out;

@@ -1031,7 +1042,8 @@ repeat:
 			err = PTR_ERR(path);
 	} else {
 		/* tree is full, time to grow in depth */
-		err = ext4_ext_grow_indepth(handle, inode, path, newext);
+		err = ext4_ext_grow_indepth(handle, inode, path,
+						newext, defrag_goal);
 		if (err)
 			goto out;

@@ -1477,6 +1489,19 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
 				struct ext4_ext_path *path,
 				struct ext4_extent *newext)
 {
+	return ext4_ext_insert_extent_defrag(handle, inode, path, newext, 0);
+}
+
+/*
+ * ext4_ext_insert_extent_defrag:
+ * The difference from ext4_ext_insert_extent is to use the first block
+ * in newext as the goal of the new index block.
+ */
+int
+ext4_ext_insert_extent_defrag(handle_t *handle, struct inode *inode,
+				struct ext4_ext_path *path,
+				struct ext4_extent *newext, int defrag)
+{
 	struct ext4_extent_header *eh;
 	struct ext4_extent *ex, *fex;
 	struct ext4_extent *nearex; /* nearest extent */
@@ -1484,6 +1509,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
 	int depth, len, err;
 	ext4_lblk_t next;
 	unsigned uninitialized = 0;
+	ext4_fsblk_t defrag_goal;

 	BUG_ON(ext4_ext_get_actual_len(newext) == 0);
 	depth = ext_depth(inode);
@@ -1544,11 +1570,16 @@ repeat:
 			  le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
 	}

+	if (defrag)
+		defrag_goal = ext_pblock(newext);
+	else
+		defrag_goal = 0;
 	/*
 	 * There is no free space in the found leaf.
 	 * We're gonna add a new leaf in the tree.
 	 */
-	err = ext4_ext_create_new_leaf(handle, inode, path, newext);
+	err = ext4_ext_create_new_leaf(handle, inode, path,
+					newext, defrag_goal);
 	if (err)
 		goto cleanup;
 	depth = ext_depth(inode);
@@ -2845,6 +2876,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 	ar.goal = ext4_ext_find_goal(inode, path, iblock);
 	ar.logical = iblock;
 	ar.len = allocated;
+	ar.excepted_group = -1;
 	if (S_ISREG(inode->i_mode))
 		ar.flags = EXT4_MB_HINT_DATA;
 	else
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index b69b54a..15050d3 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -242,7 +242,8 @@ setversion_out:
 		}

 		err = ext4_defrag(filp, defrag.start_offset,
-			defrag.defrag_size, defrag.goal);
+			defrag.defrag_size, defrag.goal, defrag.flag,
+			&defrag.ext);
 		return err;
 	}

@@ -283,6 +284,19 @@ setversion_out:
 		return err;
 	}

+	case EXT4_IOC_MOVE_VICTIM: {
+		struct ext4_extents_info ext_info;
+		int err;
+
+		if (copy_from_user(&ext_info,
+			(struct ext4_extents_info __user *)arg,
+						sizeof(ext_info)))
+			return -EFAULT;
+
+		err = ext4_defrag_move_victim(filp, &ext_info);
+		return err;
+	}
+
 	case EXT4_IOC_GROUP_ADD: {
 		struct ext4_new_group_data input;
 		struct super_block *sb = inode->i_sb;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index eebcf7c..d523e42 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1774,6 +1774,10 @@ repeat:
 			if (group == EXT4_SB(sb)->s_groups_count)
 				group = 0;

+			if (ac->ac_excepted_group != -1 &&
+			    group == ac->ac_excepted_group)
+				continue;
+
 			/* quick check to skip empty groups */
 			grp = ext4_get_group_info(ac->ac_sb, group);
 			if (grp->bb_free == 0)
@@ -4069,6 +4073,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
 	ac->ac_bitmap_page = NULL;
 	ac->ac_buddy_page = NULL;
 	ac->ac_lg = NULL;
+	ac->ac_excepted_group = ar->excepted_group;

 	/* we have to define context: we'll we work with a file or
 	 * locality group. this is a policy, actually */
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index b5dff1f..557308a 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -219,6 +219,7 @@ struct ext4_allocation_context {
 	struct page *ac_buddy_page;
 	struct ext4_prealloc_space *ac_pa;
 	struct ext4_locality_group *ac_lg;
+	long long ac_excepted_group;
 };

 #define AC_STATUS_CONTINUE	1

^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2008-10-24 10:10 UTC | newest]

Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-10-24 10:10 [RFC][PATCH 8/9]ext4: Add the EXT4_IOC_MOVE_VICTIM ioctl Akira Fujita

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.