All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC][PATCH 10/12]ext4: Add the EXT4_IOC_MOVE_VICTIM ioctl
@ 2008-09-27  7:27 Akira Fujita
  0 siblings, 0 replies; only message in thread
From: Akira Fujita @ 2008-09-27  7:27 UTC (permalink / raw)
  To: linux-ext4; +Cc: linux-fsdevel

ext4: online defrag -- Add the EXT4_IOC_MOVE_VICTIM ioctl.

From: Akira Fujita <a-fujita@rs.jp.nec.com>

The EXT4_IOC_MOVE_VICTIM ioctl moves the victim extents into another block
group, so that contiguous free space is created in the target block group.
This ioctl is used only in force defrag mode (-f).

Signed-off-by: Akira Fujita <a-fujita@rs.jp.nec.com>
Signed-off-by: Takashi Sato <t-sato@yk.jp.nec.com>
---
 fs/ext4/balloc.c       |    1 +
 fs/ext4/defrag.c       |  262 ++++++++++++++++++++++++++++++++++++++++++------
 fs/ext4/ext4.h         |   18 +++-
 fs/ext4/ext4_extents.h |    5 +
 fs/ext4/extents.c      |   54 ++++++++--
 fs/ext4/ioctl.c        |    3 +-
 fs/ext4/mballoc.c      |    5 +
 fs/ext4/mballoc.h      |    1 +
 8 files changed, 307 insertions(+), 42 deletions(-)

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 2344a96..969e996 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -2026,6 +2026,7 @@ static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode,
 	ar.goal = goal;
 	ar.len = *count;
 	ar.logical = iblock;
+	ar.excepted_group = -1;

 	if (S_ISREG(inode->i_mode) && !(flags & EXT4_META_BLOCK))
 		/* enable in-core preallocation for data block allocation */
diff --git a/fs/ext4/defrag.c b/fs/ext4/defrag.c
index 26fb4a6..a2b17c5 100644
--- a/fs/ext4/defrag.c
+++ b/fs/ext4/defrag.c
@@ -380,6 +380,80 @@ err:
 }

 /**
+ * ext4_defrag_move_victim - Create free space for defrag
+ *
+ * @target_filp:	target file
+ * @ext_info:		victim inode number, goal and extents (from user space)
+ *
+ * Move the listed victim extents to another block group.  Returns 0 or an
+ * error value; a failed move also discards the target file's reservation.
+ */
+static int
+ext4_defrag_move_victim(struct file *target_filp,
+			struct ext4_extents_info *ext_info)
+{
+	struct inode *org_inode = target_filp->f_dentry->d_inode;
+	struct super_block *sb = org_inode->i_sb;
+	struct file victim_file = { .f_mapping = NULL };
+	struct dentry victim_dent = { .d_inode = NULL };
+	struct inode *victim_inode;
+	struct ext4_extent_data ext = { .len = 0 };
+	ext4_fsblk_t goal = ext_info->goal;
+	ext4_group_t group;
+	ext4_grpblk_t grp_off;
+	int ret, i;
+
+	/* entries comes from user space; cap it at the per-ioctl maximum */
+	if (ext_info->entries > DEFRAG_MAX_ENT)
+		return -EINVAL;
+
+	/* Get the inode of the victim file */
+	victim_inode = ext4_iget(sb, ext_info->ino);
+	if (IS_ERR(victim_inode))
+		return PTR_ERR(victim_inode);
+
+	/* Point the zeroed dummy file/dentry at the victim inode */
+	victim_dent.d_inode = victim_inode;
+	victim_file.f_dentry = &victim_dent;
+	victim_file.f_mapping = victim_inode->i_mapping;
+
+	/* No goal given: aim at the group after the first victim extent */
+	if (goal == -1) {
+		ext4_get_group_no_and_offset(victim_inode->i_sb,
+				ext_info->ext[0].start, &group, &grp_off);
+		goal = ext4_group_first_block_no(sb, group + 1);
+	}
+
+	for (i = 0; i < ext_info->entries; i++) {
+		/* Move original blocks to another block group */
+		ret = ext4_defrag(&victim_file, ext_info->ext[i].block,
+			ext_info->ext[i].len, goal, DEFRAG_FORCE_VICTIM, &ext);
+		if (ret < 0) {
+			printk(KERN_ERR "ext4 defrag: Moving victim file "
+				"failed. ino [%llu]\n", ext_info->ino);
+			goto err;
+		}
+
+		/* Sync journal blocks before reservation */
+		ret = ext4_force_commit(sb);
+		if (ret) {
+			printk(KERN_ERR "ext4 defrag: "
+				"ext4_force_commit failed(%d)\n", ret);
+			goto err;
+		}
+	}
+
+	iput(victim_inode);
+	return 0;
+err:
+	down_write(&EXT4_I(org_inode)->i_data_sem);
+	ext4_discard_reservation(org_inode);
+	up_write(&EXT4_I(org_inode)->i_data_sem);
+	iput(victim_inode);
+	return ret;
+}
+
+/**
  * ext4_defrag_fblocks_distribution - Search free blocks distribution
  *
  * @org_inode:	original inode
@@ -538,6 +612,16 @@ int ext4_defrag_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
 			return -EFAULT;

 		err = ext4_defrag_reserve_fblocks(inode, &ext_info);
+	} else if (cmd == EXT4_IOC_MOVE_VICTIM) {
+		struct ext4_extents_info ext_info;
+
+		if (copy_from_user(&ext_info,
+			(struct ext4_extents_info __user *)arg,
+			sizeof(ext_info)))
+			return -EFAULT;
+
+		err = ext4_defrag_move_victim(filp, &ext_info);
+
 	} else if (cmd == EXT4_IOC_DEFRAG) {
 		struct ext4_ext_defrag_data defrag;
 		struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
@@ -564,7 +648,8 @@ int ext4_defrag_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
 		}

 		err = ext4_defrag(filp, defrag.start_offset,
-				defrag.defrag_size, defrag.goal);
+				defrag.defrag_size, defrag.goal, defrag.flag,
+				&defrag.ext);
 	}

 	return err;
@@ -580,6 +665,7 @@ int ext4_defrag_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
  * @start_ext:		first new extent to be merged
  * @new_ext:		middle of new extent to be merged
  * @end_ext:		last new extent to be merged
+ * @phase:		phase of the force defrag mode
  *
  * This function returns 0 if succeed, otherwise returns error value.
  */
@@ -587,14 +673,20 @@ static int
 ext4_defrag_merge_across_blocks(handle_t *handle, struct inode *org_inode,
 		struct ext4_extent *o_start, struct ext4_extent *o_end,
 		struct ext4_extent *start_ext, struct ext4_extent *new_ext,
-		struct ext4_extent *end_ext)
+		struct ext4_extent *end_ext, int phase)
 {
 	struct ext4_ext_path *org_path = NULL;
 	ext4_lblk_t eblock = 0;
 	int new_flag = 0;
 	int end_flag = 0;
+	int defrag_flag;
 	int err;

+	if (phase == DEFRAG_FORCE_VICTIM)
+		defrag_flag = 1;
+	else
+		defrag_flag = 0;
+
 	if (le16_to_cpu(start_ext->ee_len) &&
 		le16_to_cpu(new_ext->ee_len) &&
 		le16_to_cpu(end_ext->ee_len)) {
@@ -671,8 +763,8 @@ ext4_defrag_merge_across_blocks(handle_t *handle, struct inode *org_inode,
 			org_path = NULL;
 			goto out;
 		}
-		err = ext4_ext_insert_extent(handle, org_inode,
-					org_path, new_ext);
+		err = ext4_ext_insert_extent_defrag(handle, org_inode,
+					org_path, new_ext, defrag_flag);
 		if (err)
 			goto out;
 	}
@@ -685,8 +777,8 @@ ext4_defrag_merge_across_blocks(handle_t *handle, struct inode *org_inode,
 			org_path = NULL;
 			goto out;
 		}
-		err = ext4_ext_insert_extent(handle, org_inode,
-					org_path, end_ext);
+		err = ext4_ext_insert_extent_defrag(handle, org_inode,
+					org_path, end_ext, defrag_flag);
 		if (err)
 			goto out;
 	}
@@ -764,6 +856,7 @@ ext4_defrag_merge_inside_block(struct ext4_extent *o_start,
  * @new_ext:	middle of new extent to be merged
  * @end_ext:	last new extent to be merged
  * @replaced:	the number of blocks which will be replaced with new_ext
+ * @phase:	phase of the force defrag mode
  *
  * This function returns 0 if succeed, otherwise returns error value.
  */
@@ -772,7 +865,7 @@ ext4_defrag_merge_extents(handle_t *handle, struct inode *org_inode,
 		struct ext4_ext_path *org_path,
 		struct ext4_extent *o_start, struct ext4_extent *o_end,
 		struct ext4_extent *start_ext, struct ext4_extent *new_ext,
-		struct ext4_extent *end_ext, ext4_fsblk_t replaced)
+		struct ext4_extent *end_ext, ext4_fsblk_t replaced, int phase)
 {
 	struct  ext4_extent_header *eh;
 	unsigned need_slots, slots_range;
@@ -810,7 +903,7 @@ ext4_defrag_merge_extents(handle_t *handle, struct inode *org_inode,

 		ret = ext4_defrag_merge_across_blocks(handle, org_inode,
 					o_start, o_end, start_ext, new_ext,
-					end_ext);
+					end_ext, phase);
 		if (ret < 0)
 			return ret;
 	} else {
@@ -843,13 +936,14 @@ ext4_defrag_merge_extents(handle_t *handle, struct inode *org_inode,
  * @org_path:		path indicates first extent to be defraged
  * @dext:		destination extent
  * @from:		start offset on the target file
+ * @phase:		phase of the force defrag mode
  *
  * This function returns 0 if succeed, otherwise returns error value.
  */
 static int
 ext4_defrag_leaf_block(handle_t *handle, struct inode *org_inode,
 		struct ext4_ext_path *org_path, struct ext4_extent *dext,
-		ext4_lblk_t *from)
+		ext4_lblk_t *from, int phase)
 {
 	struct ext4_extent *oext, *o_start = NULL, *o_end = NULL, *prev_ext;
 	struct ext4_extent new_ext, start_ext, end_ext;
@@ -950,7 +1044,7 @@ ext4_defrag_leaf_block(handle_t *handle, struct inode *org_inode,
 				+ le16_to_cpu(oext->ee_len) - 1) {
 			ret = ext4_defrag_merge_extents(handle, org_inode,
 					org_path, o_start, o_end, &start_ext,
-					&new_ext, &end_ext, replaced);
+					&new_ext, &end_ext, replaced, phase);
 			if (ret < 0)
 				return ret;

@@ -1002,6 +1096,7 @@ ext4_defrag_leaf_block(handle_t *handle, struct inode *org_inode,
  * @from:		block offset of org_inode
  * @dest_off:		block offset of dest_inode
  * @count:		block count to be replaced
+ * @phase:              phase of the force defrag mode
  *
  * This function returns 0 if succeed, otherwise returns error value.
  * Replace extents for blocks from "from" to "from + count - 1".
@@ -1009,7 +1104,7 @@ ext4_defrag_leaf_block(handle_t *handle, struct inode *org_inode,
 static int
 ext4_defrag_replace_branches(handle_t *handle, struct inode *org_inode,
 			struct inode *dest_inode, ext4_lblk_t from,
-			ext4_lblk_t dest_off, ext4_lblk_t count)
+			ext4_lblk_t dest_off, ext4_lblk_t count, int phase)
 {
 	struct ext4_ext_path *org_path = NULL;
 	struct ext4_ext_path *dest_path = NULL;
@@ -1070,7 +1165,7 @@ ext4_defrag_replace_branches(handle_t *handle, struct inode *org_inode,

 		/* Loop for the original extent blocks */
 		err = ext4_defrag_leaf_block(handle, org_inode,
-						org_path, dext, &from);
+						org_path, dext, &from, phase);
 		if (err < 0)
 			goto out;

@@ -1080,7 +1175,7 @@ ext4_defrag_replace_branches(handle_t *handle, struct inode *org_inode,
 		 * e.g. ext4_defrag_merge_extents()
 		 */
 		err = ext4_defrag_leaf_block(handle, dest_inode,
-					dest_path, swap_ext, &dest_off);
+					dest_path, swap_ext, &dest_off, -1);
 		if (err < 0)
 			goto out;

@@ -1176,6 +1271,7 @@ out:
  * @req_blocks:		contiguous blocks count we need
  * @iblock:		target file offset
  * @goal:		goal offset
+ * @phase:              phase of the force defrag mode
  *
  */
 static void
@@ -1184,8 +1280,22 @@ ext4_defrag_fill_ar(struct inode *org_inode, struct inode *dest_inode,
 			struct ext4_ext_path *org_path,
 			struct ext4_ext_path *dest_path,
 			ext4_fsblk_t req_blocks, ext4_lblk_t iblock,
-			ext4_fsblk_t goal)
+			ext4_fsblk_t goal, int phase)
 {
+	ext4_group_t org_grp_no;
+	ext4_grpblk_t org_blk_off;
+	int org_depth = ext_depth(org_inode);
+
+	if (phase == DEFRAG_FORCE_VICTIM) {
+		ext4_get_group_no_and_offset(org_inode->i_sb,
+				ext_pblock(org_path[org_depth].p_ext),
+				&org_grp_no, &org_blk_off);
+		ar->excepted_group = org_grp_no;
+	} else {
+		/* Allocate contiguous blocks to any block group */
+		ar->excepted_group = -1;
+	}
+
 	ar->inode = dest_inode;
 	ar->len = req_blocks;
 	ar->logical = iblock;
@@ -1249,6 +1359,56 @@ ext4_defrag_alloc_blocks(handle_t *handle, struct inode *org_inode,
 }

 /**
+ * ext4_defrag_check_phase - Validate an allocation in force defrag mode
+ *
+ * @ar:			allocation request that was just serviced
+ * @dest_grp_no:	block group num of the allocated blocks
+ * @goal_grp_no:	block group num of the allocation goal
+ * @alloc_total:	sum total of the blocks allocated so far
+ * @req_blocks:		contiguous blocks count we need
+ * @phase:		DEFRAG_FORCE_{TRY,VICTIM,GATHER}; others are ignored
+ *
+ * Returns 0 when the allocation is acceptable for @phase, -ENOSPC for a
+ * failed TRY or VICTIM check, and -EIO for a failed GATHER check.
+ */
+static int
+ext4_defrag_check_phase(struct ext4_allocation_request *ar,
+			ext4_group_t dest_grp_no, ext4_group_t goal_grp_no,
+			ext4_fsblk_t alloc_total, ext4_lblk_t req_blocks,
+			int phase)
+{
+	if (phase == DEFRAG_FORCE_TRY) {
+		/*
+		 * ar->len differing from req_blocks means not enough space.
+		 * -ENOSPC triggers the DEFRAG_FORCE_VICTIM phase.
+		 */
+		if (ar->len != req_blocks)
+			return -ENOSPC;
+		return 0;
+	}
+
+	if (phase == DEFRAG_FORCE_VICTIM) {
+		/* New blocks must not come from the excepted block group. */
+		if (dest_grp_no != ar->excepted_group)
+			return 0;
+		printk(KERN_ERR "ext4 defrag: Failed to allocate"
+				" victim file to other block group\n");
+		return -ENOSPC;
+	}
+
+	if (phase == DEFRAG_FORCE_GATHER) {
+		/* Reserved blocks may already be used by another process. */
+		if (dest_grp_no == goal_grp_no && alloc_total == req_blocks)
+			return 0;
+		printk(KERN_ERR "ext4 defrag: Reserved blocks are"
+				" already used by other process\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/**
  * ext4_defrag_partial - Defrag a file per page
  *
  * @tmp_inode:			temporary inode
@@ -1257,13 +1417,15 @@ ext4_defrag_alloc_blocks(handle_t *handle, struct inode *org_inode,
  * @dest_blk_offset:		block index on temporary file
  * @data_offset_in_page:	block index where data swapping starts
  * @block_len_in_page:		the number of blocks to be swapped
+ * @phase:			phase of the force defrag mode
  *
  * This function returns 0 if succeed, otherwise returns error value.
  */
 static int
 ext4_defrag_partial(struct inode *tmp_inode, struct file *filp,
 			pgoff_t org_page_offset, ext4_lblk_t dest_blk_offset,
-			int data_offset_in_page, int block_len_in_page)
+			int data_offset_in_page, int block_len_in_page,
+			int phase)
 {
 	struct inode *org_inode = filp->f_dentry->d_inode;
 	struct address_space *mapping = org_inode->i_mapping;
@@ -1346,7 +1508,7 @@ ext4_defrag_partial(struct inode *tmp_inode, struct file *filp,
 	try_to_release_page(page, 0);
 	ret = ext4_defrag_replace_branches(handle, org_inode, tmp_inode,
 						org_blk_offset, dest_blk_offset,
-						block_len_in_page);
+						block_len_in_page, phase);
 	if (ret < 0)
 		goto out;

@@ -1397,6 +1559,7 @@ out:
  * @tar_end:		the last block number of the allocated blocks
  * @sum_tmp:		the extents count  in the allocated blocks
  * @goal:		block offset for allocation
+ * @phase:		phase of the force defrag mode
  *
  * This function returns the values as below.
  *	0 (improved)
@@ -1406,7 +1569,7 @@ out:
 static int
 ext4_defrag_comp_ext_count(struct inode *org_inode,
 			struct ext4_ext_path *org_path, ext4_lblk_t tar_end,
-			int sum_tmp, ext4_fsblk_t goal)
+			int sum_tmp, ext4_fsblk_t goal, int phase)
 {
 	struct ext4_extent *ext = NULL;
 	int depth = ext_depth(org_inode);
@@ -1433,7 +1596,8 @@ ext4_defrag_comp_ext_count(struct inode *org_inode,
 			if (sum_org == sum_tmp && !goal) {
 				/* Not improved */
 				ret = 1;
-			} else if (sum_org < sum_tmp) {
+			} else if (sum_org < sum_tmp &&
+					phase != DEFRAG_FORCE_VICTIM) {
 				/* Fragment increased */
 				ret = -ENOSPC;
 				printk(KERN_ERR "ext4 defrag: "
@@ -1462,6 +1626,7 @@ ext4_defrag_comp_ext_count(struct inode *org_inode,
  * @req_blocks:		the number of blocks to allocate
  * @iblock:		file related offset
  * @goal:		block offset for allocation
+ * @phase:		phase of the force defrag mode
  *
  * This function returns the value as below:
  *	0 (succeed)
@@ -1472,7 +1637,7 @@ static int
 ext4_defrag_new_extent_tree(struct inode *org_inode, struct inode *tmp_inode,
 			struct ext4_ext_path *org_path, ext4_lblk_t req_start,
 			ext4_lblk_t req_blocks, ext4_lblk_t iblock,
-			ext4_fsblk_t goal)
+			ext4_fsblk_t goal, int phase)
 {
 	handle_t *handle;
 	struct ext4_sb_info *sbi = EXT4_SB(org_inode->i_sb);
@@ -1484,6 +1649,8 @@ ext4_defrag_new_extent_tree(struct inode *org_inode, struct inode *tmp_inode,
 	ext4_fsblk_t newblock = 0;
 	ext4_lblk_t req_end = req_start + req_blocks - 1;
 	ext4_lblk_t rest_blocks = 0;
+	ext4_group_t dest_group_no, goal_group_no;
+	ext4_grpblk_t dest_blk_off, goal_blk_off;
 	int sum_tmp = 0;
 	int metadata = 1;
 	int ret;
@@ -1500,7 +1667,7 @@ ext4_defrag_new_extent_tree(struct inode *org_inode, struct inode *tmp_inode,

 	/* Fill struct ext4_allocation_request with necessary info */
 	ext4_defrag_fill_ar(org_inode, tmp_inode, &ar, org_path,
-				dest_path, req_blocks, iblock, goal);
+				dest_path, req_blocks, iblock, goal, phase);

 	handle = ext4_journal_start(tmp_inode, 0);
 	if (IS_ERR(handle)) {
@@ -1508,6 +1675,9 @@ ext4_defrag_new_extent_tree(struct inode *org_inode, struct inode *tmp_inode,
 		goto out2;
 	}

+	ext4_get_group_no_and_offset(tmp_inode->i_sb, goal,
+				&goal_group_no, &goal_blk_off);
+
 	while (alloc_total != req_blocks) {
 		/* Allocate blocks */
 		ret = ext4_defrag_alloc_blocks(handle, org_inode, tmp_inode,
@@ -1517,9 +1687,21 @@ ext4_defrag_new_extent_tree(struct inode *org_inode, struct inode *tmp_inode,
 		/* Claimed blocks are already reserved */
 		EXT4_I(ar.inode)->i_delalloc_reserved_flag = 1;

+		ext4_get_group_no_and_offset(tmp_inode->i_sb, newblock,
+					&dest_group_no, &dest_blk_off);
+
 		alloc_total += ar.len;
 		rest_blocks = req_blocks - alloc_total;

+		/* the checks that done in force mode */
+		if (phase) {
+			ret = ext4_defrag_check_phase(&ar, dest_group_no,
+					goal_group_no, alloc_total,
+					req_blocks, phase);
+			if (ret < 0)
+				goto out;
+		}
+
 		newex.ee_block = cpu_to_le32(alloc_total - ar.len);
 		ext4_ext_store_pblock(&newex, newblock);
 		newex.ee_len = cpu_to_le16(ar.len);
@@ -1529,13 +1711,14 @@ ext4_defrag_new_extent_tree(struct inode *org_inode, struct inode *tmp_inode,
 		if (ret < 0)
 			goto out;

-		ar.goal = newblock + ar.len;
+		if (!phase)
+			ar.goal = newblock + ar.len;
 		ar.len = req_blocks - alloc_total;
 		sum_tmp++;
 	}

 	ret = ext4_defrag_comp_ext_count(org_inode, org_path, req_end,
-					sum_tmp, goal);
+					sum_tmp, goal, phase);

 out:
 	if (ret < 0 && ar.len)
@@ -1562,14 +1745,16 @@ out2:
  * ext4_defrag_check - Check the environment whether a defrag can be done
  *
  * @org_inode:		original inode
+ * @ext:		extent to be moved (only defrag force mode)
  * @defrag_size:	size of defrag in blocks
  * @goal:		pointer to block offset for allocation
+ * @phase:		phase of the force defrag mode
  *
  * This function returns 0 if succeed, otherwise returns error value.
  */
 static int
-ext4_defrag_check(struct inode *org_inode, ext4_lblk_t defrag_size,
-		ext4_fsblk_t *goal)
+ext4_defrag_check(struct inode *org_inode, struct ext4_extent_data *ext,
+		ext4_lblk_t defrag_size, ext4_fsblk_t *goal, int *phase)
 {
 	/* ext4 online defrag needs mballoc mount option. */
 	if (!test_opt(org_inode->i_sb, MBALLOC)) {
@@ -1578,6 +1763,17 @@ ext4_defrag_check(struct inode *org_inode, ext4_lblk_t defrag_size,
 		return -EOPNOTSUPP;
 	}

+	if (ext->len) {
+		/* Setup for the force defrag mode */
+		if (ext->len < defrag_size) {
+			printk(KERN_ERR "ext4 defrag: "
+					"Invalid length of extent\n");
+			return -EINVAL;
+		}
+		*phase = DEFRAG_FORCE_GATHER;
+		*goal = ext->start;
+	}
+
 	return 0;
 }

@@ -1659,13 +1855,16 @@ out:
  * @block_start:	starting offset to defrag in blocks
  * @defrag_size:	size of defrag in blocks
  * @goal:		block offset for allocation
+ * @phase:		phase of the force defrag mode
+ * @ext:		extent to be moved (only defrag force mode)
  *
  * This function returns the number of blocks if succeed, otherwise
  * returns error value.
  */
 int
 ext4_defrag(struct file *filp, ext4_lblk_t block_start,
-		ext4_lblk_t defrag_size, ext4_fsblk_t goal)
+		ext4_lblk_t defrag_size, ext4_fsblk_t goal, int phase,
+		struct ext4_extent_data *ext)
 {
 	struct inode *org_inode = filp->f_dentry->d_inode, *tmp_inode = NULL;
 	struct ext4_ext_path *org_path = NULL, *holecheck_path = NULL;
@@ -1680,7 +1879,7 @@ ext4_defrag(struct file *filp, ext4_lblk_t block_start,
 	int block_len_in_page;

 	/* Check the filesystem environment whether defrag can be done */
-	ret = ext4_defrag_check(org_inode, defrag_size, &goal);
+	ret = ext4_defrag_check(org_inode, ext, defrag_size, &goal, &phase);
 	if (ret < 0)
 		return ret;

@@ -1797,11 +1996,11 @@ ext4_defrag(struct file *filp, ext4_lblk_t block_start,

 		ret = ext4_defrag_new_extent_tree(org_inode, tmp_inode,
 					org_path, seq_start, seq_blocks,
-					block_start, goal);
+					block_start, goal, phase);

 		if (ret < 0) {
 			break;
-		} else if (ret == 1) {
+		} else if (ret == 1 && (!goal || (goal && !phase))) {
 			ret = 0;
 			seq_start = le32_to_cpu(ext_cur->ee_block);
 			goto CLEANUP;
@@ -1846,7 +2045,8 @@ ext4_defrag(struct file *filp, ext4_lblk_t block_start,
 						org_page_offset,
 						dest_block_offset,
 						data_offset_in_page,
-						block_len_in_page);
+						block_len_in_page,
+						phase);
 			if (ret < 0)
 				goto out;

@@ -1905,6 +2105,10 @@ out:
 		kfree(holecheck_path);
 	}

+	if (phase == DEFRAG_FORCE_GATHER)
+		/* Release reserved block in force mode */
+		ext4_discard_reservation(org_inode);
+
 	up_write(&EXT4_I(org_inode)->i_data_sem);
 	mutex_unlock(&org_inode->i_mutex);

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index eef7885..4e54eb4 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -97,6 +97,11 @@ struct ext4_allocation_request {
 	unsigned long len;
 	/* flags. see above EXT4_MB_HINT_* */
 	unsigned long flags;
+	/*
+	 * for ext4 online defrag:
+	 * the block group which is excepted from allocation target
+	 */
+	long long excepted_group;
 };

 /*
@@ -308,6 +313,7 @@ struct ext4_new_group_data {
 #define EXT4_IOC_FREE_BLOCKS_INFO	_IOW('f', 18, struct ext4_extents_info)
 #define EXT4_IOC_FIEMAP_INO		_IOW('f', 19, struct fiemap_ino)
 #define EXT4_IOC_RESERVE_BLOCK		_IOW('f', 20, struct ext4_extents_info)
+#define EXT4_IOC_MOVE_VICTIM		_IOW('f', 21, struct ext4_extents_info)

 /*
  * ioctl commands in 32 bit emulation
@@ -330,8 +336,15 @@ struct ext4_new_group_data {
  *
  * DEFRAG_MAX_ENT:	the maximum number of extents for exchanging between
  *			kernel-space and user-space per an ioctl
+ * DEFRAG_FORCE_TRY:	check whether we have free space fragmentation or not
+ * DEFRAG_FORCE_VICTIM:	move victim extents to make sufficient space
+ * DEFRAG_FORCE_GATHER:	move the target file into the free space made in the
+ *			DEFRAG_FORCE_VICTIM phase
  */
 #define DEFRAG_MAX_ENT		32
+#define DEFRAG_FORCE_TRY	1
+#define DEFRAG_FORCE_VICTIM	2
+#define DEFRAG_FORCE_GATHER	3

 struct ext4_extent_data {
 	ext4_lblk_t block;		/* start logical block number */
@@ -343,6 +356,8 @@ struct ext4_ext_defrag_data {
 	ext4_lblk_t start_offset;	/* start offset to defrag in blocks */
 	ext4_lblk_t defrag_size;	/* size of defrag in blocks */
 	ext4_fsblk_t goal;		/* block offset for allocation */
+	int flag;			/* free space mode flag */
+	struct ext4_extent_data ext;
 };

 struct ext4_group_data_info {
@@ -1193,7 +1208,8 @@ extern void ext4_inode_table_set(struct super_block *sb,
 extern int ext4_ext_journal_restart(handle_t *handle, int needed);
 /* defrag.c */
 extern int ext4_defrag(struct file *filp, ext4_lblk_t block_start,
-			ext4_lblk_t defrag_size, ext4_fsblk_t goal);
+			ext4_lblk_t defrag_size, ext4_fsblk_t goal,
+			int flag, struct ext4_extent_data *ext);
 extern int ext4_defrag_ioctl(struct inode *, struct file *, unsigned int,
 				unsigned long);

diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 6407222..fbe34b4 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -251,5 +251,10 @@ extern void ext4_ext_drop_refs(struct ext4_ext_path *path);
 extern ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
 					struct ext4_ext_path *path,
 					ext4_lblk_t block);
+extern int ext4_ext_insert_extent_defrag(handle_t *handle, struct inode *inode,
+					struct ext4_ext_path *path,
+					struct ext4_extent *newext, int defrag);
+extern ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_ext_path *path);
+
 #endif /* _EXT4_EXTENTS */

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 7fcf72d..32c1aa9 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -186,11 +186,17 @@ ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
 static ext4_fsblk_t
 ext4_ext_new_meta_block(handle_t *handle, struct inode *inode,
 			struct ext4_ext_path *path,
-			struct ext4_extent *ex, int *err)
+			struct ext4_extent *ex, int *err,
+			ext4_fsblk_t defrag_goal)
 {
 	ext4_fsblk_t goal, newblock;

-	goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
+	if (defrag_goal)
+		goal = defrag_goal;
+	else
+		goal = ext4_ext_find_goal(inode, path,
+					le32_to_cpu(ex->ee_block));
+
 	newblock = ext4_new_meta_block(handle, inode, goal, err);
 	return newblock;
 }
@@ -675,7 +681,8 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
  */
 static int ext4_ext_split(handle_t *handle, struct inode *inode,
 				struct ext4_ext_path *path,
-				struct ext4_extent *newext, int at)
+				struct ext4_extent *newext, int at,
+				ext4_fsblk_t defrag_goal)
 {
 	struct buffer_head *bh = NULL;
 	int depth = ext_depth(inode);
@@ -726,7 +733,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 	ext_debug("allocate %d blocks for indexes/leaf\n", depth - at);
 	for (a = 0; a < depth - at; a++) {
 		newblock = ext4_ext_new_meta_block(handle, inode, path,
-						   newext, &err);
+						   newext, &err, defrag_goal);
 		if (newblock == 0)
 			goto cleanup;
 		ablocks[a] = newblock;
@@ -913,7 +920,8 @@ cleanup:
  */
 static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
 					struct ext4_ext_path *path,
-					struct ext4_extent *newext)
+					struct ext4_extent *newext,
+					ext4_fsblk_t defrag_goal)
 {
 	struct ext4_ext_path *curp = path;
 	struct ext4_extent_header *neh;
@@ -922,7 +930,8 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
 	ext4_fsblk_t newblock;
 	int err = 0;

-	newblock = ext4_ext_new_meta_block(handle, inode, path, newext, &err);
+	newblock = ext4_ext_new_meta_block(handle, inode, path,
+					   newext, &err, defrag_goal);
 	if (newblock == 0)
 		return err;

@@ -998,7 +1007,8 @@ out:
  */
 static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
 					struct ext4_ext_path *path,
-					struct ext4_extent *newext)
+					struct ext4_extent *newext,
+					ext4_fsblk_t defrag_goal)
 {
 	struct ext4_ext_path *curp;
 	int depth, i, err = 0;
@@ -1018,7 +1028,8 @@ repeat:
 	if (EXT_HAS_FREE_INDEX(curp)) {
 		/* if we found index with free entry, then use that
 		 * entry: create all needed subtree and add new leaf */
-		err = ext4_ext_split(handle, inode, path, newext, i);
+		err = ext4_ext_split(handle, inode, path, newext, i,
+					defrag_goal);
 		if (err)
 			goto out;

@@ -1031,7 +1042,8 @@ repeat:
 			err = PTR_ERR(path);
 	} else {
 		/* tree is full, time to grow in depth */
-		err = ext4_ext_grow_indepth(handle, inode, path, newext);
+		err = ext4_ext_grow_indepth(handle, inode, path,
+						newext, defrag_goal);
 		if (err)
 			goto out;

@@ -1211,7 +1223,7 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
  * allocated block. Thus, index entries have to be consistent
  * with leaves.
  */
-static ext4_lblk_t
+ext4_lblk_t
 ext4_ext_next_allocated_block(struct ext4_ext_path *path)
 {
 	int depth;
@@ -1477,6 +1489,19 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
 				struct ext4_ext_path *path,
 				struct ext4_extent *newext)
 {
+	return ext4_ext_insert_extent_defrag(handle, inode, path, newext, 0);
+}
+
+/*
+ * ext4_ext_insert_extent_defrag:
+ * The difference from ext4_ext_insert_extent is to use the first block
+ * in newext as the goal of the new index block.
+ */
+int
+ext4_ext_insert_extent_defrag(handle_t *handle, struct inode *inode,
+				struct ext4_ext_path *path,
+				struct ext4_extent *newext, int defrag)
+{
 	struct ext4_extent_header *eh;
 	struct ext4_extent *ex, *fex;
 	struct ext4_extent *nearex; /* nearest extent */
@@ -1484,6 +1509,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
 	int depth, len, err;
 	ext4_lblk_t next;
 	unsigned uninitialized = 0;
+	ext4_fsblk_t defrag_goal;

 	BUG_ON(ext4_ext_get_actual_len(newext) == 0);
 	depth = ext_depth(inode);
@@ -1544,11 +1570,16 @@ repeat:
 			  le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
 	}

+	if (defrag)
+		defrag_goal = ext_pblock(newext);
+	else
+		defrag_goal = 0;
 	/*
 	 * There is no free space in the found leaf.
 	 * We're gonna add a new leaf in the tree.
 	 */
-	err = ext4_ext_create_new_leaf(handle, inode, path, newext);
+	err = ext4_ext_create_new_leaf(handle, inode, path,
+					newext, defrag_goal);
 	if (err)
 		goto cleanup;
 	depth = ext_depth(inode);
@@ -2848,6 +2879,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 	ar.goal = ext4_ext_find_goal(inode, path, iblock);
 	ar.logical = iblock;
 	ar.len = allocated;
+	ar.excepted_group = -1;
 	if (S_ISREG(inode->i_mode))
 		ar.flags = EXT4_MB_HINT_DATA;
 	else
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 9c992d8..a596785 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -261,7 +261,8 @@ setversion_out:
 	case EXT4_IOC_GROUP_INFO:
 	case EXT4_IOC_FREE_BLOCKS_INFO:
 	case EXT4_IOC_FIEMAP_INO:
-	case EXT4_IOC_RESERVE_BLOCK: {
+	case EXT4_IOC_RESERVE_BLOCK:
+	case EXT4_IOC_MOVE_VICTIM: {
 		return ext4_defrag_ioctl(inode, filp, cmd, arg);
 	}
 	case EXT4_IOC_GROUP_ADD: {
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 01a7daa..78f76da 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1775,6 +1775,10 @@ repeat:
 			if (group == EXT4_SB(sb)->s_groups_count)
 				group = 0;

+			if (ac->ac_excepted_group != -1 &&
+			    group == ac->ac_excepted_group)
+				continue;
+
 			/* quick check to skip empty groups */
 			grp = ext4_get_group_info(ac->ac_sb, group);
 			if (grp->bb_free == 0)
@@ -4160,6 +4164,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
 	ac->ac_bitmap_page = NULL;
 	ac->ac_buddy_page = NULL;
 	ac->ac_lg = NULL;
+	ac->ac_excepted_group = ar->excepted_group;

 	/* we have to define context: we'll we work with a file or
 	 * locality group. this is a policy, actually */
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index c7c9906..6b46c86 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -211,6 +211,7 @@ struct ext4_allocation_context {
 	struct page *ac_buddy_page;
 	struct ext4_prealloc_space *ac_pa;
 	struct ext4_locality_group *ac_lg;
+	long long ac_excepted_group;
 };

 #define AC_STATUS_CONTINUE	1


^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2008-09-27  7:27 UTC | newest]

Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-09-27  7:27 [RFC][PATCH 10/12]ext4: Add the EXT4_IOC_MOVE_VICTIM ioctl Akira Fujita

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.