All of lore.kernel.org
 help / color / mirror / Atom feed
From: Akira Fujita <a-fujita@rs.jp.nec.com>
To: linux-ext4@vger.kernel.org, Theodore Tso <tytso@mit.edu>,
	Mingming Cao <cmm@us.ibm.com>,
	"Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: linux-fsdevel@vger.kernel.org, Akira Fujita <a-fujita@rs.jp.nec.com>
Subject: [RFC][PATCH 7/8] move victim files for the target file (-f mode)
Date: Fri, 04 Apr 2008 20:20:29 +0900	[thread overview]
Message-ID: <47F60EFD.30100@rs.jp.nec.com> (raw)

ext4: online defrag-- Move victim files for the target file (-f mode)

From: Akira Fujita <a-fujita@rs.jp.nec.com>

Move victim files to make sufficient free space, then reallocate
contiguous blocks for the target file.

Signed-off-by: Akira Fujita <a-fujita@rs.jp.nec.com>
Signed-off-by: Takashi Sato <t-sato@yk.jp.nec.com>
---
 fs/ext4/balloc.c       |   10 +-
 fs/ext4/defrag.c       |  427 +++++++++++++++++++++++++++++++++++++++++++++---
 fs/ext4/ext4.h         |   29 +++-
 fs/ext4/ext4_extents.h |    5 +
 fs/ext4/extents.c      |   54 +++++--
 fs/ext4/ioctl.c        |    5 +-
 fs/ext4/mballoc.c      |    5 +
 fs/ext4/mballoc.h      |    1 +
 8 files changed, 494 insertions(+), 42 deletions(-)

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 49b099c..3e22d69 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -383,7 +383,7 @@ restart:
  * If the goal block is within the reservation window, return 1;
  * otherwise, return 0;
  */
-static int
+int
 goal_in_my_reservation(struct ext4_reserve_window *rsv, ext4_grpblk_t grp_goal,
 			ext4_group_t group, struct super_block *sb)
 {
@@ -488,7 +488,7 @@ void ext4_rsv_window_add(struct super_block *sb,
  * from the filesystem reservation window rb tree. Must be called with
  * rsv_lock hold.
  */
-static void rsv_window_remove(struct super_block *sb,
+void rsv_window_remove(struct super_block *sb,
 			      struct ext4_reserve_window_node *rsv)
 {
 	rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
@@ -503,7 +503,7 @@ static void rsv_window_remove(struct super_block *sb,
  *
  * returns 1 if the end block is EXT4_RESERVE_WINDOW_NOT_ALLOCATED.
  */
-static inline int rsv_is_empty(struct ext4_reserve_window *rsv)
+inline int rsv_is_empty(struct ext4_reserve_window *rsv)
 {
 	/* a valid reservation end block could not be 0 */
 	return rsv->_rsv_end == EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
@@ -1239,7 +1239,7 @@ static int find_next_reservable_window(
  *	@bitmap_bh: the block group block bitmap
  *
  */
-static int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv,
+int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv,
 		ext4_grpblk_t grp_goal, struct super_block *sb,
 		ext4_group_t group, struct buffer_head *bitmap_bh)
 {
@@ -1383,7 +1383,7 @@ retry:
  * expand the reservation window size if necessary on a best-effort
  * basis before ext4_new_blocks() tries to allocate blocks,
  */
-static void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv,
+void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv,
 			struct super_block *sb, int size)
 {
 	struct ext4_reserve_window_node *next_rsv;
diff --git a/fs/ext4/defrag.c b/fs/ext4/defrag.c
index 6b6b873..728e8fb 100644
--- a/fs/ext4/defrag.c
+++ b/fs/ext4/defrag.c
@@ -217,6 +217,268 @@ out:
 }

 /**
+ * ext4_defrag_reserve_blocks - Reserve blocks for defrag
+ *
+ * @inode	target inode
+ * @goal	block reservation goal
+ * @len		blocks count to reserve
+ *
+ * This function returns 0 if succeeded, otherwise
+ * returns error value.
+ */
+
+static int
+ext4_defrag_reserve_blocks(struct inode *inode, ext4_fsblk_t goal, int len)
+{
+	struct super_block *sb = NULL;
+	handle_t *handle;
+	struct buffer_head *bitmap_bh = NULL;
+	struct ext4_block_alloc_info *block_i;
+	struct ext4_reserve_window_node *my_rsv = NULL;
+	unsigned short windowsz = 0;
+	ext4_group_t group_no;
+	ext4_grpblk_t grp_target_blk;
+	int err = 0;
+
+	down_write(&EXT4_I(inode)->i_data_sem);
+
+	handle = ext4_journal_start(inode, EXT4_RESERVE_TRANS_BLOCKS);
+	if (IS_ERR(handle)) {
+		err = PTR_ERR(handle);
+		handle = NULL;
+		goto out;
+	}
+
+	if (S_ISREG(inode->i_mode) && (!EXT4_I(inode)->i_block_alloc_info)) {
+		ext4_init_block_alloc_info(inode);
+	} else if (!S_ISREG(inode->i_mode)) {
+		printk(KERN_ERR "ext4 defrag: Invalid file type\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	sb = inode->i_sb;
+	if (!sb) {
+		printk(KERN_ERR "ext4 defrag: Non-existent device\n");
+		err = -ENXIO;
+		goto out;
+	}
+	ext4_get_group_no_and_offset(sb, goal, &group_no,
+				&grp_target_blk);
+
+	block_i = EXT4_I(inode)->i_block_alloc_info;
+	/* Block reservation should be enabled */
+	BUG_ON(!block_i);
+
+	windowsz = block_i->rsv_window_node.rsv_goal_size;
+	/* Goal size should be set */
+	BUG_ON(!windowsz);
+
+
+	my_rsv = &block_i->rsv_window_node;
+
+	bitmap_bh = read_block_bitmap(sb, group_no);
+	if (!bitmap_bh) {
+		err = -ENOSPC;
+		goto out;
+	}
+
+	BUFFER_TRACE(bitmap_bh, "get undo access for new block");
+	err = ext4_journal_get_undo_access(handle, bitmap_bh);
+	if (err)
+		goto out;
+
+	err = alloc_new_reservation(my_rsv, grp_target_blk, sb,
+						group_no, bitmap_bh);
+	if (err < 0) {
+		printk(KERN_ERR "ext4 defrag: Block reservation failed."
+				"offset [%d], bg[%lu]\n",
+				grp_target_blk, group_no);
+		ext4_discard_reservation(inode);
+		goto out;
+	} else {
+		if (len > EXT4_DEFAULT_RESERVE_BLOCKS)
+			try_to_extend_reservation(my_rsv, sb,
+				len - EXT4_DEFAULT_RESERVE_BLOCKS);
+
+	}
+
+out:
+	up_write(&EXT4_I(inode)->i_data_sem);
+	ext4_journal_release_buffer(handle, bitmap_bh);
+	brelse(bitmap_bh);
+
+	if (handle)
+		ext4_journal_stop(handle);
+
+	return err;
+}
+
+/**
+ * ext4_defrag_block_within_rsv - Is target extent reserved ?
+ *
+ * @ inode	inode of target file
+ * @ ex_start	start physical block number of the extent
+ *		which already moved
+ * @ ex_len	block length of the extent which already moved
+ *
+ * This function returns 0 if the first and last blocks of the extent are
+ * inside the inode's reservation window, otherwise returns -ENOSPC.
+ */
+static int ext4_defrag_block_within_rsv(struct inode *inode,
+				ext4_fsblk_t ex_start, int ex_len)
+{
+	struct super_block *sb = inode->i_sb;
+	struct ext4_block_alloc_info *block_i;
+	ext4_group_t group_no;
+	ext4_grpblk_t grp_blk;
+	struct ext4_reserve_window_node *rsv;
+
+	block_i = EXT4_I(inode)->i_block_alloc_info;
+	/* Block reservation should be enabled */
+	BUG_ON(!block_i);
+
+	/* Goal size should be set */
+	BUG_ON(!block_i->rsv_window_node.rsv_goal_size);
+
+	rsv = &block_i->rsv_window_node;
+	if (rsv_is_empty(&rsv->rsv_window)) {
+		printk(KERN_ERR "ext4 defrag: Reservation window is empty\n");
+		return -ENOSPC;
+	}
+
+	ext4_get_group_no_and_offset(sb, ex_start, &group_no, &grp_blk);
+
+	if (!goal_in_my_reservation(&rsv->rsv_window, grp_blk, group_no, sb)
+	    || !goal_in_my_reservation(&rsv->rsv_window, grp_blk + ex_len - 1,
+		group_no, sb)){
+		printk(KERN_ERR "ext4 defrag: %d or %d in bg %lu is "
+				"not in rsv_window\n", grp_blk,
+				grp_blk + ex_len - 1, group_no);
+		return -ENOSPC;
+	}
+	return 0;
+}
+
+/*
+ * ext4_defrag_reserve_fblocks - Reserve free blocks
+ *				with ext4_defrag_reserve_blocks
+ *
+ * @inode:		To get a block group number
+ * @ext_info:		free blocks distribution stored in an extent-like style
+ *  @ext_info->ext[]	an array of struct ext4_extents_data
+ */
+static int ext4_defrag_reserve_fblocks(struct inode *inode,
+			struct ext4_extents_info *ext_info)
+{
+	ext4_fsblk_t ex_start = 0;
+	int i;
+	int ret = 0;
+	int len = 0;
+
+	for (i = 0; i < ext_info->entries; i++) {
+		ex_start = ext_info->ext[i].start;
+		len = ext_info->ext[i].len;
+
+		ret = ext4_defrag_reserve_blocks(inode, ex_start, len);
+		if (ret < 0) {
+			printk(KERN_ERR "ext4 defrag: "
+				"Block reservation failed. offset [%llu], "
+				"length [%d]\n", ex_start, len);
+			goto err;
+		}
+		ret = ext4_defrag_block_within_rsv(inode, ex_start, len);
+		if (ret < 0) {
+			printk(KERN_ERR "ext4 defrag: "
+				"Reservation window is not set. "
+				"offset [%llu], length [%d]\n", ex_start, len);
+			goto err;
+		}
+	}
+	return ret;
+
+err:
+	down_write(&EXT4_I(inode)->i_data_sem);
+	ext4_discard_reservation(inode);
+	up_write(&EXT4_I(inode)->i_data_sem);
+	return ret;
+}
+
+/**
+ * ext4_defrag_move_victim - Create free space for defrag
+ *
+ * @target_filp	target file
+ * @ext_info	target extents array to move
+ *
+ * This function returns 0 if succeeded, otherwise
+ * returns error value.
+ */
+static int ext4_defrag_move_victim(struct file *target_filp,
+			struct ext4_extents_info *ext_info)
+{
+	struct inode *target_inode = target_filp->f_dentry->d_inode;
+	struct super_block *sb = target_inode->i_sb;
+	struct file victim_file;
+	struct dentry victim_dent;
+	struct inode *victim_inode;
+	ext4_fsblk_t goal = ext_info->goal;
+	int ret = 0;
+	int i = 0;
+	struct ext4_extent_data ext;
+	ext4_group_t group;
+	ext4_grpblk_t grp_off;
+
+	/* Setup dummy extent data */
+	ext.len = 0;
+
+	/* Get the inode of the victim file */
+	victim_inode = ext4_iget(sb, ext_info->ino);
+	if (IS_ERR(victim_inode))
+		return PTR_ERR(victim_inode);
+
+	/* Setup file for the victim file */
+	victim_dent.d_inode = victim_inode;
+	victim_file.f_dentry = &victim_dent;
+	victim_file.f_mapping = victim_inode->i_mapping;
+
+	/* If goal is -1, start from the first block of the next group */
+	if (goal == -1) {
+		ext4_get_group_no_and_offset(victim_inode->i_sb,
+				ext_info->ext[0].start, &group, &grp_off);
+		goal = ext4_group_first_block_no(sb, group + 1);
+	}
+
+	for (i = 0; i < ext_info->entries; i++) {
+		/* Move original blocks to another block group */
+		ret = ext4_defrag(&victim_file, ext_info->ext[i].block,
+			ext_info->ext[i].len, goal, DEFRAG_FORCE_VICTIM, &ext);
+		if (ret < 0) {
+			printk(KERN_ERR "ext4 defrag: "
+				"Moving victim file failed. ino [%llu]\n",
+				ext_info->ino);
+			goto err;
+		}
+
+		/* Sync journal blocks before reservation */
+		ret = ext4_force_commit(sb);
+		if (ret) {
+			printk(KERN_ERR "ext4 defrag: "
+				"ext4_force_commit failed(%d)\n", ret);
+			goto err;
+		}
+	}
+
+	iput(victim_inode);
+	return 0;
+err:
+	down_write(&EXT4_I(target_inode)->i_data_sem);
+	ext4_discard_reservation(target_inode);
+	up_write(&EXT4_I(target_inode)->i_data_sem);
+	iput(victim_inode);
+	return ret;
+}
+
+/**
  * ext4_defrag_fblocks_distribution - Search free blocks distribution
  *
  * @inode	target file
@@ -379,6 +641,29 @@ int ext4_defrag_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
 				&ext_info, sizeof(ext_info)))
 				return -EFAULT;
 		}
+	} else if (cmd == EXT4_IOC_RESERVE_BLOCK) {
+		struct ext4_extents_info ext_info;
+
+		if (copy_from_user(&ext_info,
+				(struct ext4_extents_info __user *)arg,
+				sizeof(ext_info)))
+			return -EFAULT;
+
+		err = ext4_defrag_reserve_fblocks(inode, &ext_info);
+	} else if (cmd == EXT4_IOC_MOVE_VICTIM) {
+		struct ext4_extents_info ext_info;
+
+		if (copy_from_user(&ext_info,
+			(struct ext4_extents_info __user *)arg,
+			sizeof(ext_info)))
+			return -EFAULT;
+
+		err = ext4_defrag_move_victim(filp, &ext_info);
+
+	} else if (cmd == EXT4_IOC_BLOCK_RELEASE) {
+		down_write(&EXT4_I(inode)->i_data_sem);
+		ext4_discard_reservation(inode);
+		up_write(&EXT4_I(inode)->i_data_sem);
 	} else if (cmd == EXT4_IOC_DEFRAG) {
 		struct ext4_ext_defrag_data defrag;

@@ -387,7 +672,8 @@ int ext4_defrag_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
 			sizeof(defrag)))
 			return -EFAULT;
 		err = ext4_defrag(filp, defrag.start_offset,
-				defrag.defrag_size, defrag.goal);
+				defrag.defrag_size, defrag.goal, defrag.flag,
+				&defrag.ext);
 	}

 	return err;
@@ -403,6 +689,7 @@ int ext4_defrag_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
  * @start_ext	first new extent to be merged
  * @new_ext	middle of new extent to be merged
  * @end_ext	last new extent to be merged
+ * @flag	defrag mode (e.g. -f)
  *
  * This function returns 0 if succeed, otherwise returns error value.
  */
@@ -410,13 +697,20 @@ static int
 ext4_defrag_merge_across_blocks(handle_t *handle, struct inode *inode,
 		struct ext4_extent *o_start,
 		struct ext4_extent *o_end, struct ext4_extent *start_ext,
-		struct ext4_extent *new_ext, struct ext4_extent *end_ext)
+		struct ext4_extent *new_ext, struct ext4_extent *end_ext,
+		int flag)
 {
 	struct ext4_ext_path *org_path = NULL;
 	ext4_lblk_t eblock = 0;
 	int err = 0;
 	int new_flag = 0;
 	int end_flag = 0;
+	int defrag_flag;
+
+	if (flag == DEFRAG_FORCE_VICTIM)
+		defrag_flag = 1;
+	else
+		defrag_flag = 0;

 	if (le16_to_cpu(start_ext->ee_len) &&
 		le16_to_cpu(new_ext->ee_len) &&
@@ -494,7 +788,8 @@ ext4_defrag_merge_across_blocks(handle_t *handle, struct inode *inode,
 			org_path = NULL;
 			goto out;
 		}
-		err = ext4_ext_insert_extent(handle, inode, org_path, new_ext);
+		err = ext4_ext_insert_extent_defrag(handle, inode,
+					org_path, new_ext, defrag_flag);
 		if (err)
 			goto out;
 	}
@@ -507,7 +802,8 @@ ext4_defrag_merge_across_blocks(handle_t *handle, struct inode *inode,
 			org_path = NULL;
 			goto out;
 		}
-		err = ext4_ext_insert_extent(handle, inode, org_path, end_ext);
+		err = ext4_ext_insert_extent_defrag(handle, inode,
+					org_path, end_ext, defrag_flag);
 		if (err)
 			goto out;
 	}
@@ -588,6 +884,7 @@ ext4_defrag_merge_inside_block(handle_t *handle, struct inode *inode,
  * @new_ext	middle of new extent to be merged
  * @end_ext	last new extent to be merged
  * @replaced	the number of blocks which will be replaced with new_ext
+ * @flag	defrag mode (e.g. -f)
  *
  * This function returns 0 if succeed, otherwise returns error value.
  */
@@ -596,7 +893,7 @@ ext4_defrag_merge_extents(handle_t *handle, struct inode *inode,
 		struct ext4_ext_path *org_path,
 		struct ext4_extent *o_start, struct ext4_extent *o_end,
 		struct ext4_extent *start_ext, struct ext4_extent *new_ext,
-		struct ext4_extent *end_ext, ext4_fsblk_t replaced)
+		struct ext4_extent *end_ext, ext4_fsblk_t replaced, int flag)
 {
 	struct  ext4_extent_header *eh;
 	unsigned need_slots, slots_range;
@@ -634,7 +931,7 @@ ext4_defrag_merge_extents(handle_t *handle, struct inode *inode,

 		ret = ext4_defrag_merge_across_blocks(handle, inode, o_start,
 						o_end, start_ext, new_ext,
-						end_ext);
+						end_ext, flag);
 		if (ret < 0)
 			return ret;
 	} else {
@@ -667,13 +964,14 @@ ext4_defrag_merge_extents(handle_t *handle, struct inode *inode,
  * @org_path	path indicates first extent to be defraged
  * @dext	destination extent
  * @from	start offset on the target file
+ * @flag	defrag mode (e.g. -f)
  *
  * This function returns 0 if succeed, otherwise returns error value.
  */
 static int
 ext4_defrag_leaf_block(handle_t *handle, struct inode *org_inode,
 		struct ext4_ext_path *org_path, struct ext4_extent *dext,
-		ext4_lblk_t *from)
+		ext4_lblk_t *from, int flag)
 {
 	unsigned long depth;
 	ext4_fsblk_t replaced = 0;
@@ -774,7 +1072,7 @@ ext4_defrag_leaf_block(handle_t *handle, struct inode *org_inode,
 				+ le16_to_cpu(oext->ee_len) - 1) {
 			ret = ext4_defrag_merge_extents(handle, org_inode,
 					org_path, o_start, o_end, &start_ext,
-					&new_ext, &end_ext, replaced);
+					&new_ext, &end_ext, replaced, flag);
 			if (ret < 0)
 				return ret;

@@ -835,6 +1133,7 @@ ext4_defrag_leaf_block(handle_t *handle, struct inode *org_inode,
  * @from_page		page offset of org_inode
  * @dest_from_page	page offset of dest_inode
  * @count_page		page count to be replaced
+ * @flag		defrag mode (e.g. -f)
  *
  * This function returns 0 if succeed, otherwise returns error value.
  * Replace extents for blocks from "from" to "from + count - 1".
@@ -842,7 +1141,7 @@ ext4_defrag_leaf_block(handle_t *handle, struct inode *org_inode,
 static int
 ext4_defrag_replace_branches(handle_t *handle, struct inode *org_inode,
 			struct inode *dest_inode, pgoff_t from_page,
-			pgoff_t dest_from_page, pgoff_t count_page)
+			pgoff_t dest_from_page, pgoff_t count_page, int flag)
 {
 	struct ext4_ext_path *org_path = NULL;
 	struct ext4_ext_path *dest_path = NULL;
@@ -910,7 +1209,7 @@ ext4_defrag_replace_branches(handle_t *handle, struct inode *org_inode,

 		/* Loop for the original extent blocks */
 		err = ext4_defrag_leaf_block(handle, org_inode,
-						org_path, dext, &from);
+						org_path, dext, &from, flag);
 		if (err < 0)
 			goto out;

@@ -920,7 +1219,7 @@ ext4_defrag_replace_branches(handle_t *handle, struct inode *org_inode,
 		 * e.g. ext4_defrag_merge_extents()
 		 */
 		err = ext4_defrag_leaf_block(handle, dest_inode,
-					dest_path, swap_ext, &dest_off);
+					dest_path, swap_ext, &dest_off, -1);
 		if (err < 0)
 			goto out;

@@ -1013,13 +1312,14 @@ out:
  * @iblock		file related offset
  * @total_blocks	contiguous blocks count
  * @goal		block offset for allocation
+ * @phase		phase of the force defrag mode
  *
  * If succeed, fuction returns count of extent we got,
  * otherwise returns err.
  */
 static int ext4_defrag_alloc_blocks(struct inode *dest_inode,
 		struct inode *org_inode, ext4_lblk_t iblock,
-		ext4_fsblk_t total_blocks, ext4_fsblk_t goal)
+		ext4_fsblk_t total_blocks, ext4_fsblk_t goal, int phase)
 {
 	handle_t *handle = NULL;
 	struct ext4_ext_path *dest_path = NULL;
@@ -1032,8 +1332,9 @@ static int ext4_defrag_alloc_blocks(struct inode *dest_inode,
 	ext4_fsblk_t rest = total_blocks;
 	ext4_fsblk_t alloc_total = 0;
 	unsigned long org_len;
-	ext4_group_t dest_grp_no;
-	ext4_grpblk_t dest_blk_off;
+	ext4_group_t dest_grp_no, org_grp_no, goal_grp_no;
+	ext4_grpblk_t dest_blk_off, org_blk_off, goal_blk_off;
+	int org_depth = ext_depth(org_inode);
 	int metadata = 1;
 	int count = 0;
 	int credits = 0;
@@ -1044,6 +1345,22 @@ static int ext4_defrag_alloc_blocks(struct inode *dest_inode,
 	ar.len = total_blocks;
 	org_len = ar.len;

+	/* Calculate group number of org_inode block */
+	if (phase == DEFRAG_FORCE_VICTIM) {
+		org_path = ext4_ext_find_extent(org_inode, iblock, org_path);
+		if (IS_ERR(org_path)) {
+			err = PTR_ERR(org_path);
+			org_path = NULL;
+			goto out2;
+		}
+		ext4_get_group_no_and_offset(org_inode->i_sb,
+				ext_pblock(org_path[org_depth].p_ext),
+				&org_grp_no, &org_blk_off);
+		ar.excepted_group = org_grp_no;
+	} else {
+		ar.excepted_group = -1;
+	}
+
 	/* Find first extent */
 	dest_path = ext4_ext_find_extent(dest_inode, iblock, dest_path);
 	if (IS_ERR(dest_path)) {
@@ -1087,6 +1404,13 @@ static int ext4_defrag_alloc_blocks(struct inode *dest_inode,
 		if (err) {
 			/* Failed to get the contiguous blocks */
 			goto out;
+		} else if ((ar.len != org_len) &&
+				(phase == DEFRAG_FORCE_TRY)) {
+			ext4_free_blocks(handle, org_inode, newblock,
+						ar.len, metadata);
+			/* -ENOSPC triggers DEFRAG_FORCE_VICTIM phase. */
+			err = -ENOSPC;
+			goto out;
 		} else {
 			/*
 			 * Dirty buffer_head causes the overwriting
@@ -1104,13 +1428,51 @@ static int ext4_defrag_alloc_blocks(struct inode *dest_inode,

 			alloc_total += ar.len;
 			ext4_get_group_no_and_offset(dest_inode->i_sb,
+				goal, &goal_grp_no, &goal_blk_off);
+			ext4_get_group_no_and_offset(dest_inode->i_sb,
 				newblock, &dest_grp_no, &dest_blk_off);

+			/* Only the force defrag mode */
+			switch (phase) {
+			case DEFRAG_FORCE_VICTIM:
+				/*
+				 * We can't allocate new blocks in the same
+				 * block group.
+				 */
+				if (dest_grp_no == org_grp_no) {
+					printk(KERN_ERR "ext4 defrag: "
+						"Failed to allocate victim file"
+						" to other block group\n");
+					ext4_free_blocks(handle, org_inode,
+						newblock, ar.len, metadata);
+					err = -ENOSPC;
+					goto out;
+				}
+				break;
+			case DEFRAG_FORCE_GATHER:
+				/*
+				 * Reserved blocks may already be in use by
+				 * another process.
+				 */
+				if (dest_grp_no != goal_grp_no
+					|| alloc_total != total_blocks) {
+					printk(KERN_ERR "ext4 defrag: "
+						"Reserved blocks are already "
+						"used by other process\n");
+					ext4_free_blocks(handle, org_inode,
+						newblock, ar.len, metadata);
+					err = -EIO;
+					goto out;
+				}
+				break;
+			}
+
 			newex.ee_block = cpu_to_le32(alloc_total - ar.len);
 			ext4_ext_store_pblock(&newex, newblock);
 			newex.ee_len = cpu_to_le16(ar.len);

-			ar.goal = newblock + ar.len;
+			if (!phase)
+				ar.goal = newblock + ar.len;
 			rest = rest - ar.len;
 			ar.len = rest;

@@ -1158,12 +1520,13 @@ out2:
  * @filp:		pointer to file
  * @org_offset:		page index on original file
  * @dest_offset:	page index on temporary file
+ * @flag:		defrag mode (e.g. -f)
  *
  * This function returns 0 if succeeded, otherwise returns error value.
  */
 static int
 ext4_defrag_partial(struct inode *tmp_inode, struct file *filp,
-			pgoff_t org_offset, pgoff_t dest_offset)
+			pgoff_t org_offset, pgoff_t dest_offset, int flag)
 {
 	struct inode *inode = filp->f_dentry->d_inode;
 	struct address_space *mapping = inode->i_mapping;
@@ -1233,7 +1596,7 @@ ext4_defrag_partial(struct inode *tmp_inode, struct file *filp,
 	/* Release old bh and drop refs */
 	try_to_release_page(page, 0);
 	ret = ext4_defrag_replace_branches(handle, inode, tmp_inode,
-					org_offset, dest_offset, 1);
+					org_offset, dest_offset, 1, flag);

 	if (ret < 0)
 		goto out;
@@ -1282,6 +1645,7 @@ out:
  * @tar_blocks:		the number of blocks to allocate
  * @iblock:		file related offset
  * @goal:		block offset for allocaton
+ * @flag:               phase of the force defrag mode
  *
  * This function returns the value as below:
  *	0(succeeded)
@@ -1292,7 +1656,7 @@ static int
 ext4_defrag_new_extent_tree(struct inode *inode, struct inode *tmp_inode,
 			struct ext4_ext_path *path, ext4_lblk_t tar_start,
 			ext4_lblk_t tar_blocks, ext4_lblk_t iblock,
-			ext4_fsblk_t goal)
+			ext4_fsblk_t goal, int flag)
 {
 	struct ext4_extent *ext = NULL;
 	struct ext4_extent_header *eh = NULL;
@@ -1306,7 +1670,7 @@ ext4_defrag_new_extent_tree(struct inode *inode, struct inode *tmp_inode,

 	/* Allocate contiguous blocks */
 	sum_tmp = ext4_defrag_alloc_blocks(tmp_inode, inode, iblock,
-					tar_blocks, goal);
+					tar_blocks, goal, flag);
 	if (sum_tmp < 0) {
 		ret = sum_tmp;
 		goto out;
@@ -1328,7 +1692,8 @@ ext4_defrag_new_extent_tree(struct inode *inode, struct inode *tmp_inode,
 				ret = ext4_ext_remove_space(tmp_inode, 0);
 				if (!ret)
 					ret = 1;
-			} else if (sum_org < sum_tmp) {
+			} else if (sum_org < sum_tmp &&
+					flag != DEFRAG_FORCE_VICTIM) {
 				/* Fragment increased */
 				ret = ext4_ext_remove_space(tmp_inode, 0);
 				if (!ret)
@@ -1355,13 +1720,16 @@ out:
  * @block_start:	starting offset to defrag in blocks
  * @defrag_size:	size of defrag in blocks
  * @goal:		block offset for allocation
+ * @flag:		phase of the force defrag mode
+ * @ext:		extent to be moved (only -f)
  *
  * This function returns the number of blocks if succeeded, otherwise
  * returns error value.
  */
 int
 ext4_defrag(struct file *filp, ext4_lblk_t block_start,
-		ext4_lblk_t defrag_size, ext4_fsblk_t goal)
+		ext4_lblk_t defrag_size, ext4_fsblk_t goal,
+		int flag, struct ext4_extent_data *ext)
 {
 	struct inode *inode = filp->f_dentry->d_inode, *tmp_inode = NULL;
 	struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
@@ -1397,6 +1765,17 @@ ext4_defrag(struct file *filp, ext4_lblk_t block_start,
 		return -EINVAL;
 	}

+	if (ext->len) {
+		/* Setup for the force defrag mode */
+		if (ext->len < defrag_size) {
+			printk(KERN_ERR "ext4 defrag: "
+					"Invalid length of extent\n");
+			return -EINVAL;
+		}
+		flag = DEFRAG_FORCE_GATHER;
+		goal = ext->start;
+	}
+
 	if (file_end < block_end)
 		defrag_size -= block_end - file_end;

@@ -1520,11 +1899,11 @@ ext4_defrag(struct file *filp, ext4_lblk_t block_start,
 		}

 		ret = ext4_defrag_new_extent_tree(inode, tmp_inode, path,
-			seq_start, seq_blocks, block_start, goal);
+			seq_start, seq_blocks, block_start, goal, flag);

 		if (ret < 0) {
 			break;
-		} else if (ret == 1) {
+		} else if ((ret == 1) && (!goal || (goal && !flag))) {
 			ret = 0;
 			seq_start = le32_to_cpu(ext_cur->ee_block);
 			goto CLEANUP;
@@ -1549,7 +1928,7 @@ ext4_defrag(struct file *filp, ext4_lblk_t block_start,
 		while (page_offset <= seq_end_page) {
 			/* Swap original branches with new branches */
 			ret = ext4_defrag_partial(tmp_inode, filp,
-					page_offset, dest_offset);
+					page_offset, dest_offset, flag);
 			if (ret < 0)
 				goto out;

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index ad553e1..6795fe3 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -94,6 +94,11 @@ struct ext4_allocation_request {
 	unsigned long len;
 	/* flags. see above EXT4_MB_HINT_* */
 	unsigned long flags;
+	/*
+	 * for ext4 online defrag:
+	 * the block group to be excluded from block allocation
+	 */
+	long long excepted_group;
 };

 /*
@@ -303,6 +308,9 @@ struct ext4_new_group_data {
 #define EXT4_IOC_GROUP_INFO	_IOW('f', 11, struct ext4_group_data_info)
 #define EXT4_IOC_FREE_BLOCKS_INFO	_IOW('f', 12, struct ext4_extents_info)
 #define EXT4_IOC_EXTENTS_INFO		_IOW('f', 13, struct ext4_extents_info)
+#define EXT4_IOC_RESERVE_BLOCK		_IOW('f', 14, struct ext4_extents_info)
+#define EXT4_IOC_MOVE_VICTIM		_IOW('f', 15, struct ext4_extents_info)
+#define EXT4_IOC_BLOCK_RELEASE		_IO('f', 8)

 /*
  * ioctl commands in 32 bit emulation
@@ -331,8 +339,15 @@ struct ext4_new_group_data {
  *
  * DEFRAG_MAX_ENT:	the maximum number of extents for exchanging between
  *			kernel-space and user-space per an ioctl
+ * DEFRAG_FORCE_TRY:	check whether we have free space fragmentation or not
+ * DEFRAG_FORCE_VICTIM:	move victim extents to make sufficient space
+ * DEFRAG_FORCE_GATHER:	move the target file into the free space made in the
+ *			DEFRAG_FORCE_VICTIM phase
  */
 #define DEFRAG_MAX_ENT		32
+#define DEFRAG_FORCE_TRY	1
+#define DEFRAG_FORCE_VICTIM	2
+#define DEFRAG_FORCE_GATHER	3

 struct ext4_extent_data {
 	ext4_lblk_t block;		/* start logical block number */
@@ -344,6 +359,8 @@ struct ext4_ext_defrag_data {
 	ext4_lblk_t start_offset;	/* start offset to defrag in blocks */
 	ext4_lblk_t defrag_size;	/* size of defrag in blocks */
 	ext4_fsblk_t goal;		/* block offset for allocation */
+	int flag;			/* free space mode flag */
+	struct ext4_extent_data ext;
 };

 struct ext4_group_data_info {
@@ -1037,8 +1054,17 @@ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
 extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
 extern void ext4_init_block_alloc_info(struct inode *);
 extern void ext4_rsv_window_add(struct super_block *sb, struct ext4_reserve_window_node *rsv);
+extern void try_to_extend_reservation(struct ext4_reserve_window_node *,
+					struct super_block *, int);
+extern int alloc_new_reservation(struct ext4_reserve_window_node *,
+				ext4_grpblk_t, struct super_block *,
+				ext4_group_t, struct buffer_head *);
 extern ext4_grpblk_t bitmap_search_next_usable_block(ext4_grpblk_t,
 				struct buffer_head *, ext4_grpblk_t);
+extern int rsv_is_empty(struct ext4_reserve_window *rsv);
+extern int goal_in_my_reservation(struct ext4_reserve_window *rsv,
+				ext4_grpblk_t grp_goal, ext4_group_t group,
+				struct super_block *sb);

 /* dir.c */
 extern int ext4_check_dir_entry(const char *, struct inode *,
@@ -1164,7 +1190,8 @@ extern void ext4_inode_table_set(struct super_block *sb,
 extern handle_t *ext4_ext_journal_restart(handle_t *handle, int needed);
 /* defrag.c */
 extern int ext4_defrag(struct file *filp, ext4_lblk_t block_start,
-			ext4_lblk_t defrag_size, ext4_fsblk_t goal);
+			ext4_lblk_t defrag_size, ext4_fsblk_t goal,
+			int flag, struct ext4_extent_data *ext);
 extern int ext4_defrag_ioctl(struct inode *, struct file *, unsigned int,
 				unsigned long);

diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 734c1c7..d9a6a73 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -233,5 +233,10 @@ extern void ext4_ext_drop_refs(struct ext4_ext_path *path);
 extern ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
 					struct ext4_ext_path *path,
 					ext4_lblk_t block);
+extern int ext4_ext_insert_extent_defrag(handle_t *handle, struct inode *inode,
+					struct ext4_ext_path *path,
+					struct ext4_extent *newext, int defrag);
+extern ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_ext_path *path);
+
 #endif /* _EXT4_EXTENTS */

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index e60e51b..a455c08 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -183,11 +183,17 @@ ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
 static ext4_fsblk_t
 ext4_ext_new_block(handle_t *handle, struct inode *inode,
 			struct ext4_ext_path *path,
-			struct ext4_extent *ex, int *err)
+			struct ext4_extent *ex, int *err,
+			ext4_fsblk_t defrag_goal)
 {
 	ext4_fsblk_t goal, newblock;

-	goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
+	if (defrag_goal) {
+		goal = defrag_goal;
+	} else {
+		goal = ext4_ext_find_goal(inode, path,
+				le32_to_cpu(ex->ee_block));
+	}
 	newblock = ext4_new_block(handle, inode, goal, err);
 	return newblock;
 }
@@ -638,7 +644,8 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
  */
 static int ext4_ext_split(handle_t *handle, struct inode *inode,
 				struct ext4_ext_path *path,
-				struct ext4_extent *newext, int at)
+				struct ext4_extent *newext, int at,
+				ext4_fsblk_t defrag_goal)
 {
 	struct buffer_head *bh = NULL;
 	int depth = ext_depth(inode);
@@ -688,7 +695,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 	/* allocate all needed blocks */
 	ext_debug("allocate %d blocks for indexes/leaf\n", depth - at);
 	for (a = 0; a < depth - at; a++) {
-		newblock = ext4_ext_new_block(handle, inode, path, newext, &err);
+		newblock = ext4_ext_new_block(handle, inode, path,
+						newext, &err, defrag_goal);
 		if (newblock == 0)
 			goto cleanup;
 		ablocks[a] = newblock;
@@ -875,7 +883,8 @@ cleanup:
  */
 static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
 					struct ext4_ext_path *path,
-					struct ext4_extent *newext)
+					struct ext4_extent *newext,
+					ext4_fsblk_t defrag_goal)
 {
 	struct ext4_ext_path *curp = path;
 	struct ext4_extent_header *neh;
@@ -884,7 +893,8 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
 	ext4_fsblk_t newblock;
 	int err = 0;

-	newblock = ext4_ext_new_block(handle, inode, path, newext, &err);
+	newblock = ext4_ext_new_block(handle, inode, path,
+					newext, &err, defrag_goal);
 	if (newblock == 0)
 		return err;

@@ -960,7 +970,8 @@ out:
  */
 static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
 					struct ext4_ext_path *path,
-					struct ext4_extent *newext)
+					struct ext4_extent *newext,
+					ext4_fsblk_t defrag_goal)
 {
 	struct ext4_ext_path *curp;
 	int depth, i, err = 0;
@@ -980,7 +991,8 @@ repeat:
 	if (EXT_HAS_FREE_INDEX(curp)) {
 		/* if we found index with free entry, then use that
 		 * entry: create all needed subtree and add new leaf */
-		err = ext4_ext_split(handle, inode, path, newext, i);
+		err = ext4_ext_split(handle, inode, path,
+					newext, i, defrag_goal);

 		/* refill path */
 		ext4_ext_drop_refs(path);
@@ -991,7 +1003,8 @@ repeat:
 			err = PTR_ERR(path);
 	} else {
 		/* tree is full, time to grow in depth */
-		err = ext4_ext_grow_indepth(handle, inode, path, newext);
+		err = ext4_ext_grow_indepth(handle, inode, path,
+						newext, defrag_goal);
 		if (err)
 			goto out;

@@ -1171,7 +1184,7 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
  * allocated block. Thus, index entries have to be consistent
  * with leaves.
  */
-static ext4_lblk_t
+ext4_lblk_t
 ext4_ext_next_allocated_block(struct ext4_ext_path *path)
 {
 	int depth;
@@ -1437,6 +1450,19 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
 				struct ext4_ext_path *path,
 				struct ext4_extent *newext)
 {
+	return ext4_ext_insert_extent_defrag(handle, inode, path, newext, 0);
+}
+
+/*
+ * ext4_ext_insert_extent_defrag:
+ * The difference from ext4_ext_insert_extent is to use the first block
+ * in newext as the goal of the new index block.
+ */
+int
+ext4_ext_insert_extent_defrag(handle_t *handle, struct inode *inode,
+				struct ext4_ext_path *path,
+				struct ext4_extent *newext, int defrag)
+{
 	struct ext4_extent_header * eh;
 	struct ext4_extent *ex, *fex;
 	struct ext4_extent *nearex; /* nearest extent */
@@ -1444,6 +1470,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
 	int depth, len, err;
 	ext4_lblk_t next;
 	unsigned uninitialized = 0;
+	ext4_fsblk_t defrag_goal;

 	BUG_ON(ext4_ext_get_actual_len(newext) == 0);
 	depth = ext_depth(inode);
@@ -1504,11 +1531,16 @@ repeat:
 			  le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
 	}

+	if (defrag)
+		defrag_goal = ext_pblock(newext);
+	else
+		defrag_goal = 0;
 	/*
 	 * There is no free space in the found leaf.
 	 * We're gonna add a new leaf in the tree.
 	 */
-	err = ext4_ext_create_new_leaf(handle, inode, path, newext);
+	err = ext4_ext_create_new_leaf(handle, inode, path,
+					newext, defrag_goal);
 	if (err)
 		goto cleanup;
 	depth = ext_depth(inode);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index f216caa..6051901 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -235,7 +235,10 @@ flags_err:
 	case EXT4_IOC_DEFRAG:
 	case EXT4_IOC_GROUP_INFO:
 	case EXT4_IOC_FREE_BLOCKS_INFO:
-	case EXT4_IOC_EXTENTS_INFO: {
+	case EXT4_IOC_EXTENTS_INFO:
+	case EXT4_IOC_RESERVE_BLOCK:
+	case EXT4_IOC_MOVE_VICTIM:
+	case EXT4_IOC_BLOCK_RELEASE: {
 		return ext4_defrag_ioctl(inode, filp, cmd, arg);
 	}
 	case EXT4_IOC_GROUP_ADD: {
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 519e87b..1589dbc 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1750,6 +1750,10 @@ repeat:
 			if (group == EXT4_SB(sb)->s_groups_count)
 				group = 0;

+			if (ac->ac_excepted_group != -1 &&
+			    group == ac->ac_excepted_group)
+				continue;
+
 			/* quick check to skip empty groups */
 			grp = ext4_get_group_info(ac->ac_sb, group);
 			if (grp->bb_free == 0)
@@ -3939,6 +3943,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
 	ac->ac_bitmap_page = NULL;
 	ac->ac_buddy_page = NULL;
 	ac->ac_lg = NULL;
+	ac->ac_excepted_group = ar->excepted_group;

 	/* we have to define context: we'll we work with a file or
 	 * locality group. this is a policy, actually */
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index bfe6add..1141ad5 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -205,6 +205,7 @@ struct ext4_allocation_context {
 	struct page *ac_buddy_page;
 	struct ext4_prealloc_space *ac_pa;
 	struct ext4_locality_group *ac_lg;
+	long long ac_excepted_group;
 };

 #define AC_STATUS_CONTINUE	1


                 reply	other threads:[~2008-04-04 11:24 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=47F60EFD.30100@rs.jp.nec.com \
    --to=a-fujita@rs.jp.nec.com \
    --cc=aneesh.kumar@linux.vnet.ibm.com \
    --cc=cmm@us.ibm.com \
    --cc=linux-ext4@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=tytso@mit.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.