linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* Re: [PATCH 1/5] ext4 online defrag header file changes
@ 2008-03-24 12:31 Akira Fujita
  2008-03-25 17:48 ` Aneesh Kumar K.V
  0 siblings, 1 reply; 15+ messages in thread
From: Akira Fujita @ 2008-03-24 12:31 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: Andreas Dilger, cmm, tytso, linux-ext4, linux-fsdevel

[-- Attachment #1: Type: text/plain, Size: 1322 bytes --]

Hi Aneesh,
> On Mon, Mar 24, 2008 at 08:00:54PM +0900, Akira Fujita wrote:
>>>   
>> Is EXT4_IOC_FIEMAP going to be added to the ext4 patch queue?
>> I will try to use EXT4_IOC_FIEMAP instead of EXT4_IOC_EXTENTS_INFO
>> in ext4 online defrag when kernel supports it.
>
> Can you also look at the sparse warnings with the patches ?
>
I have already released a fixed patch, but it is not in the ext4 patch queue yet.
Mingming, could you replace the old ext4-online-defrag-free-space-fragmentation.patch
in the ext4 patch queue with the new one?

Regards, Akira

Signed-off-by: Akira Fujita <a-fujita@rs.jp.nec.com>
---
 defrag.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff -Nurp linux-2.6.25-rc6-full/fs/ext4/defrag.c linux-2.6.25-rc6-fix/fs/ext4/defrag.c
--- linux-2.6.25-rc6-full/fs/ext4/defrag.c    2008-03-24 20:37:10.000000000 +0900
+++ linux-2.6.25-rc6-fix/fs/ext4/defrag.c    2008-03-24 20:40:22.000000000 +0900
@@ -454,7 +454,7 @@ static int ext4_defrag_move_victim(struc
             ext_info->ext[i].len, goal, DEFRAG_FORCE_VICTIM, &ext);
         if (ret < 0) {
             printk(KERN_ERR "ext4 defrag: "
-                "Moving victim file failed. ino [%lu]\n",
+                "Moving victim file failed. ino [%llu]\n",
                 ext_info->ino);
             goto err;
         }

[-- Attachment #2: ext4-online-defrag-free-space-fragmentation.patch --]
[-- Type: application/octet-stream, Size: 22591 bytes --]

Free space fragmentation functions

From: Akira Fujita <a-fujita@rs.jp.nec.com>

Defrag tries to move other files to make sufficient space
and reallocates the contiguous blocks for the target file.

Signed-off-by: Akira Fujita <a-fujita@rs.jp.nec.com>
Signed-off-by: Takashi Sato <t-sato@yk.jp.nec.com>
---
 balloc.c  |   12 -
 defrag.c  |  554 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 extents.c |   52 ++++-
 3 files changed, 602 insertions(+), 16 deletions(-)
diff -Nrup linux-2.6.25-rc6/fs/ext4/balloc.c linux-2.6.25-rc6-fix/fs/ext4/balloc.c
--- linux-2.6.25-rc6/fs/ext4/balloc.c	2008-03-24 20:50:23.000000000 +0900
+++ linux-2.6.25-rc6-fix/fs/ext4/balloc.c	2008-03-24 20:39:58.000000000 +0900
@@ -383,7 +383,7 @@ restart:
  * If the goal block is within the reservation window, return 1;
  * otherwise, return 0;
  */
-static int
+int
 goal_in_my_reservation(struct ext4_reserve_window *rsv, ext4_grpblk_t grp_goal,
 			ext4_group_t group, struct super_block *sb)
 {
@@ -488,7 +488,7 @@ void ext4_rsv_window_add(struct super_bl
  * from the filesystem reservation window rb tree. Must be called with
  * rsv_lock hold.
  */
-static void rsv_window_remove(struct super_block *sb,
+void rsv_window_remove(struct super_block *sb,
 			      struct ext4_reserve_window_node *rsv)
 {
 	rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
@@ -503,7 +503,7 @@ static void rsv_window_remove(struct sup
  *
  * returns 1 if the end block is EXT4_RESERVE_WINDOW_NOT_ALLOCATED.
  */
-static inline int rsv_is_empty(struct ext4_reserve_window *rsv)
+inline int rsv_is_empty(struct ext4_reserve_window *rsv)
 {
 	/* a valid reservation end block could not be 0 */
 	return rsv->_rsv_end == EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
@@ -869,7 +869,7 @@ static int ext4_test_allocatable(ext4_gr
  * bitmap on disk and the last-committed copy in journal, until we find a
  * bit free in both bitmaps.
  */
-static ext4_grpblk_t
+ext4_grpblk_t
 bitmap_search_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh,
 					ext4_grpblk_t maxblocks)
 {
@@ -1239,7 +1239,7 @@ static int find_next_reservable_window(
  *	@bitmap_bh: the block group block bitmap
  *
  */
-static int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv,
+int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv,
 		ext4_grpblk_t grp_goal, struct super_block *sb,
 		ext4_group_t group, struct buffer_head *bitmap_bh)
 {
@@ -1383,7 +1383,7 @@ retry:
  * expand the reservation window size if necessary on a best-effort
  * basis before ext4_new_blocks() tries to allocate blocks,
  */
-static void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv,
+void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv,
 			struct super_block *sb, int size)
 {
 	struct ext4_reserve_window_node *next_rsv;
diff -Nrup linux-2.6.25-rc6/fs/ext4/defrag.c linux-2.6.25-rc6-fix/fs/ext4/defrag.c
--- linux-2.6.25-rc6/fs/ext4/defrag.c	2008-03-24 20:50:23.000000000 +0900
+++ linux-2.6.25-rc6-fix/fs/ext4/defrag.c	2008-03-24 20:40:22.000000000 +0900
@@ -20,6 +20,12 @@
 #include <linux/ext4_fs_extents.h>
 #include "group.h"
 
+#define EXT_SET_EXTENT_DATA(src, dest)  do {			\
+		dest.block = le32_to_cpu(src->ee_block);	\
+		dest.start = ext_pblock(src);			\
+		dest.len = le16_to_cpu(src->ee_len);		\
+					} while (0)
+
 /**
  * ext4_defrag_next_extent - Search for the next extent and set it to "extent"
  *
@@ -89,6 +95,486 @@ ext4_defrag_next_extent(struct inode *in
 	return 1;
 }
 
+/**
+ * ext4_defrag_extents_info - Get extents information
+ *
+ * @sb:				for ext4_iget()
+ * @ext_info:			pointer to ext4_extents_info
+ *  @ext_info->ino		describe an inode which is used to get
+ *				extent information
+ *  @ext_info->max_entries:	defined by DEFRAG_MAX_ENT
+ *  @ext_info->entries:		amount of extents (output)
+ *  @ext_info->ext[]:		array of extent (output)
+ *  @ext_info->offset:		starting block offset of targeted extent
+ *				(file relative)
+ *
+ * This function returns 0 if the next extent(s) exists,
+ * or returns 1 if the next extent doesn't exist,
+ * otherwise returns error value.
+ */
+static int ext4_defrag_extents_info(struct super_block *sb,
+				struct ext4_extents_info *ext_info)
+{
+	struct ext4_ext_path *path = NULL;
+	struct ext4_extent *ext = NULL;
+	struct inode *inode = NULL;
+	ext4_lblk_t offset = ext_info->f_offset;
+	int max_entries = ext_info->max_entries;
+	int depth = 0;
+	int entries = 0;
+	int err = 0;
+	int ret = 0;
+
+	inode = ext4_iget(sb, ext_info->ino);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
+	down_write(&EXT4_I(inode)->i_data_sem);
+
+	/* Return -ENOENT if a file does not exist */
+	if ((!inode->i_nlink) || (inode->i_ino < 11) ||
+			!S_ISREG(inode->i_mode)) {
+		ext_info->entries = 0;
+		err = -ENOENT;
+		goto out;
+	}
+
+	path = ext4_ext_find_extent(inode, offset, NULL);
+	if (IS_ERR(path)) {
+		err = PTR_ERR(path);
+		path = NULL;
+		goto out;
+	}
+	depth = ext_depth(inode);
+
+	/* Skip the 0 size file */
+	if (path[depth].p_ext == NULL) {
+		ext_info->entries = 0;
+		goto out;
+	}
+	ext = path[depth].p_ext;
+	EXT_SET_EXTENT_DATA(ext, ext_info->ext[entries]);
+	entries = 1;
+
+	/*
+	 * The ioctl repeats this loop 'max_entries' times.
+	 * So we have to call this function again if @inode had
+	 * more the number of extents than 'max_entries'.
+	 */
+	while (entries < max_entries) {
+		ret = ext4_defrag_next_extent(inode, path, &ext);
+		if (ret == 0) {
+			/* Found the next extent (it means not the last one) */
+			EXT_SET_EXTENT_DATA(ext, ext_info->ext[entries]);
+			entries++;
+
+			/*
+			 * In case @inode has > 'max_entries' extents,
+			 * we must call this function again and restart from
+			 * 'max_entries * n + 1'th extent.
+			 * 'n' is the number of calling this function
+			 * at the same @inode.
+			 */
+			if (entries == max_entries) {
+				ext_info->f_offset =
+						le32_to_cpu(ext->ee_block) +
+						le32_to_cpu(ext->ee_len);
+				/* Check the extent is the last one or not */
+				ret =
+				    ext4_defrag_next_extent(inode, path, &ext);
+				if (ret == 1) {
+					err = ret;
+				} else if (ret < 0) {
+					/* Failed to get the next extent */
+					err = ret;
+					goto out;
+				}
+				break;
+			}
+
+		} else if (ret == 1) {
+			/* The extent is the last one */
+			ext_info->f_offset = 0;
+			err = ret;
+			break;
+		} else {
+			/* Failed to get the next extent */
+			err = ret;
+			goto out;
+		}
+	}
+
+	ext_info->entries = entries;
+
+out:
+	if (path) {
+		ext4_ext_drop_refs(path);
+		kfree(path);
+	}
+	up_write(&EXT4_I(inode)->i_data_sem);
+	iput(inode);
+	return err;
+}
+
+/**
+ * ext4_defrag_reserve_blocks - Reserve blocks for defrag
+ *
+ * @inode	target inode
+ * @goal	block reservation goal
+ * @len		blocks count to reserve
+ *
+ * This function returns 0 if succeeded, otherwise
+ * returns error value.
+ */
+
+static int
+ext4_defrag_reserve_blocks(struct inode *inode, ext4_fsblk_t goal, int len)
+{
+	struct super_block *sb = NULL;
+	handle_t *handle;
+	struct buffer_head *bitmap_bh = NULL;
+	struct ext4_block_alloc_info *block_i;
+	struct ext4_reserve_window_node *my_rsv = NULL;
+	unsigned short windowsz = 0;
+	ext4_group_t group_no;
+	ext4_grpblk_t grp_target_blk;
+	int err = 0;
+
+	down_write(&EXT4_I(inode)->i_data_sem);
+
+	handle = ext4_journal_start(inode, EXT4_RESERVE_TRANS_BLOCKS);
+	if (IS_ERR(handle)) {
+		err = PTR_ERR(handle);
+		handle = NULL;
+		goto out;
+	}
+
+	if (S_ISREG(inode->i_mode) && (!EXT4_I(inode)->i_block_alloc_info)) {
+		ext4_init_block_alloc_info(inode);
+	} else if (!S_ISREG(inode->i_mode)) {
+		printk(KERN_ERR "ext4 defrag: Invalid file type\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	sb = inode->i_sb;
+	if (!sb) {
+		printk(KERN_ERR "ext4 defrag: Non-existent device\n");
+		err = -ENXIO;
+		goto out;
+	}
+	ext4_get_group_no_and_offset(sb, goal, &group_no,
+				&grp_target_blk);
+
+	block_i = EXT4_I(inode)->i_block_alloc_info;
+	/* Block reservation should be enabled */
+	BUG_ON(!block_i);
+
+	windowsz = block_i->rsv_window_node.rsv_goal_size;
+	/* Goal size should be set */
+	BUG_ON(!windowsz);
+
+
+	my_rsv = &block_i->rsv_window_node;
+
+	bitmap_bh = read_block_bitmap(sb, group_no);
+	if (!bitmap_bh) {
+		err = -ENOSPC;
+		goto out;
+	}
+
+	BUFFER_TRACE(bitmap_bh, "get undo access for new block");
+	err = ext4_journal_get_undo_access(handle, bitmap_bh);
+	if (err)
+		goto out;
+
+	err = alloc_new_reservation(my_rsv, grp_target_blk, sb,
+						group_no, bitmap_bh);
+	if (err < 0) {
+		printk(KERN_ERR "ext4 defrag: Block reservation failed."
+				"offset [%d], bg[%lu]\n",
+				grp_target_blk, group_no);
+		ext4_discard_reservation(inode);
+		goto out;
+	} else {
+		if (len > EXT4_DEFAULT_RESERVE_BLOCKS)
+			try_to_extend_reservation(my_rsv, sb,
+				len - EXT4_DEFAULT_RESERVE_BLOCKS);
+
+	}
+
+out:
+	up_write(&EXT4_I(inode)->i_data_sem);
+	ext4_journal_release_buffer(handle, bitmap_bh);
+	brelse(bitmap_bh);
+
+	if (handle)
+		ext4_journal_stop(handle);
+
+	return err;
+}
+
+/**
+ * ext4_defrag_block_within_rsv - Is target extent reserved ?
+ *
+ * @ inode	inode of target file
+ * @ ex_start	start physical block number of the extent
+ *		which already moved
+ * @ ex_len	block length of the extent which already moved
+ *
+ * This function returns 0 if succeeded, otherwise
+ * returns error value.
+ */
+static int ext4_defrag_block_within_rsv(struct inode *inode,
+				ext4_fsblk_t ex_start, int ex_len)
+{
+	struct super_block *sb = inode->i_sb;
+	struct ext4_block_alloc_info *block_i;
+	ext4_group_t group_no;
+	ext4_grpblk_t grp_blk;
+	struct ext4_reserve_window_node *rsv;
+
+	block_i = EXT4_I(inode)->i_block_alloc_info;
+	/* Block reservation should be enabled */
+	BUG_ON(!block_i);
+
+	/* Goal size should be set */
+	BUG_ON(!block_i->rsv_window_node.rsv_goal_size);
+
+	rsv = &block_i->rsv_window_node;
+	if (rsv_is_empty(&rsv->rsv_window)) {
+		printk(KERN_ERR "ext4 defrag: Reservation window is empty\n");
+		return -ENOSPC;
+	}
+
+	ext4_get_group_no_and_offset(sb, ex_start, &group_no, &grp_blk);
+
+	if (!goal_in_my_reservation(&rsv->rsv_window, grp_blk, group_no, sb)
+	    || !goal_in_my_reservation(&rsv->rsv_window, grp_blk + ex_len - 1,
+		group_no, sb)){
+		printk(KERN_ERR "ext4 defrag: %d or %d in bg %lu is "
+				"not in rsv_window\n", grp_blk,
+				grp_blk + ex_len - 1, group_no);
+		return -ENOSPC;
+	}
+	return 0;
+}
+
+/*
+ * ext4_defrag_reserve_fblocks - Reserve free blocks
+ *				with ext4_defrag_reserve_blocks
+ *
+ * @inode:		To get a block group number
+ * @ext_info:		freeblocks distribution which stored extent-like style
+ *  @ext_info->ext[]	an array of struct ext4_extents_data
+ */
+static int ext4_defrag_reserve_fblocks(struct inode *inode,
+			struct ext4_extents_info *ext_info)
+{
+	ext4_fsblk_t ex_start = 0;
+	int i;
+	int ret = 0;
+	int len = 0;
+
+	for (i = 0; i < ext_info->entries; i++) {
+		ex_start = ext_info->ext[i].start;
+		len = ext_info->ext[i].len;
+
+		ret = ext4_defrag_reserve_blocks(inode, ex_start, len);
+		if (ret < 0) {
+			printk(KERN_ERR "ext4 defrag: "
+				"Block reservation failed. offset [%llu], "
+				"length [%d]\n", ex_start, len);
+			goto err;
+		}
+		ret = ext4_defrag_block_within_rsv(inode, ex_start, len);
+		if (ret < 0) {
+			printk(KERN_ERR "ext4 defrag: "
+				"Reservation window is not set. "
+				"offset [%llu], length [%d]\n", ex_start, len);
+			goto err;
+		}
+	}
+	return ret;
+
+err:
+	down_write(&EXT4_I(inode)->i_data_sem);
+	ext4_discard_reservation(inode);
+	up_write(&EXT4_I(inode)->i_data_sem);
+	return ret;
+}
+
+/**
+ * ext4_defrag_move_victim - Create free space for defrag
+ *
+ * @filp	target file
+ * @ext_info	target extents array to move
+ *
+ * This function returns 0 if succeeded, otherwise
+ * returns error value.
+ */
+static int ext4_defrag_move_victim(struct file *target_filp,
+			struct ext4_extents_info *ext_info)
+{
+	struct inode *target_inode = target_filp->f_dentry->d_inode;
+	struct super_block *sb = target_inode->i_sb;
+	struct file victim_file;
+	struct dentry victim_dent;
+	struct inode *victim_inode;
+	ext4_fsblk_t goal = ext_info->goal;
+	int ret = 0;
+	int i = 0;
+	struct ext4_extent_data ext;
+	ext4_group_t group;
+	ext4_grpblk_t grp_off;
+
+	/* Setup dummy extent data */
+	ext.len = 0;
+
+	/* Get the inode of the victim file */
+	victim_inode = ext4_iget(sb, ext_info->ino);
+	if (IS_ERR(victim_inode))
+		return PTR_ERR(victim_inode);
+
+	/* Setup file for the victim file */
+	victim_dent.d_inode = victim_inode;
+	victim_file.f_dentry = &victim_dent;
+	victim_file.f_mapping = victim_inode->i_mapping;
+
+	/* Set the goal appropriate offset */
+	if (goal == -1) {
+		ext4_get_group_no_and_offset(victim_inode->i_sb,
+				ext_info->ext[0].start, &group, &grp_off);
+		goal = ext4_group_first_block_no(sb, group + 1);
+	}
+
+	for (i = 0; i < ext_info->entries; i++) {
+		/* Move original blocks to another block group */
+		ret = ext4_defrag(&victim_file, ext_info->ext[i].block,
+			ext_info->ext[i].len, goal, DEFRAG_FORCE_VICTIM, &ext);
+		if (ret < 0) {
+			printk(KERN_ERR "ext4 defrag: "
+				"Moving victim file failed. ino [%llu]\n",
+				ext_info->ino);
+			goto err;
+		}
+
+		/* Sync journal blocks before reservation */
+		ret = ext4_force_commit(sb);
+		if (ret) {
+			printk(KERN_ERR "ext4 defrag: "
+				"ext4_force_commit failed(%d)\n", ret);
+			goto err;
+		}
+	}
+
+	iput(victim_inode);
+	return 0;
+err:
+	down_write(&EXT4_I(target_inode)->i_data_sem);
+	ext4_discard_reservation(target_inode);
+	up_write(&EXT4_I(target_inode)->i_data_sem);
+	iput(victim_inode);
+	return ret;
+}
+
+/**
+ * ext4_defrag_fblocks_distribution - Search free blocks distribution
+ *
+ * @inode	target file
+ * @ext_info	ext4_extents_info
+ *
+ * This function returns 0 if succeeded, otherwise
+ * returns error value.
+ */
+static int ext4_defrag_fblocks_distribution(struct inode *inode,
+			struct ext4_extents_info *ext_info)
+{
+	struct buffer_head *bitmap_bh = NULL;
+	struct super_block *sb = inode->i_sb;
+	handle_t *handle;
+	ext4_group_t group_no;
+	ext4_grpblk_t start, end;
+	ext4_fsblk_t start_block = 0;
+	int num = 0;
+	int len = 0;
+	int i = 0;
+	int err = 0;
+	int block_set = 0;
+	int extra_block = 0;
+
+	if (!sb) {
+		printk(KERN_ERR "ext4 defrag: Non-existent device\n");
+		return -ENOSPC;
+	}
+
+	group_no = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb);
+	start = ext_info->g_offset;
+	end = EXT4_BLOCKS_PER_GROUP(sb) - 1;
+
+	/* We consider about the boot block if bs = 1k */
+	if (sb->s_blocksize == 1024)
+		extra_block = 1;
+
+	handle = ext4_journal_start(inode, 1);
+	if (IS_ERR(handle)) {
+		err = PTR_ERR(handle);
+		return err;
+	}
+
+	bitmap_bh = read_block_bitmap(sb, group_no);
+	if (!bitmap_bh) {
+		err = -EIO;
+		goto out;
+	}
+
+	BUFFER_TRACE(bitmap_bh, "get undo access for new block");
+	err = ext4_journal_get_undo_access(handle, bitmap_bh);
+	if (err)
+		goto out;
+
+	for (i = start; i <= end ; i++) {
+		if (bitmap_search_next_usable_block(i, bitmap_bh, i + 1) >= 0) {
+			len++;
+			/*
+			 * Reset start_block if the free block is
+			 * the head of region.
+			 */
+			if (!block_set) {
+				start_block =
+				 i + group_no * EXT4_BLOCKS_PER_GROUP(sb) +
+				 extra_block;
+				block_set = 1;
+			}
+		} else if (len) {
+			ext_info->ext[num].start = start_block;
+			ext_info->ext[num].len = len;
+			num++;
+			len = 0;
+			block_set = 0;
+			if (num == ext_info->max_entries) {
+				ext_info->g_offset = i + 1;
+				break;
+			}
+		}
+		if ((i == end) && len) {
+			ext_info->ext[num].start = start_block;
+			ext_info->ext[num].len = len;
+			num++;
+		}
+	}
+
+	ext_info->entries = num;
+out:
+	ext4_journal_release_buffer(handle, bitmap_bh);
+	brelse(bitmap_bh);
+
+	if (handle)
+		ext4_journal_stop(handle);
+
+	return err;
+}
+
 int ext4_defrag_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
 			unsigned long arg)
 {
@@ -111,6 +597,74 @@ int ext4_defrag_ioctl(struct inode *inod
 		unlock_kernel();
 
 		return put_user(block, p);
+	} else if (cmd == EXT4_IOC_GROUP_INFO) {
+		struct ext4_group_data_info grp_data;
+
+		if (copy_from_user(&grp_data,
+			(struct ext4_group_data_info __user *)arg,
+			sizeof(grp_data)))
+			return -EFAULT;
+
+		grp_data.s_blocks_per_group =
+			EXT4_BLOCKS_PER_GROUP(inode->i_sb);
+		grp_data.s_inodes_per_group =
+			EXT4_INODES_PER_GROUP(inode->i_sb);
+
+		if (copy_to_user((struct ext4_group_data_info *)arg,
+			&grp_data, sizeof(grp_data)))
+			return -EFAULT;
+	} else if (cmd == EXT4_IOC_FREE_BLOCKS_INFO) {
+		struct ext4_extents_info ext_info;
+
+		if (copy_from_user(&ext_info,
+			(struct ext4_extents_info __user *)arg,
+			sizeof(ext_info)))
+			return -EFAULT;
+
+		BUG_ON(ext_info.ino != inode->i_ino);
+
+		err = ext4_defrag_fblocks_distribution(inode, &ext_info);
+
+		if (!err)
+			err = copy_to_user((struct ext4_extents_info *)arg,
+				&ext_info, sizeof(ext_info));
+	} else if (cmd == EXT4_IOC_EXTENTS_INFO) {
+		struct ext4_extents_info ext_info;
+
+		if (copy_from_user(&ext_info,
+				(struct ext4_extents_info __user *)arg,
+				sizeof(ext_info)))
+			return -EFAULT;
+
+		err = ext4_defrag_extents_info(inode->i_sb, &ext_info);
+		if (err >= 0) {
+			if (copy_to_user((struct ext4_extents_info __user *)arg,
+				&ext_info, sizeof(ext_info)))
+				return -EFAULT;
+		}
+	} else if (cmd == EXT4_IOC_RESERVE_BLOCK) {
+		struct ext4_extents_info ext_info;
+
+		if (copy_from_user(&ext_info,
+				(struct ext4_extents_info __user *)arg,
+				sizeof(ext_info)))
+			return -EFAULT;
+
+		err = ext4_defrag_reserve_fblocks(inode, &ext_info);
+	} else if (cmd == EXT4_IOC_MOVE_VICTIM) {
+		struct ext4_extents_info ext_info;
+
+		if (copy_from_user(&ext_info,
+			(struct ext4_extents_info __user *)arg,
+			sizeof(ext_info)))
+			return -EFAULT;
+
+		err = ext4_defrag_move_victim(filp, &ext_info);
+
+	} else if (cmd == EXT4_IOC_BLOCK_RELEASE) {
+		down_write(&EXT4_I(inode)->i_data_sem);
+		ext4_discard_reservation(inode);
+		up_write(&EXT4_I(inode)->i_data_sem);
 	} else if (cmd == EXT4_IOC_DEFRAG) {
 		struct ext4_ext_defrag_data defrag;
 
diff -Nrup linux-2.6.25-rc6/fs/ext4/extents.c linux-2.6.25-rc6-fix/fs/ext4/extents.c
--- linux-2.6.25-rc6/fs/ext4/extents.c	2008-03-24 20:50:23.000000000 +0900
+++ linux-2.6.25-rc6-fix/fs/ext4/extents.c	2008-03-24 20:39:58.000000000 +0900
@@ -183,11 +183,17 @@ ext4_fsblk_t ext4_ext_find_goal(struct i
 static ext4_fsblk_t
 ext4_ext_new_block(handle_t *handle, struct inode *inode,
 			struct ext4_ext_path *path,
-			struct ext4_extent *ex, int *err)
+			struct ext4_extent *ex, int *err,
+			ext4_fsblk_t defrag_goal)
 {
 	ext4_fsblk_t goal, newblock;
 
-	goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
+	if (defrag_goal) {
+		goal = defrag_goal;
+	} else {
+		goal = ext4_ext_find_goal(inode, path,
+				le32_to_cpu(ex->ee_block));
+	}
 	newblock = ext4_new_block(handle, inode, goal, err);
 	return newblock;
 }
@@ -638,7 +644,8 @@ static int ext4_ext_insert_index(handle_
  */
 static int ext4_ext_split(handle_t *handle, struct inode *inode,
 				struct ext4_ext_path *path,
-				struct ext4_extent *newext, int at)
+				struct ext4_extent *newext, int at,
+				ext4_fsblk_t defrag_goal)
 {
 	struct buffer_head *bh = NULL;
 	int depth = ext_depth(inode);
@@ -688,7 +695,8 @@ static int ext4_ext_split(handle_t *hand
 	/* allocate all needed blocks */
 	ext_debug("allocate %d blocks for indexes/leaf\n", depth - at);
 	for (a = 0; a < depth - at; a++) {
-		newblock = ext4_ext_new_block(handle, inode, path, newext, &err);
+		newblock = ext4_ext_new_block(handle, inode, path,
+						newext, &err, defrag_goal);
 		if (newblock == 0)
 			goto cleanup;
 		ablocks[a] = newblock;
@@ -875,7 +883,8 @@ cleanup:
  */
 static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
 					struct ext4_ext_path *path,
-					struct ext4_extent *newext)
+					struct ext4_extent *newext,
+					ext4_fsblk_t defrag_goal)
 {
 	struct ext4_ext_path *curp = path;
 	struct ext4_extent_header *neh;
@@ -884,7 +893,8 @@ static int ext4_ext_grow_indepth(handle_
 	ext4_fsblk_t newblock;
 	int err = 0;
 
-	newblock = ext4_ext_new_block(handle, inode, path, newext, &err);
+	newblock = ext4_ext_new_block(handle, inode, path,
+					newext, &err, defrag_goal);
 	if (newblock == 0)
 		return err;
 
@@ -960,7 +970,8 @@ out:
  */
 static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
 					struct ext4_ext_path *path,
-					struct ext4_extent *newext)
+					struct ext4_extent *newext,
+					ext4_fsblk_t defrag_goal)
 {
 	struct ext4_ext_path *curp;
 	int depth, i, err = 0;
@@ -980,7 +991,8 @@ repeat:
 	if (EXT_HAS_FREE_INDEX(curp)) {
 		/* if we found index with free entry, then use that
 		 * entry: create all needed subtree and add new leaf */
-		err = ext4_ext_split(handle, inode, path, newext, i);
+		err = ext4_ext_split(handle, inode, path,
+					newext, i, defrag_goal);
 
 		/* refill path */
 		ext4_ext_drop_refs(path);
@@ -991,7 +1003,8 @@ repeat:
 			err = PTR_ERR(path);
 	} else {
 		/* tree is full, time to grow in depth */
-		err = ext4_ext_grow_indepth(handle, inode, path, newext);
+		err = ext4_ext_grow_indepth(handle, inode, path,
+						newext, defrag_goal);
 		if (err)
 			goto out;
 
@@ -1437,6 +1450,19 @@ int ext4_ext_insert_extent(handle_t *han
 				struct ext4_ext_path *path,
 				struct ext4_extent *newext)
 {
+	return ext4_ext_insert_extent_defrag(handle, inode, path, newext, 0);
+}
+
+/*
+ * ext4_ext_insert_extent_defrag:
+ * The difference from ext4_ext_insert_extent is to use the first block
+ * in newext as the goal of the new index block.
+ */
+int
+ext4_ext_insert_extent_defrag(handle_t *handle, struct inode *inode,
+				struct ext4_ext_path *path,
+				struct ext4_extent *newext, int defrag)
+{
 	struct ext4_extent_header * eh;
 	struct ext4_extent *ex, *fex;
 	struct ext4_extent *nearex; /* nearest extent */
@@ -1444,6 +1470,7 @@ int ext4_ext_insert_extent(handle_t *han
 	int depth, len, err;
 	ext4_lblk_t next;
 	unsigned uninitialized = 0;
+	ext4_fsblk_t defrag_goal;
 
 	BUG_ON(ext4_ext_get_actual_len(newext) == 0);
 	depth = ext_depth(inode);
@@ -1504,11 +1531,16 @@ repeat:
 			  le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
 	}
 
+	if (defrag)
+		defrag_goal = ext_pblock(newext);
+	else
+		defrag_goal = 0;
 	/*
 	 * There is no free space in the found leaf.
 	 * We're gonna add a new leaf in the tree.
 	 */
-	err = ext4_ext_create_new_leaf(handle, inode, path, newext);
+	err = ext4_ext_create_new_leaf(handle, inode, path,
+					newext, defrag_goal);
 	if (err)
 		goto cleanup;
 	depth = ext_depth(inode);

^ permalink raw reply	[flat|nested] 15+ messages in thread
* Re: [PATCH 1/5] ext4 online defrag header file changes
@ 2008-03-31  8:46 Akira Fujita
  0 siblings, 0 replies; 15+ messages in thread
From: Akira Fujita @ 2008-03-31  8:46 UTC (permalink / raw)
  To: cmm
  Cc: Aneesh Kumar K.V, Andreas Dilger, tytso, linux-ext4,
	linux-fsdevel, a-fujita

Mingming Cao wrote:
> On Thu, 2008-03-27 at 19:15 +0900, Akira Fujita wrote:
>> Aneesh Kumar K.V wrote:
>>> On Mon, Mar 24, 2008 at 09:31:03PM +0900, Akira Fujita wrote:
>>>   
>>>> Hi Aneesh,
>>>>     
>>>>> On Mon, Mar 24, 2008 at 08:00:54PM +0900, Akira Fujita wrote:
>>>>>       
>>>>>>>   
>>>>>>>           
>>>>>> Is EXT4_IOC_FIEMAP going to be added to the ext4 patch queue?
>>>>>> I will try to use EXT4_IOC_FIEMAP instead of EXT4_IOC_EXTENTS_INFO
>>>>>> in ext4 online defrag when kernel supports it.
>>>>>>         
>>>>> Can you also look at the sparse warnings with the patches ?
>>>>>
>>>>>       
>>>> I have already released the fixed patch before, but it is not in the ext4 patch queue yet.
>>>> Mingming, could you replace new ext4-online-defrag-free-space-fragmentation.patch
>>>> with old one in the ext4 patch queue? 
>>>>
>>>>     
>>>   CHECK   fs/ext4/defrag.c
>>>   fs/ext4/defrag.c:181:7: warning: cast to restricted type
>>>   fs/ext4/defrag.c:181:7: warning: cast from restricted type
>>>   fs/ext4/defrag.c:613:21: warning: incorrect type in argument 1
>>>   (different address spaces)
>>>   fs/ext4/defrag.c:613:21:    expected void [noderef] <asn:1>*to
>>>   fs/ext4/defrag.c:613:21:    got struct ext4_group_data_info *<noident>
>>>   fs/ext4/defrag.c:629:24: warning: incorrect type in argument 1
>>>   (different address spaces)
>>>   fs/ext4/defrag.c:629:24:    expected void [noderef] <asn:1>*to
>>>   fs/ext4/defrag.c:629:24:    got struct ext4_extents_info *<noident>
>>>   fs/ext4/defrag.c:849:29: warning: potentially expensive pointer
>>>   subtraction
>>>   fs/ext4/defrag.c:912:22: warning: potentially expensive pointer
>>>   subtraction
>>>
>>> you would need http://www.kernel.org/pub/software/devel/sparse/
>>> to find and fix these warnings.
>>>
>>>   
>> Oops, there are a lot of warnings. =-O
>> Thank you for letting me know.
>> I will release fixed defrag patches in a few days.
>
> As part of transition to move ext4 related header files out of
> include/linux, and move to fs/ext4, I also updated the defrag patches to
> adapt this change. You might want to checkout the defrag patches in
> patch queue and merge the updates before release the new version.
>
I see.
I'll use the latest patches in the ext4 patch queue to fix warnings.

BTW, I have a small question about fixing one of these warnings.
Sparse outputs a warning for the following line.

	slots_range = o_end - o_start + 1;

<warning>
fs/ext4/defrag.c:912:22: warning: potentially expensive pointer subtraction

Then I tried to fix this line as follows.

	slots_range = ((unsigned long)o_end - (unsigned long)o_start + 1)
					/ sizeof(struct ext4_extent);

The warning goes away, but the line becomes a bit harder to read.
I am not sure whether I should change this line in this case.  :-\

Regards,
Akira

^ permalink raw reply	[flat|nested] 15+ messages in thread
* Re: [PATCH 1/5] ext4 online defrag header file changes
@ 2008-03-24 12:34 Akira Fujita
  0 siblings, 0 replies; 15+ messages in thread
From: Akira Fujita @ 2008-03-24 12:34 UTC (permalink / raw)
  To: aneesh.kumar; +Cc: adilger, cmm, tytso, linux-ext4, linux-fsdevel, a-fujita

Hi Aneesh,
> On Mon, Mar 24, 2008 at 08:00:54PM +0900, Akira Fujita wrote:
>>>   
>> Is EXT4_IOC_FIEMAP going to be added to the ext4 patch queue?
>> I will try to use EXT4_IOC_FIEMAP instead of EXT4_IOC_EXTENTS_INFO
>> in ext4 online defrag when kernel supports it.
>
> Can you also look at the sparse warnings with the patches ?
>
I have already released a fixed patch, but it is not in the ext4 patch queue yet.
Mingming, could you replace the old ext4-online-defrag-free-space-fragmentation.patch
in the ext4 patch queue with the new one?

Regards, Akira

Signed-off-by: Akira Fujita <a-fujita@rs.jp.nec.com>
---
 defrag.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff -Nurp linux-2.6.25-rc6-full/fs/ext4/defrag.c linux-2.6.25-rc6-fix/fs/ext4/defrag.c
--- linux-2.6.25-rc6-full/fs/ext4/defrag.c    2008-03-24 20:37:10.000000000 +0900
+++ linux-2.6.25-rc6-fix/fs/ext4/defrag.c    2008-03-24 20:40:22.000000000 +0900
@@ -454,7 +454,7 @@ static int ext4_defrag_move_victim(struc
             ext_info->ext[i].len, goal, DEFRAG_FORCE_VICTIM, &ext);
         if (ret < 0) {
             printk(KERN_ERR "ext4 defrag: "
-                "Moving victim file failed. ino [%lu]\n",
+                "Moving victim file failed. ino [%llu]\n",
                 ext_info->ino);
             goto err;
         }

^ permalink raw reply	[flat|nested] 15+ messages in thread
* Re: [PATCH 1/5] ext4 online defrag header file changes
@ 2008-03-14 12:02 Akira Fujita
  2008-03-14 12:58 ` Andreas Dilger
  0 siblings, 1 reply; 15+ messages in thread
From: Akira Fujita @ 2008-03-14 12:02 UTC (permalink / raw)
  To: cmm, tytso; +Cc: linux-ext4, linux-fsdevel

Hi Mingming,

> Overall I think the header changes probably should go with the patches
> that need those changes, that helps explain why we need this header
> changes and makes the patch compile itself. Right now this patch failed
> to compile alone.
I see.
I will reorganize my patches into meaningful groups.

>>  /*
>> @@ -299,6 +300,14 @@ struct ext4_new_group_data {
>>  #define EXT4_IOC_GETRSVSZ		_IOR('f', 5, long)
>>  #define EXT4_IOC_SETRSVSZ		_IOW('f', 6, long)
>>  #define EXT4_IOC_MIGRATE		_IO('f', 7)
>> +#define EXT4_IOC_FIBMAP			_IOW('f', 9, ext4_fsblk_t)
>> +#define EXT4_IOC_DEFRAG		_IOW('f', 10, struct ext4_ext_defrag_data)
>> +#define EXT4_IOC_GROUP_INFO	_IOW('f', 11, struct ext4_group_data_info)
>> +#define EXT4_IOC_FREE_BLOCKS_INFO	_IOW('f', 12, struct ext4_extents_info)
>> +#define EXT4_IOC_EXTENTS_INFO		_IOW('f', 13, struct ext4_extents_info)
>> +#define EXT4_IOC_RESERVE_BLOCK		_IOW('f', 14, struct ext4_extents_info)
>> +#define EXT4_IOC_MOVE_VICTIM		_IOW('f', 15, struct ext4_extents_info)
>> +#define EXT4_IOC_BLOCK_RELEASE		_IO('f', 8)
>
> These should go with the last patch in this series, where the ioctl
> commands get implemented
OK.

> Could you add more comments about the tunables below? What they are used
> for?
>
>> +/* Used for defrag */
>> +#define DEFRAG_MAX_ENT         32
>
> This means at most 32 free space extents per block group?
OK.  I will add more comments later.

It is the maximum number of extents (or free space extents) exchanged
between kernel space and user space at once.
For example, EXT4_IOC_EXTENTS_INFO is called multiple times (returning up to
DEFRAG_MAX_ENT entries per call) to get the block distribution if there is a
large number of extents (or free space extents) in the target block group.

>> +#define DEFRAG_FORCE_TRY       1
>> +#define DEFRAG_FORCE_VICTIM    2
>> +#define DEFRAG_FORCE_GATHER    3
>> +
>
> And these tunables are used in the last patch in this series, so it make
> sense to move there too.
OK.

>> +struct ext4_extent_data {
>> +	ext4_lblk_t block;		/* start logical block number */
>> +	ext4_fsblk_t start;		/* start physical block number */
>> +	int len;			/* blocks count */
>> +};
>> +
>
> Not related to defrag, but I would like to consider this as in-core
> extent structure. Maybe we should use this structure in other extents.c,
> instead of sharing the same on-disk extent structure, which needs to
> worry about little endian?

I agree. 
How about renaming this structure from ext4_extent_data to
ext4_extent_info, using it as the in-core extent structure,
and putting it into ext4_fs_extents.h?
The name would follow ext4_sb_info, which is the in-core super_block structure.

/*
 * This is the extent in-core structure.
 */
struct ext4_extent_info {
	ext4_lblk_t	ee_block;	/* first logical block extent covers */
	int		ee_len;		/* number of blocks covered by extent */
	ext4_fsblk_t	ee_start;	/* first physical block extent covers */
};      

Regards, Akira

^ permalink raw reply	[flat|nested] 15+ messages in thread
[parent not found: <1204931337.14884.43.camel@localhost.localdomain>]

end of thread, other threads:[~2008-03-31  8:46 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-03-24 12:31 [PATCH 1/5] ext4 online defrag header file changes Akira Fujita
2008-03-25 17:48 ` Aneesh Kumar K.V
2008-03-27 10:15   ` Akira Fujita
2008-03-29  0:49     ` Mingming Cao
  -- strict thread matches above, loose matches on Subject: below --
2008-03-31  8:46 Akira Fujita
2008-03-24 12:34 Akira Fujita
2008-03-14 12:02 Akira Fujita
2008-03-14 12:58 ` Andreas Dilger
2008-03-17  6:28   ` Akira Fujita
2008-03-17 17:08     ` Andreas Dilger
2008-03-24 11:00       ` Akira Fujita
2008-03-24 11:16         ` Aneesh Kumar K.V
2008-03-24 15:00         ` Eric Sandeen
2008-03-25  2:21           ` Akira Fujita
     [not found] <1204931337.14884.43.camel@localhost.localdomain>
2008-03-08  0:18 ` Mingming Cao

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).