All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC][PATCH 4/8] main function of defrag and ioctl implementation
@ 2008-04-04 11:19 Akira Fujita
  0 siblings, 0 replies; only message in thread
From: Akira Fujita @ 2008-04-04 11:19 UTC (permalink / raw)
  To: linux-ext4, Theodore Tso, Mingming Cao, Aneesh Kumar K.V
  Cc: linux-fsdevel, Akira Fujita

ext4: online defrag-- Main function of defrag and ioctl implementation

From: Akira Fujita <a-fujita@rs.jp.nec.com>

Create the temporary inode and do defrag per
defrag_size (defalut 64MB).

Signed-off-by: Akira Fujita <a-fujita@rs.jp.nec.com>
Signed-off-by: Takashi Sato <t-sato@yk.jp.nec.com>
---
 fs/ext4/defrag.c |  289 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/ext4/ext4.h   |   19 ++++-
 fs/ext4/ioctl.c  |    3 +
 3 files changed, 310 insertions(+), 1 deletions(-)

diff --git a/fs/ext4/defrag.c b/fs/ext4/defrag.c
index 09b2c56..baa04d9 100644
--- a/fs/ext4/defrag.c
+++ b/fs/ext4/defrag.c
@@ -1,3 +1,18 @@
+/*
+ * Copyright (c) 2008, NEC Software Tohoku, Ltd.
+ * Written by Takashi Sato <t-sato@yk.jp.nec.com>
+ *            Akira Fujita <a-fujita@rs.jp.nec.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
 /* Online defragmentation for EXT4 */

 #include <linux/quotaops.h>
@@ -74,6 +89,27 @@ ext4_defrag_next_extent(struct inode *inode,
 	return 1;
 }

+int ext4_defrag_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
+			unsigned long arg)
+{
+	int err = 0;
+	if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
+			return -EINVAL;
+
+	if (cmd == EXT4_IOC_DEFRAG) {
+		struct ext4_ext_defrag_data defrag;
+
+		if (copy_from_user(&defrag,
+			(struct ext4_ext_defrag_data __user *)arg,
+			sizeof(defrag)))
+			return -EFAULT;
+		err = ext4_defrag(filp, defrag.start_offset,
+				defrag.defrag_size);
+	}
+
+	return err;
+}
+
 /**
  * ext4_defrag_merge_across_blocks - Merge extents across leaf block
  *
@@ -1022,3 +1058,256 @@ ext4_defrag_new_extent_tree(struct inode *inode, struct inode *tmp_inode,
 out:
 	return ret;
 }
+
+/**
+ * ext4_defrag - Defrag the specified range of a file
+ *
+ * @filp:		pointer to file
+ * @block_start:	starting offset to defrag in blocks
+ * @defrag_size:	size of defrag in blocks
+ *
+ * This function returns the number of blocks if succeeded, otherwise
+ * returns error value.
+ */
+int
+ext4_defrag(struct file *filp, ext4_lblk_t block_start,
+		ext4_lblk_t defrag_size)
+{
+	struct inode *inode = filp->f_dentry->d_inode, *tmp_inode = NULL;
+	struct ext4_ext_path *path = NULL, *holecheck_path = NULL;
+	struct ext4_extent *ext_prev = NULL, *ext_cur = NULL, *ext_dummy = NULL;
+	handle_t *handle;
+	ext4_lblk_t block_end = block_start + defrag_size - 1;
+	ext4_lblk_t seq_blocks = 0, seq_start = 0;
+	ext4_lblk_t add_blocks = 0;
+	ext4_lblk_t file_end = (inode->i_size - 1) >> inode->i_blkbits;
+	pgoff_t page_offset = 0, dest_offset = 0, seq_end_page = 0;
+	int ret = 0, depth = 0, last_extent = 0, seq_extents = 0;
+
+	/* ext4 online defrag supports only 4KB block size */
+	if (inode->i_sb->s_blocksize != DEFRAG_BLOCK_SIZE) {
+		printk(KERN_ERR "ext4 defrag: ext4 online defrag supports "
+				"only 4KB block size for the moment.\n");
+		return -EINVAL;
+	}
+
+	/* ext4 online defrag needs mballoc mount option. */
+	if (!test_opt(inode->i_sb, MBALLOC)) {
+		printk(KERN_ERR "ext4 defrag: multiblock allocation "
+				"is disabled\n");
+		return -EINVAL;
+	}
+
+	if (file_end < block_end)
+		defrag_size -= block_end - file_end;
+
+	mutex_lock(&inode->i_mutex);
+	down_write(&EXT4_I(inode)->i_data_sem);
+
+	path = ext4_ext_find_extent(inode, block_start, NULL);
+	if (IS_ERR(path)) {
+		ret = PTR_ERR(path);
+		path = NULL;
+		goto out;
+	}
+
+	/* Get path structure to check the hole */
+	holecheck_path = ext4_ext_find_extent(inode, block_start, NULL);
+	if (IS_ERR(holecheck_path)) {
+		ret = PTR_ERR(holecheck_path);
+		holecheck_path = NULL;
+		goto out;
+	}
+
+	depth = ext_depth(inode);
+	ext_cur = holecheck_path[depth].p_ext;
+	if (ext_cur == NULL)
+		goto out;
+
+	/*
+	 * Get proper extent whose ee_block is beyond block_start
+	 * if block_start was within the hole.
+	 */
+	if (le32_to_cpu(ext_cur->ee_block) +
+		le16_to_cpu(ext_cur->ee_len) - 1 < block_start) {
+		last_extent = ext4_defrag_next_extent(inode, holecheck_path,
+				&ext_cur);
+		if (last_extent < 0) {
+			ret = last_extent;
+			goto out;
+		}
+		last_extent = ext4_defrag_next_extent(inode, path, &ext_dummy);
+		if (last_extent < 0) {
+			ret = last_extent;
+			goto out;
+		}
+	}
+	seq_extents = 1;
+	seq_start = le32_to_cpu(ext_cur->ee_block);
+
+	/* No blocks within the specified range. */
+	if (le32_to_cpu(ext_cur->ee_block) > block_end) {
+		printk(KERN_INFO "ext4 defrag: The specified range of file"
+				" may be the hole\n");
+		goto out;
+	}
+
+	/* Adjust start blocks */
+	add_blocks = min(le32_to_cpu(ext_cur->ee_block) +
+			 le16_to_cpu(ext_cur->ee_len), block_end + 1) -
+		     max(le32_to_cpu(ext_cur->ee_block), block_start);
+
+	while (!last_extent && le32_to_cpu(ext_cur->ee_block) <= block_end) {
+		seq_blocks += add_blocks;
+
+		handle = ext4_journal_start(inode,
+			EXT4_DATA_TRANS_BLOCKS(inode->i_sb) +
+			EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+			2 * EXT4_QUOTA_INIT_BLOCKS(inode->i_sb) + 1);
+		if (IS_ERR(handle)) {
+			ret = PTR_ERR(handle);
+			goto out;
+		}
+		tmp_inode = ext4_new_inode(handle,
+			inode->i_sb->s_root->d_inode, S_IFREG);
+		if (IS_ERR(tmp_inode)) {
+			ret = -ENOMEM;
+			ext4_journal_stop(handle);
+			tmp_inode = NULL;
+			goto out;
+		}
+
+		i_size_write(tmp_inode, i_size_read(inode));
+		tmp_inode->i_nlink = 0;
+		ext4_ext_tree_init(handle, tmp_inode);
+		ext4_orphan_add(handle, tmp_inode);
+		ext4_journal_stop(handle);
+
+		/* Adjust tail blocks */
+		if (seq_start + seq_blocks - 1 > block_end)
+			seq_blocks = block_end - seq_start + 1;
+
+		ext_prev = ext_cur;
+		last_extent = ext4_defrag_next_extent(inode, holecheck_path,
+				&ext_cur);
+		if (last_extent < 0) {
+			ret = last_extent;
+			break;
+		}
+		if (!last_extent)
+			seq_extents++;
+		add_blocks = le16_to_cpu(ext_cur->ee_len);
+
+		/*
+		 * Extend the length of contiguous block (seq_blocks)
+		 * if extents are contiguous.
+		 */
+		if ((le32_to_cpu(ext_prev->ee_block) +
+				le16_to_cpu(ext_prev->ee_len) ==
+				le32_to_cpu(ext_cur->ee_block) &&
+				block_end >= le32_to_cpu(ext_cur->ee_block) &&
+				!last_extent)) {
+			if (tmp_inode) {
+				iput(tmp_inode);
+				tmp_inode = NULL;
+			}
+			continue;
+		}
+
+		/* Found an isolated block */
+		if (seq_extents == 1) {
+			seq_start = le32_to_cpu(ext_cur->ee_block);
+			goto CLEANUP;
+		}
+
+		ret = ext4_defrag_new_extent_tree(inode, tmp_inode, path,
+			seq_start, seq_blocks, block_start);
+
+		if (ret < 0) {
+			break;
+		} else if (ret == 1) {
+			ret = 0;
+			seq_start = le32_to_cpu(ext_cur->ee_block);
+			goto CLEANUP;
+		}
+
+		page_offset = seq_start >>
+				(PAGE_CACHE_SHIFT - inode->i_blkbits);
+		seq_end_page = (seq_start + seq_blocks - 1) >>
+				(PAGE_CACHE_SHIFT - inode->i_blkbits);
+
+		dest_offset = 0;
+		seq_start = le32_to_cpu(ext_cur->ee_block);
+
+		/*
+		 * Discard all preallocations.
+		 * This is provisional solution.
+		 * When true ext4_mb_return_to_preallocation() is
+		 * implemented, this will be removed.
+		 */
+		ext4_mb_discard_inode_preallocations(inode);
+
+		while (page_offset <= seq_end_page) {
+			/* Swap original branches with new branches */
+			ret = ext4_defrag_partial(tmp_inode, filp,
+					page_offset, dest_offset);
+			if (ret < 0)
+				goto out;
+
+			page_offset++;
+			dest_offset++;
+		}
+
+		/* Decrease buffer counter */
+		if (holecheck_path)
+			ext4_ext_drop_refs(holecheck_path);
+		holecheck_path =
+			ext4_ext_find_extent(inode, seq_start, holecheck_path);
+		if (IS_ERR(holecheck_path)) {
+			ret = PTR_ERR(holecheck_path);
+			holecheck_path = NULL;
+			break;
+		}
+		depth = holecheck_path->p_depth;
+
+CLEANUP:
+		/* Decrease buffer counter */
+		if (path)
+			ext4_ext_drop_refs(path);
+		path = ext4_ext_find_extent(inode, seq_start, path);
+		if (IS_ERR(path)) {
+			ret = PTR_ERR(path);
+			path = NULL;
+			break;
+		}
+
+		ext_cur = holecheck_path[depth].p_ext;
+		add_blocks = le16_to_cpu(ext_cur->ee_len);
+		seq_blocks = 0;
+		dest_offset = 0;
+		seq_extents = 1;
+
+		if (tmp_inode) {
+			iput(tmp_inode);
+			tmp_inode = NULL;
+		}
+	}
+
+out:
+	if (path) {
+		ext4_ext_drop_refs(path);
+		kfree(path);
+	}
+	if (holecheck_path) {
+		ext4_ext_drop_refs(holecheck_path);
+		kfree(holecheck_path);
+	}
+
+	up_write(&EXT4_I(inode)->i_data_sem);
+	mutex_unlock(&inode->i_mutex);
+
+	if (tmp_inode)
+		iput(tmp_inode);
+
+	return (ret ? ret : defrag_size);
+}
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 8dc174f..18c1fcf 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -298,6 +298,7 @@ struct ext4_new_group_data {
 #define EXT4_IOC_GETRSVSZ		_IOR('f', 5, long)
 #define EXT4_IOC_SETRSVSZ		_IOW('f', 6, long)
 #define EXT4_IOC_MIGRATE		_IO('f', 7)
+#define EXT4_IOC_DEFRAG		_IOW('f', 10, struct ext4_ext_defrag_data)

 /*
  * ioctl commands in 32 bit emulation
@@ -315,8 +316,19 @@ struct ext4_new_group_data {
 #define EXT4_IOC32_GETVERSION_OLD	FS_IOC32_GETVERSION
 #define EXT4_IOC32_SETVERSION_OLD	FS_IOC32_SETVERSION

-#define EXT4_TRANS_META_BLOCKS 4 /* bitmap + group desc + sb + inode */
+/*
+ * Will go away.
+ * ext4 online defrag supports only 4KB block size.
+ */
+#define DEFRAG_BLOCK_SIZE	4096

+struct ext4_ext_defrag_data {
+	ext4_lblk_t start_offset;	/* start offset to defrag in blocks */
+	ext4_lblk_t defrag_size;	/* size of defrag in blocks */
+	ext4_fsblk_t goal;		/* block offset for allocation */
+};
+
+#define EXT4_TRANS_META_BLOCKS 4 /* bitmap + group desc + sb + inode */

 /*
  *  Mount options
@@ -1114,6 +1126,11 @@ extern void ext4_inode_table_set(struct super_block *sb,
 				 struct ext4_group_desc *bg, ext4_fsblk_t blk);
 /* extents.c */
 extern handle_t *ext4_ext_journal_restart(handle_t *handle, int needed);
+/* defrag.c */
+extern int ext4_defrag(struct file *filp, ext4_lblk_t block_start,
+			ext4_lblk_t defrag_size);
+extern int ext4_defrag_ioctl(struct inode *, struct file *, unsigned int,
+				unsigned long);

 static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
 {
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 0d7430a..98b6f4a 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -231,6 +231,9 @@ flags_err:

 		return err;
 	}
+	case EXT4_IOC_DEFRAG: {
+		return ext4_defrag_ioctl(inode, filp, cmd, arg);
+	}
 	case EXT4_IOC_GROUP_ADD: {
 		struct ext4_new_group_data input;
 		struct super_block *sb = inode->i_sb;


^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2008-04-04 11:19 UTC | newest]

Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-04-04 11:19 [RFC][PATCH 4/8] main function of defrag and ioctl implementation Akira Fujita

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.