* Re: [PATCH 1/5] ext4 online defrag header file changes
@ 2008-03-24 12:31 Akira Fujita
2008-03-25 17:48 ` Aneesh Kumar K.V
0 siblings, 1 reply; 15+ messages in thread
From: Akira Fujita @ 2008-03-24 12:31 UTC (permalink / raw)
To: Aneesh Kumar K.V; +Cc: Andreas Dilger, cmm, tytso, linux-ext4, linux-fsdevel
[-- Attachment #1: Type: text/plain, Size: 1322 bytes --]
Hi Aneesh,
> On Mon, Mar 24, 2008 at 08:00:54PM +0900, Akira Fujita wrote:
>>>
>> Is EXT4_IOC_FIEMAP going to be added to the ext4 patch queue?
>> I will try to use EXT4_IOC_FIEMAP instead of EXT4_IOC_EXTENTS_INFO
>> in ext4 online defrag when kernel supports it.
>
> Can you also look at the sparse warnings with the patches ?
>
I have already released the fixed patch before, but it is not in the ext4 patch queue yet.
Mingming, could you replace the old ext4-online-defrag-free-space-fragmentation.patch
in the ext4 patch queue with the new one?
Regards, Akira
Signed-off-by: Akira Fujita <a-fujita@rs.jp.nec.com>
---
defrag.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff -Nurp linux-2.6.25-rc6-full/fs/ext4/defrag.c linux-2.6.25-rc6-fix/fs/ext4/defrag.c
--- linux-2.6.25-rc6-full/fs/ext4/defrag.c 2008-03-24 20:37:10.000000000 +0900
+++ linux-2.6.25-rc6-fix/fs/ext4/defrag.c 2008-03-24 20:40:22.000000000 +0900
@@ -454,7 +454,7 @@ static int ext4_defrag_move_victim(struc
ext_info->ext[i].len, goal, DEFRAG_FORCE_VICTIM, &ext);
if (ret < 0) {
printk(KERN_ERR "ext4 defrag: "
- "Moving victim file failed. ino [%lu]\n",
+ "Moving victim file failed. ino [%llu]\n",
ext_info->ino);
goto err;
}
[-- Attachment #2: ext4-online-defrag-free-space-fragmentation.patch --]
[-- Type: application/octet-stream, Size: 22591 bytes --]
Free space fragmentation functions
From: Akira Fujita <a-fujita@rs.jp.nec.com>
Defrag tries to move other files to make sufficient space
and reallocates the contiguous blocks for the target file.
Signed-off-by: Akira Fujita <a-fujita@rs.jp.nec.com>
Signed-off-by: Takashi Sato <t-sato@yk.jp.nec.com>
---
balloc.c | 12 -
defrag.c | 554 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
extents.c | 52 ++++-
3 files changed, 602 insertions(+), 16 deletions(-)
diff -Nrup linux-2.6.25-rc6/fs/ext4/balloc.c linux-2.6.25-rc6-fix/fs/ext4/balloc.c
--- linux-2.6.25-rc6/fs/ext4/balloc.c 2008-03-24 20:50:23.000000000 +0900
+++ linux-2.6.25-rc6-fix/fs/ext4/balloc.c 2008-03-24 20:39:58.000000000 +0900
@@ -383,7 +383,7 @@ restart:
* If the goal block is within the reservation window, return 1;
* otherwise, return 0;
*/
-static int
+int
goal_in_my_reservation(struct ext4_reserve_window *rsv, ext4_grpblk_t grp_goal,
ext4_group_t group, struct super_block *sb)
{
@@ -488,7 +488,7 @@ void ext4_rsv_window_add(struct super_bl
* from the filesystem reservation window rb tree. Must be called with
* rsv_lock hold.
*/
-static void rsv_window_remove(struct super_block *sb,
+void rsv_window_remove(struct super_block *sb,
struct ext4_reserve_window_node *rsv)
{
rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
@@ -503,7 +503,7 @@ static void rsv_window_remove(struct sup
*
* returns 1 if the end block is EXT4_RESERVE_WINDOW_NOT_ALLOCATED.
*/
-static inline int rsv_is_empty(struct ext4_reserve_window *rsv)
+inline int rsv_is_empty(struct ext4_reserve_window *rsv)
{
/* a valid reservation end block could not be 0 */
return rsv->_rsv_end == EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
@@ -869,7 +869,7 @@ static int ext4_test_allocatable(ext4_gr
* bitmap on disk and the last-committed copy in journal, until we find a
* bit free in both bitmaps.
*/
-static ext4_grpblk_t
+ext4_grpblk_t
bitmap_search_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh,
ext4_grpblk_t maxblocks)
{
@@ -1239,7 +1239,7 @@ static int find_next_reservable_window(
* @bitmap_bh: the block group block bitmap
*
*/
-static int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv,
+int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv,
ext4_grpblk_t grp_goal, struct super_block *sb,
ext4_group_t group, struct buffer_head *bitmap_bh)
{
@@ -1383,7 +1383,7 @@ retry:
* expand the reservation window size if necessary on a best-effort
* basis before ext4_new_blocks() tries to allocate blocks,
*/
-static void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv,
+void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv,
struct super_block *sb, int size)
{
struct ext4_reserve_window_node *next_rsv;
diff -Nrup linux-2.6.25-rc6/fs/ext4/defrag.c linux-2.6.25-rc6-fix/fs/ext4/defrag.c
--- linux-2.6.25-rc6/fs/ext4/defrag.c 2008-03-24 20:50:23.000000000 +0900
+++ linux-2.6.25-rc6-fix/fs/ext4/defrag.c 2008-03-24 20:40:22.000000000 +0900
@@ -20,6 +20,12 @@
#include <linux/ext4_fs_extents.h>
#include "group.h"
+#define EXT_SET_EXTENT_DATA(src, dest) do { \
+ dest.block = le32_to_cpu(src->ee_block); \
+ dest.start = ext_pblock(src); \
+ dest.len = le16_to_cpu(src->ee_len); \
+ } while (0)
+
/**
* ext4_defrag_next_extent - Search for the next extent and set it to "extent"
*
@@ -89,6 +95,486 @@ ext4_defrag_next_extent(struct inode *in
return 1;
}
+/**
+ * ext4_defrag_extents_info - Get extents information
+ *
+ * @sb: for ext4_iget()
+ * @ext_info: pointer to ext4_extents_info
+ * @ext_info->ino describe an inode which is used to get
+ * extent information
+ * @ext_info->max_entries: defined by DEFRAG_MAX_ENT
+ * @ext_info->entries: amount of extents (output)
+ * @ext_info->ext[]: array of extent (output)
+ * @ext_info->offset: starting block offset of targeted extent
+ * (file relative)
+ *
+ * This function returns 0 if the next extent(s) exists,
+ * or returns 1 if the next extent doesn't exist,
+ * otherwise returns error value.
+ */
+static int ext4_defrag_extents_info(struct super_block *sb,
+ struct ext4_extents_info *ext_info)
+{
+ struct ext4_ext_path *path = NULL;
+ struct ext4_extent *ext = NULL;
+ struct inode *inode = NULL;
+ ext4_lblk_t offset = ext_info->f_offset;
+ int max_entries = ext_info->max_entries;
+ int depth = 0;
+ int entries = 0;
+ int err = 0;
+ int ret = 0;
+
+ inode = ext4_iget(sb, ext_info->ino);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
+
+ down_write(&EXT4_I(inode)->i_data_sem);
+
+ /* Return -ENOENT if a file does not exist */
+ if ((!inode->i_nlink) || (inode->i_ino < 11) ||
+ !S_ISREG(inode->i_mode)) {
+ ext_info->entries = 0;
+ err = -ENOENT;
+ goto out;
+ }
+
+ path = ext4_ext_find_extent(inode, offset, NULL);
+ if (IS_ERR(path)) {
+ err = PTR_ERR(path);
+ path = NULL;
+ goto out;
+ }
+ depth = ext_depth(inode);
+
+ /* Skip the 0 size file */
+ if (path[depth].p_ext == NULL) {
+ ext_info->entries = 0;
+ goto out;
+ }
+ ext = path[depth].p_ext;
+ EXT_SET_EXTENT_DATA(ext, ext_info->ext[entries]);
+ entries = 1;
+
+ /*
+ * This loop collects at most 'max_entries' extents per call,
+ * so this function has to be called again if @inode has
+ * more extents than 'max_entries'.
+ */
+ while (entries < max_entries) {
+ ret = ext4_defrag_next_extent(inode, path, &ext);
+ if (ret == 0) {
+ /* Found the next extent (it means not the last one) */
+ EXT_SET_EXTENT_DATA(ext, ext_info->ext[entries]);
+ entries++;
+
+ /*
+ * In case @inode has > 'max_entries' extents,
+ * we must call this function again and restart from
+ * 'max_entries * n + 1'th extent.
+ * 'n' is the number of times this function has already
+ * been called for the same @inode.
+ */
+ if (entries == max_entries) {
+ ext_info->f_offset =
+ le32_to_cpu(ext->ee_block) +
+ le32_to_cpu(ext->ee_len);
+ /* Check the extent is the last one or not */
+ ret =
+ ext4_defrag_next_extent(inode, path, &ext);
+ if (ret == 1) {
+ err = ret;
+ } else if (ret < 0) {
+ /* Failed to get the next extent */
+ err = ret;
+ goto out;
+ }
+ break;
+ }
+
+ } else if (ret == 1) {
+ /* The extent is the last one */
+ ext_info->f_offset = 0;
+ err = ret;
+ break;
+ } else {
+ /* Failed to get the next extent */
+ err = ret;
+ goto out;
+ }
+ }
+
+ ext_info->entries = entries;
+
+out:
+ if (path) {
+ ext4_ext_drop_refs(path);
+ kfree(path);
+ }
+ up_write(&EXT4_I(inode)->i_data_sem);
+ iput(inode);
+ return err;
+}
+
+/**
+ * ext4_defrag_reserve_blocks - Reserve blocks for defrag
+ *
+ * @inode target inode
+ * @goal block reservation goal
+ * @len blocks count to reserve
+ *
+ * This function returns 0 if succeeded, otherwise
+ * returns error value.
+ */
+
+static int
+ext4_defrag_reserve_blocks(struct inode *inode, ext4_fsblk_t goal, int len)
+{
+ struct super_block *sb = NULL;
+ handle_t *handle;
+ struct buffer_head *bitmap_bh = NULL;
+ struct ext4_block_alloc_info *block_i;
+ struct ext4_reserve_window_node *my_rsv = NULL;
+ unsigned short windowsz = 0;
+ ext4_group_t group_no;
+ ext4_grpblk_t grp_target_blk;
+ int err = 0;
+
+ down_write(&EXT4_I(inode)->i_data_sem);
+
+ handle = ext4_journal_start(inode, EXT4_RESERVE_TRANS_BLOCKS);
+ if (IS_ERR(handle)) {
+ err = PTR_ERR(handle);
+ handle = NULL;
+ goto out;
+ }
+
+ if (S_ISREG(inode->i_mode) && (!EXT4_I(inode)->i_block_alloc_info)) {
+ ext4_init_block_alloc_info(inode);
+ } else if (!S_ISREG(inode->i_mode)) {
+ printk(KERN_ERR "ext4 defrag: Invalid file type\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ sb = inode->i_sb;
+ if (!sb) {
+ printk(KERN_ERR "ext4 defrag: Non-existent device\n");
+ err = -ENXIO;
+ goto out;
+ }
+ ext4_get_group_no_and_offset(sb, goal, &group_no,
+ &grp_target_blk);
+
+ block_i = EXT4_I(inode)->i_block_alloc_info;
+ /* Block reservation should be enabled */
+ BUG_ON(!block_i);
+
+ windowsz = block_i->rsv_window_node.rsv_goal_size;
+ /* Goal size should be set */
+ BUG_ON(!windowsz);
+
+
+ my_rsv = &block_i->rsv_window_node;
+
+ bitmap_bh = read_block_bitmap(sb, group_no);
+ if (!bitmap_bh) {
+ err = -ENOSPC;
+ goto out;
+ }
+
+ BUFFER_TRACE(bitmap_bh, "get undo access for new block");
+ err = ext4_journal_get_undo_access(handle, bitmap_bh);
+ if (err)
+ goto out;
+
+ err = alloc_new_reservation(my_rsv, grp_target_blk, sb,
+ group_no, bitmap_bh);
+ if (err < 0) {
+ printk(KERN_ERR "ext4 defrag: Block reservation failed."
+ "offset [%d], bg[%lu]\n",
+ grp_target_blk, group_no);
+ ext4_discard_reservation(inode);
+ goto out;
+ } else {
+ if (len > EXT4_DEFAULT_RESERVE_BLOCKS)
+ try_to_extend_reservation(my_rsv, sb,
+ len - EXT4_DEFAULT_RESERVE_BLOCKS);
+
+ }
+
+out:
+ up_write(&EXT4_I(inode)->i_data_sem);
+ ext4_journal_release_buffer(handle, bitmap_bh);
+ brelse(bitmap_bh);
+
+ if (handle)
+ ext4_journal_stop(handle);
+
+ return err;
+}
+
+/**
+ * ext4_defrag_block_within_rsv - Is target extent reserved ?
+ *
+ * @ inode inode of target file
+ * @ ex_start start physical block number of the extent
+ * which already moved
+ * @ ex_len block length of the extent which already moved
+ *
+ * This function returns 0 if succeeded, otherwise
+ * returns error value.
+ */
+static int ext4_defrag_block_within_rsv(struct inode *inode,
+ ext4_fsblk_t ex_start, int ex_len)
+{
+ struct super_block *sb = inode->i_sb;
+ struct ext4_block_alloc_info *block_i;
+ ext4_group_t group_no;
+ ext4_grpblk_t grp_blk;
+ struct ext4_reserve_window_node *rsv;
+
+ block_i = EXT4_I(inode)->i_block_alloc_info;
+ /* Block reservation should be enabled */
+ BUG_ON(!block_i);
+
+ /* Goal size should be set */
+ BUG_ON(!block_i->rsv_window_node.rsv_goal_size);
+
+ rsv = &block_i->rsv_window_node;
+ if (rsv_is_empty(&rsv->rsv_window)) {
+ printk(KERN_ERR "ext4 defrag: Reservation window is empty\n");
+ return -ENOSPC;
+ }
+
+ ext4_get_group_no_and_offset(sb, ex_start, &group_no, &grp_blk);
+
+ if (!goal_in_my_reservation(&rsv->rsv_window, grp_blk, group_no, sb)
+ || !goal_in_my_reservation(&rsv->rsv_window, grp_blk + ex_len - 1,
+ group_no, sb)){
+ printk(KERN_ERR "ext4 defrag: %d or %d in bg %lu is "
+ "not in rsv_window\n", grp_blk,
+ grp_blk + ex_len - 1, group_no);
+ return -ENOSPC;
+ }
+ return 0;
+}
+
+/*
+ * ext4_defrag_reserve_fblocks - Reserve free blocks
+ * with ext4_defrag_reserve_blocks
+ *
+ * @inode: To get a block group number
+ * @ext_info: free-block distribution, stored in an extent-like format
+ * @ext_info->ext[] an array of struct ext4_extents_data
+ */
+static int ext4_defrag_reserve_fblocks(struct inode *inode,
+ struct ext4_extents_info *ext_info)
+{
+ ext4_fsblk_t ex_start = 0;
+ int i;
+ int ret = 0;
+ int len = 0;
+
+ for (i = 0; i < ext_info->entries; i++) {
+ ex_start = ext_info->ext[i].start;
+ len = ext_info->ext[i].len;
+
+ ret = ext4_defrag_reserve_blocks(inode, ex_start, len);
+ if (ret < 0) {
+ printk(KERN_ERR "ext4 defrag: "
+ "Block reservation failed. offset [%llu], "
+ "length [%d]\n", ex_start, len);
+ goto err;
+ }
+ ret = ext4_defrag_block_within_rsv(inode, ex_start, len);
+ if (ret < 0) {
+ printk(KERN_ERR "ext4 defrag: "
+ "Reservation window is not set. "
+ "offset [%llu], length [%d]\n", ex_start, len);
+ goto err;
+ }
+ }
+ return ret;
+
+err:
+ down_write(&EXT4_I(inode)->i_data_sem);
+ ext4_discard_reservation(inode);
+ up_write(&EXT4_I(inode)->i_data_sem);
+ return ret;
+}
+
+/**
+ * ext4_defrag_move_victim - Create free space for defrag
+ *
+ * @filp target file
+ * @ext_info target extents array to move
+ *
+ * This function returns 0 if succeeded, otherwise
+ * returns error value.
+ */
+static int ext4_defrag_move_victim(struct file *target_filp,
+ struct ext4_extents_info *ext_info)
+{
+ struct inode *target_inode = target_filp->f_dentry->d_inode;
+ struct super_block *sb = target_inode->i_sb;
+ struct file victim_file;
+ struct dentry victim_dent;
+ struct inode *victim_inode;
+ ext4_fsblk_t goal = ext_info->goal;
+ int ret = 0;
+ int i = 0;
+ struct ext4_extent_data ext;
+ ext4_group_t group;
+ ext4_grpblk_t grp_off;
+
+ /* Setup dummy extent data */
+ ext.len = 0;
+
+ /* Get the inode of the victim file */
+ victim_inode = ext4_iget(sb, ext_info->ino);
+ if (IS_ERR(victim_inode))
+ return PTR_ERR(victim_inode);
+
+ /* Setup file for the victim file */
+ victim_dent.d_inode = victim_inode;
+ victim_file.f_dentry = &victim_dent;
+ victim_file.f_mapping = victim_inode->i_mapping;
+
+ /* Set the goal appropriate offset */
+ if (goal == -1) {
+ ext4_get_group_no_and_offset(victim_inode->i_sb,
+ ext_info->ext[0].start, &group, &grp_off);
+ goal = ext4_group_first_block_no(sb, group + 1);
+ }
+
+ for (i = 0; i < ext_info->entries; i++) {
+ /* Move original blocks to another block group */
+ ret = ext4_defrag(&victim_file, ext_info->ext[i].block,
+ ext_info->ext[i].len, goal, DEFRAG_FORCE_VICTIM, &ext);
+ if (ret < 0) {
+ printk(KERN_ERR "ext4 defrag: "
+ "Moving victim file failed. ino [%llu]\n",
+ ext_info->ino);
+ goto err;
+ }
+
+ /* Sync journal blocks before reservation */
+ ret = ext4_force_commit(sb);
+ if (ret) {
+ printk(KERN_ERR "ext4 defrag: "
+ "ext4_force_commit failed(%d)\n", ret);
+ goto err;
+ }
+ }
+
+ iput(victim_inode);
+ return 0;
+err:
+ down_write(&EXT4_I(target_inode)->i_data_sem);
+ ext4_discard_reservation(target_inode);
+ up_write(&EXT4_I(target_inode)->i_data_sem);
+ iput(victim_inode);
+ return ret;
+}
+
+/**
+ * ext4_defrag_fblocks_distribution - Search free blocks distribution
+ *
+ * @inode target file
+ * @ext_info ext4_extents_info
+ *
+ * This function returns 0 if succeeded, otherwise
+ * returns error value.
+ */
+static int ext4_defrag_fblocks_distribution(struct inode *inode,
+ struct ext4_extents_info *ext_info)
+{
+ struct buffer_head *bitmap_bh = NULL;
+ struct super_block *sb = inode->i_sb;
+ handle_t *handle;
+ ext4_group_t group_no;
+ ext4_grpblk_t start, end;
+ ext4_fsblk_t start_block = 0;
+ int num = 0;
+ int len = 0;
+ int i = 0;
+ int err = 0;
+ int block_set = 0;
+ int extra_block = 0;
+
+ if (!sb) {
+ printk(KERN_ERR "ext4 defrag: Non-existent device\n");
+ return -ENOSPC;
+ }
+
+ group_no = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb);
+ start = ext_info->g_offset;
+ end = EXT4_BLOCKS_PER_GROUP(sb) - 1;
+
+ /* Account for the boot block when the block size is 1k */
+ if (sb->s_blocksize == 1024)
+ extra_block = 1;
+
+ handle = ext4_journal_start(inode, 1);
+ if (IS_ERR(handle)) {
+ err = PTR_ERR(handle);
+ return err;
+ }
+
+ bitmap_bh = read_block_bitmap(sb, group_no);
+ if (!bitmap_bh) {
+ err = -EIO;
+ goto out;
+ }
+
+ BUFFER_TRACE(bitmap_bh, "get undo access for new block");
+ err = ext4_journal_get_undo_access(handle, bitmap_bh);
+ if (err)
+ goto out;
+
+ for (i = start; i <= end ; i++) {
+ if (bitmap_search_next_usable_block(i, bitmap_bh, i + 1) >= 0) {
+ len++;
+ /*
+ * Reset start_block if the free block is
+ * the head of region.
+ */
+ if (!block_set) {
+ start_block =
+ i + group_no * EXT4_BLOCKS_PER_GROUP(sb) +
+ extra_block;
+ block_set = 1;
+ }
+ } else if (len) {
+ ext_info->ext[num].start = start_block;
+ ext_info->ext[num].len = len;
+ num++;
+ len = 0;
+ block_set = 0;
+ if (num == ext_info->max_entries) {
+ ext_info->g_offset = i + 1;
+ break;
+ }
+ }
+ if ((i == end) && len) {
+ ext_info->ext[num].start = start_block;
+ ext_info->ext[num].len = len;
+ num++;
+ }
+ }
+
+ ext_info->entries = num;
+out:
+ ext4_journal_release_buffer(handle, bitmap_bh);
+ brelse(bitmap_bh);
+
+ if (handle)
+ ext4_journal_stop(handle);
+
+ return err;
+}
+
int ext4_defrag_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
unsigned long arg)
{
@@ -111,6 +597,74 @@ int ext4_defrag_ioctl(struct inode *inod
unlock_kernel();
return put_user(block, p);
+ } else if (cmd == EXT4_IOC_GROUP_INFO) {
+ struct ext4_group_data_info grp_data;
+
+ if (copy_from_user(&grp_data,
+ (struct ext4_group_data_info __user *)arg,
+ sizeof(grp_data)))
+ return -EFAULT;
+
+ grp_data.s_blocks_per_group =
+ EXT4_BLOCKS_PER_GROUP(inode->i_sb);
+ grp_data.s_inodes_per_group =
+ EXT4_INODES_PER_GROUP(inode->i_sb);
+
+ if (copy_to_user((struct ext4_group_data_info *)arg,
+ &grp_data, sizeof(grp_data)))
+ return -EFAULT;
+ } else if (cmd == EXT4_IOC_FREE_BLOCKS_INFO) {
+ struct ext4_extents_info ext_info;
+
+ if (copy_from_user(&ext_info,
+ (struct ext4_extents_info __user *)arg,
+ sizeof(ext_info)))
+ return -EFAULT;
+
+ BUG_ON(ext_info.ino != inode->i_ino);
+
+ err = ext4_defrag_fblocks_distribution(inode, &ext_info);
+
+ if (!err)
+ err = copy_to_user((struct ext4_extents_info *)arg,
+ &ext_info, sizeof(ext_info));
+ } else if (cmd == EXT4_IOC_EXTENTS_INFO) {
+ struct ext4_extents_info ext_info;
+
+ if (copy_from_user(&ext_info,
+ (struct ext4_extents_info __user *)arg,
+ sizeof(ext_info)))
+ return -EFAULT;
+
+ err = ext4_defrag_extents_info(inode->i_sb, &ext_info);
+ if (err >= 0) {
+ if (copy_to_user((struct ext4_extents_info __user *)arg,
+ &ext_info, sizeof(ext_info)))
+ return -EFAULT;
+ }
+ } else if (cmd == EXT4_IOC_RESERVE_BLOCK) {
+ struct ext4_extents_info ext_info;
+
+ if (copy_from_user(&ext_info,
+ (struct ext4_extents_info __user *)arg,
+ sizeof(ext_info)))
+ return -EFAULT;
+
+ err = ext4_defrag_reserve_fblocks(inode, &ext_info);
+ } else if (cmd == EXT4_IOC_MOVE_VICTIM) {
+ struct ext4_extents_info ext_info;
+
+ if (copy_from_user(&ext_info,
+ (struct ext4_extents_info __user *)arg,
+ sizeof(ext_info)))
+ return -EFAULT;
+
+ err = ext4_defrag_move_victim(filp, &ext_info);
+
+ } else if (cmd == EXT4_IOC_BLOCK_RELEASE) {
+ down_write(&EXT4_I(inode)->i_data_sem);
+ ext4_discard_reservation(inode);
+ up_write(&EXT4_I(inode)->i_data_sem);
} else if (cmd == EXT4_IOC_DEFRAG) {
struct ext4_ext_defrag_data defrag;
diff -Nrup linux-2.6.25-rc6/fs/ext4/extents.c linux-2.6.25-rc6-fix/fs/ext4/extents.c
--- linux-2.6.25-rc6/fs/ext4/extents.c 2008-03-24 20:50:23.000000000 +0900
+++ linux-2.6.25-rc6-fix/fs/ext4/extents.c 2008-03-24 20:39:58.000000000 +0900
@@ -183,11 +183,17 @@ ext4_fsblk_t ext4_ext_find_goal(struct i
static ext4_fsblk_t
ext4_ext_new_block(handle_t *handle, struct inode *inode,
struct ext4_ext_path *path,
- struct ext4_extent *ex, int *err)
+ struct ext4_extent *ex, int *err,
+ ext4_fsblk_t defrag_goal)
{
ext4_fsblk_t goal, newblock;
- goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
+ if (defrag_goal) {
+ goal = defrag_goal;
+ } else {
+ goal = ext4_ext_find_goal(inode, path,
+ le32_to_cpu(ex->ee_block));
+ }
newblock = ext4_new_block(handle, inode, goal, err);
return newblock;
}
@@ -638,7 +644,8 @@ static int ext4_ext_insert_index(handle_
*/
static int ext4_ext_split(handle_t *handle, struct inode *inode,
struct ext4_ext_path *path,
- struct ext4_extent *newext, int at)
+ struct ext4_extent *newext, int at,
+ ext4_fsblk_t defrag_goal)
{
struct buffer_head *bh = NULL;
int depth = ext_depth(inode);
@@ -688,7 +695,8 @@ static int ext4_ext_split(handle_t *hand
/* allocate all needed blocks */
ext_debug("allocate %d blocks for indexes/leaf\n", depth - at);
for (a = 0; a < depth - at; a++) {
- newblock = ext4_ext_new_block(handle, inode, path, newext, &err);
+ newblock = ext4_ext_new_block(handle, inode, path,
+ newext, &err, defrag_goal);
if (newblock == 0)
goto cleanup;
ablocks[a] = newblock;
@@ -875,7 +883,8 @@ cleanup:
*/
static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
struct ext4_ext_path *path,
- struct ext4_extent *newext)
+ struct ext4_extent *newext,
+ ext4_fsblk_t defrag_goal)
{
struct ext4_ext_path *curp = path;
struct ext4_extent_header *neh;
@@ -884,7 +893,8 @@ static int ext4_ext_grow_indepth(handle_
ext4_fsblk_t newblock;
int err = 0;
- newblock = ext4_ext_new_block(handle, inode, path, newext, &err);
+ newblock = ext4_ext_new_block(handle, inode, path,
+ newext, &err, defrag_goal);
if (newblock == 0)
return err;
@@ -960,7 +970,8 @@ out:
*/
static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
struct ext4_ext_path *path,
- struct ext4_extent *newext)
+ struct ext4_extent *newext,
+ ext4_fsblk_t defrag_goal)
{
struct ext4_ext_path *curp;
int depth, i, err = 0;
@@ -980,7 +991,8 @@ repeat:
if (EXT_HAS_FREE_INDEX(curp)) {
/* if we found index with free entry, then use that
* entry: create all needed subtree and add new leaf */
- err = ext4_ext_split(handle, inode, path, newext, i);
+ err = ext4_ext_split(handle, inode, path,
+ newext, i, defrag_goal);
/* refill path */
ext4_ext_drop_refs(path);
@@ -991,7 +1003,8 @@ repeat:
err = PTR_ERR(path);
} else {
/* tree is full, time to grow in depth */
- err = ext4_ext_grow_indepth(handle, inode, path, newext);
+ err = ext4_ext_grow_indepth(handle, inode, path,
+ newext, defrag_goal);
if (err)
goto out;
@@ -1437,6 +1450,19 @@ int ext4_ext_insert_extent(handle_t *han
struct ext4_ext_path *path,
struct ext4_extent *newext)
{
+ return ext4_ext_insert_extent_defrag(handle, inode, path, newext, 0);
+}
+
+/*
+ * ext4_ext_insert_extent_defrag:
+ * The difference from ext4_ext_insert_extent is to use the first block
+ * in newext as the goal of the new index block.
+ */
+int
+ext4_ext_insert_extent_defrag(handle_t *handle, struct inode *inode,
+ struct ext4_ext_path *path,
+ struct ext4_extent *newext, int defrag)
+{
struct ext4_extent_header * eh;
struct ext4_extent *ex, *fex;
struct ext4_extent *nearex; /* nearest extent */
@@ -1444,6 +1470,7 @@ int ext4_ext_insert_extent(handle_t *han
int depth, len, err;
ext4_lblk_t next;
unsigned uninitialized = 0;
+ ext4_fsblk_t defrag_goal;
BUG_ON(ext4_ext_get_actual_len(newext) == 0);
depth = ext_depth(inode);
@@ -1504,11 +1531,16 @@ repeat:
le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
}
+ if (defrag)
+ defrag_goal = ext_pblock(newext);
+ else
+ defrag_goal = 0;
/*
* There is no free space in the found leaf.
* We're gonna add a new leaf in the tree.
*/
- err = ext4_ext_create_new_leaf(handle, inode, path, newext);
+ err = ext4_ext_create_new_leaf(handle, inode, path,
+ newext, defrag_goal);
if (err)
goto cleanup;
depth = ext_depth(inode);
^ permalink raw reply [flat|nested] 15+ messages in thread* Re: [PATCH 1/5] ext4 online defrag header file changes
2008-03-24 12:31 [PATCH 1/5] ext4 online defrag header file changes Akira Fujita
@ 2008-03-25 17:48 ` Aneesh Kumar K.V
2008-03-27 10:15 ` Akira Fujita
0 siblings, 1 reply; 15+ messages in thread
From: Aneesh Kumar K.V @ 2008-03-25 17:48 UTC (permalink / raw)
To: Akira Fujita; +Cc: Andreas Dilger, cmm, tytso, linux-ext4, linux-fsdevel
On Mon, Mar 24, 2008 at 09:31:03PM +0900, Akira Fujita wrote:
> Hi Aneesh,
> > On Mon, Mar 24, 2008 at 08:00:54PM +0900, Akira Fujita wrote:
> >>>
> >> Is EXT4_IOC_FIEMAP going to be added to the ext4 patch queue?
> >> I will try to use EXT4_IOC_FIEMAP instead of EXT4_IOC_EXTENTS_INFO
> >> in ext4 online defrag when kernel supports it.
> >
> > Can you also look at the sparse warnings with the patches ?
> >
> I have already released the fixed patch before, but it is not in the ext4 patch queue yet.
> Mingming, could you replace new ext4-online-defrag-free-space-fragmentation.patch
> with old one in the ext4 patch queue?
>
CHECK fs/ext4/defrag.c
fs/ext4/defrag.c:181:7: warning: cast to restricted type
fs/ext4/defrag.c:181:7: warning: cast from restricted type
fs/ext4/defrag.c:613:21: warning: incorrect type in argument 1
(different address spaces)
fs/ext4/defrag.c:613:21: expected void [noderef] <asn:1>*to
fs/ext4/defrag.c:613:21: got struct ext4_group_data_info *<noident>
fs/ext4/defrag.c:629:24: warning: incorrect type in argument 1
(different address spaces)
fs/ext4/defrag.c:629:24: expected void [noderef] <asn:1>*to
fs/ext4/defrag.c:629:24: got struct ext4_extents_info *<noident>
fs/ext4/defrag.c:849:29: warning: potentially expensive pointer
subtraction
fs/ext4/defrag.c:912:22: warning: potentially expensive pointer
subtraction
you would need http://www.kernel.org/pub/software/devel/sparse/
to find and fix these warnings.
-aneesh
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 1/5] ext4 online defrag header file changes
2008-03-25 17:48 ` Aneesh Kumar K.V
@ 2008-03-27 10:15 ` Akira Fujita
2008-03-29 0:49 ` Mingming Cao
0 siblings, 1 reply; 15+ messages in thread
From: Akira Fujita @ 2008-03-27 10:15 UTC (permalink / raw)
To: Aneesh Kumar K.V; +Cc: Andreas Dilger, cmm, tytso, linux-ext4, linux-fsdevel
Aneesh Kumar K.V wrote:
> On Mon, Mar 24, 2008 at 09:31:03PM +0900, Akira Fujita wrote:
>
>> Hi Aneesh,
>>
>>> On Mon, Mar 24, 2008 at 08:00:54PM +0900, Akira Fujita wrote:
>>>
>>>>>
>>>>>
>>>> Is EXT4_IOC_FIEMAP going to be added to the ext4 patch queue?
>>>> I will try to use EXT4_IOC_FIEMAP instead of EXT4_IOC_EXTENTS_INFO
>>>> in ext4 online defrag when kernel supports it.
>>>>
>>> Can you also look at the sparse warnings with the patches ?
>>>
>>>
>> I have already released the fixed patch before, but it is not in the ext4 patch queue yet.
>> Mingming, could you replace new ext4-online-defrag-free-space-fragmentation.patch
>> with old one in the ext4 patch queue?
>>
>>
>
> CHECK fs/ext4/defrag.c
> fs/ext4/defrag.c:181:7: warning: cast to restricted type
> fs/ext4/defrag.c:181:7: warning: cast from restricted type
> fs/ext4/defrag.c:613:21: warning: incorrect type in argument 1
> (different address spaces)
> fs/ext4/defrag.c:613:21: expected void [noderef] <asn:1>*to
> fs/ext4/defrag.c:613:21: got struct ext4_group_data_info *<noident>
> fs/ext4/defrag.c:629:24: warning: incorrect type in argument 1
> (different address spaces)
> fs/ext4/defrag.c:629:24: expected void [noderef] <asn:1>*to
> fs/ext4/defrag.c:629:24: got struct ext4_extents_info *<noident>
> fs/ext4/defrag.c:849:29: warning: potentially expensive pointer
> subtraction
> fs/ext4/defrag.c:912:22: warning: potentially expensive pointer
> subtraction
>
> you would need http://www.kernel.org/pub/software/devel/sparse/
> to find and fix these warnings.
>
>
Oops, there are a lot of warnings. =-O
Thank you for letting me know.
I will release fixed defrag patches in a few days.
Regards, Akira
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 1/5] ext4 online defrag header file changes
2008-03-27 10:15 ` Akira Fujita
@ 2008-03-29 0:49 ` Mingming Cao
0 siblings, 0 replies; 15+ messages in thread
From: Mingming Cao @ 2008-03-29 0:49 UTC (permalink / raw)
To: Akira Fujita
Cc: Aneesh Kumar K.V, Andreas Dilger, tytso, linux-ext4,
linux-fsdevel
On Thu, 2008-03-27 at 19:15 +0900, Akira Fujita wrote:
> Aneesh Kumar K.V wrote:
> > On Mon, Mar 24, 2008 at 09:31:03PM +0900, Akira Fujita wrote:
> >
> >> Hi Aneesh,
> >>
> >>> On Mon, Mar 24, 2008 at 08:00:54PM +0900, Akira Fujita wrote:
> >>>
> >>>>>
> >>>>>
> >>>> Is EXT4_IOC_FIEMAP going to be added to the ext4 patch queue?
> >>>> I will try to use EXT4_IOC_FIEMAP instead of EXT4_IOC_EXTENTS_INFO
> >>>> in ext4 online defrag when kernel supports it.
> >>>>
> >>> Can you also look at the sparse warnings with the patches ?
> >>>
> >>>
> >> I have already released the fixed patch before, but it is not in the ext4 patch queue yet.
> >> Mingming, could you replace new ext4-online-defrag-free-space-fragmentation.patch
> >> with old one in the ext4 patch queue?
> >>
> >>
> >
> > CHECK fs/ext4/defrag.c
> > fs/ext4/defrag.c:181:7: warning: cast to restricted type
> > fs/ext4/defrag.c:181:7: warning: cast from restricted type
> > fs/ext4/defrag.c:613:21: warning: incorrect type in argument 1
> > (different address spaces)
> > fs/ext4/defrag.c:613:21: expected void [noderef] <asn:1>*to
> > fs/ext4/defrag.c:613:21: got struct ext4_group_data_info *<noident>
> > fs/ext4/defrag.c:629:24: warning: incorrect type in argument 1
> > (different address spaces)
> > fs/ext4/defrag.c:629:24: expected void [noderef] <asn:1>*to
> > fs/ext4/defrag.c:629:24: got struct ext4_extents_info *<noident>
> > fs/ext4/defrag.c:849:29: warning: potentially expensive pointer
> > subtraction
> > fs/ext4/defrag.c:912:22: warning: potentially expensive pointer
> > subtraction
> >
> > you would need http://www.kernel.org/pub/software/devel/sparse/
> > to find and fix these warnings.
> >
> >
> Oops, there are a lot of warnings. =-O
> Thank you for letting me know.
> I will release fixed defrag patches in a few days.
As part of the transition to move ext4-related header files out of
include/linux and into fs/ext4, I also updated the defrag patches to
adapt to this change. You might want to check out the defrag patches in
the patch queue and merge the updates before releasing the new version.
Cheers,
Mingming
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 1/5] ext4 online defrag header file changes
@ 2008-03-31 8:46 Akira Fujita
0 siblings, 0 replies; 15+ messages in thread
From: Akira Fujita @ 2008-03-31 8:46 UTC (permalink / raw)
To: cmm
Cc: Aneesh Kumar K.V, Andreas Dilger, tytso, linux-ext4,
linux-fsdevel, a-fujita
Mingming Cao wrote:
> On Thu, 2008-03-27 at 19:15 +0900, Akira Fujita wrote:
>> Aneesh Kumar K.V wrote:
>>> On Mon, Mar 24, 2008 at 09:31:03PM +0900, Akira Fujita wrote:
>>>
>>>> Hi Aneesh,
>>>>
>>>>> On Mon, Mar 24, 2008 at 08:00:54PM +0900, Akira Fujita wrote:
>>>>>
>>>>>>>
>>>>>>>
>>>>>> Is EXT4_IOC_FIEMAP going to be added to the ext4 patch queue?
>>>>>> I will try to use EXT4_IOC_FIEMAP instead of EXT4_IOC_EXTENTS_INFO
>>>>>> in ext4 online defrag when kernel supports it.
>>>>>>
>>>>> Can you also look at the sparse warnings with the patches ?
>>>>>
>>>>>
>>>> I have already released the fixed patch before, but it is not in the ext4 patch queue yet.
>>>> Mingming, could you replace new ext4-online-defrag-free-space-fragmentation.patch
>>>> with old one in the ext4 patch queue?
>>>>
>>>>
>>> CHECK fs/ext4/defrag.c
>>> fs/ext4/defrag.c:181:7: warning: cast to restricted type
>>> fs/ext4/defrag.c:181:7: warning: cast from restricted type
>>> fs/ext4/defrag.c:613:21: warning: incorrect type in argument 1
>>> (different address spaces)
>>> fs/ext4/defrag.c:613:21: expected void [noderef] <asn:1>*to
>>> fs/ext4/defrag.c:613:21: got struct ext4_group_data_info *<noident>
>>> fs/ext4/defrag.c:629:24: warning: incorrect type in argument 1
>>> (different address spaces)
>>> fs/ext4/defrag.c:629:24: expected void [noderef] <asn:1>*to
>>> fs/ext4/defrag.c:629:24: got struct ext4_extents_info *<noident>
>>> fs/ext4/defrag.c:849:29: warning: potentially expensive pointer
>>> subtraction
>>> fs/ext4/defrag.c:912:22: warning: potentially expensive pointer
>>> subtraction
>>>
>>> you would need http://www.kernel.org/pub/software/devel/sparse/
>>> to find and fix these warnings.
>>>
>>>
>> Oops, there are a lot of warnings. =-O
>> Thank you for letting me know.
>> I will release fixed defrag patches in a few days.
>
> As part of transition to move ext4 related header files out of
> include/linux, and move to fs/ext4, I also updated the defrag patches to
> adapt this change. You might want to checkout the defrag patches in
> patch queue and merge the updates before release the new version.
>
I see.
I'll use the latest patches in the ext4 patch queue to fix warnings.
BTW, I have a slight question about this warning fix.
The compiler with sparse outputs the warning for the following line.
slots_range = o_end - o_start + 1;
<warning>
fs/ext4/defrag.c:912:22: warning: potentially expensive pointer subtraction
Then I tried to fix this line as follows.
slots_range = ((unsigned long)o_end - (unsigned long)o_start)
/ sizeof(struct ext4_extent) + 1;
There is no warning now, but the line seems a bit hard to read.
I am not sure whether I should fix this line or not in this case. :-\
Regards,
Akira
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 1/5] ext4 online defrag header file changes
@ 2008-03-24 12:34 Akira Fujita
0 siblings, 0 replies; 15+ messages in thread
From: Akira Fujita @ 2008-03-24 12:34 UTC (permalink / raw)
To: aneesh.kumar; +Cc: adilger, cmm, tytso, linux-ext4, linux-fsdevel, a-fujita
Hi Aneesh,
> On Mon, Mar 24, 2008 at 08:00:54PM +0900, Akira Fujita wrote:
>>>
>> Is EXT4_IOC_FIEMAP going to be added to the ext4 patch queue?
>> I will try to use EXT4_IOC_FIEMAP instead of EXT4_IOC_EXTENTS_INFO
>> in ext4 online defrag when kernel supports it.
>
> Can you also look at the sparse warnings with the patches ?
>
I have already released the fixed patch before, but it is not in the ext4 patch queue yet.
Mingming, could you replace the old ext4-online-defrag-free-space-fragmentation.patch
in the ext4 patch queue with the new one?
Regards, Akira
Signed-off-by: Akira Fujita <a-fujita@rs.jp.nec.com>
---
defrag.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff -Nurp linux-2.6.25-rc6-full/fs/ext4/defrag.c linux-2.6.25-rc6-fix/fs/ext4/defrag.c
--- linux-2.6.25-rc6-full/fs/ext4/defrag.c 2008-03-24 20:37:10.000000000 +0900
+++ linux-2.6.25-rc6-fix/fs/ext4/defrag.c 2008-03-24 20:40:22.000000000 +0900
@@ -454,7 +454,7 @@ static int ext4_defrag_move_victim(struc
ext_info->ext[i].len, goal, DEFRAG_FORCE_VICTIM, &ext);
if (ret < 0) {
printk(KERN_ERR "ext4 defrag: "
- "Moving victim file failed. ino [%lu]\n",
+ "Moving victim file failed. ino [%llu]\n",
ext_info->ino);
goto err;
}
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 1/5] ext4 online defrag header file changes
@ 2008-03-14 12:02 Akira Fujita
2008-03-14 12:58 ` Andreas Dilger
0 siblings, 1 reply; 15+ messages in thread
From: Akira Fujita @ 2008-03-14 12:02 UTC (permalink / raw)
To: cmm, tytso; +Cc: linux-ext4, linux-fsdevel
Hi Mingming,
> Overall I think the header changes probably should go with the patches
> that need those changes, that helps explain why we need this header
> changes and makes the patch compile itself. Right now this patch failed
> to compile alone.
I see.
I will reorganize my patches into meaningful groups.
>> /*
>> @@ -299,6 +300,14 @@ struct ext4_new_group_data {
>> #define EXT4_IOC_GETRSVSZ _IOR('f', 5, long)
>> #define EXT4_IOC_SETRSVSZ _IOW('f', 6, long)
>> #define EXT4_IOC_MIGRATE _IO('f', 7)
>> +#define EXT4_IOC_FIBMAP _IOW('f', 9, ext4_fsblk_t)
>> +#define EXT4_IOC_DEFRAG _IOW('f', 10, struct ext4_ext_defrag_data)
>> +#define EXT4_IOC_GROUP_INFO _IOW('f', 11, struct ext4_group_data_info)
>> +#define EXT4_IOC_FREE_BLOCKS_INFO _IOW('f', 12, struct ext4_extents_info)
>> +#define EXT4_IOC_EXTENTS_INFO _IOW('f', 13, struct ext4_extents_info)
>> +#define EXT4_IOC_RESERVE_BLOCK _IOW('f', 14, struct ext4_extents_info)
>> +#define EXT4_IOC_MOVE_VICTIM _IOW('f', 15, struct ext4_extents_info)
>> +#define EXT4_IOC_BLOCK_RELEASE _IO('f', 8)
>
> These should go with the last patch in this series, where the ioctl
> commands get implemented
OK.
> Could you add more comments about the tunables below? What they are used
> for?
>
>> +/* Used for defrag */
>> +#define DEFRAG_MAX_ENT 32
>
> This means at most 32 free space extents per block group?
OK. I will add more comments later.
This is the maximum number of extents (or free space extents) exchanged
between kernel space and user space at once.
For example, EXT4_IOC_EXTENTS_INFO is called multiple times (once per DEFRAG_MAX_ENT extents)
to get its block distribution if there is a large number of
extents (or free space extents) in the target block group.
>> +#define DEFRAG_FORCE_TRY 1
>> +#define DEFRAG_FORCE_VICTIM 2
>> +#define DEFRAG_FORCE_GATHER 3
>> +
>
> And these tunables are used in the last patch in this series, so it make
> sense to move there too.
OK.
>> +struct ext4_extent_data {
>> + ext4_lblk_t block; /* start logical block number */
>> + ext4_fsblk_t start; /* start physical block number */
>> + int len; /* blocks count */
>> +};
>> +
>
> Not related to defrag, but I would like to consider this as in-core
> extent structure. Maybe we should use this structure in other extents.c,
> instead of sharing the same on-disk extent structure, which needs to
> worry about little endian?
I agree.
How about renaming this structure from ext4_extent_data to
ext4_extent_info, using it as the in-core extent structure,
and putting it into ext4_fs_extents.h?
The name would match the in-core super_block, which is ext4_sb_info.
/*
* This is the extent in-core structure.
*/
struct ext4_extent_info {
ext4_lblk_t ee_block; /* first logical block extent covers */
int ee_len; /* number of blocks covered by extent */
ext4_fsblk_t ee_start; /* first physical block extent covers */
};
Regards, Akira
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 1/5] ext4 online defrag header file changes
2008-03-14 12:02 Akira Fujita
@ 2008-03-14 12:58 ` Andreas Dilger
2008-03-17 6:28 ` Akira Fujita
0 siblings, 1 reply; 15+ messages in thread
From: Andreas Dilger @ 2008-03-14 12:58 UTC (permalink / raw)
To: Akira Fujita; +Cc: cmm, tytso, linux-ext4, linux-fsdevel
On Mar 14, 2008 21:02 +0900, Akira Fujita wrote:
> How about renaming this structure from ext4_extent_data to
> ext4_extent_info which used the extent in-core structure
> and put it into ext4_fs_extents.h?
> Because in-core super_block is ext4_sb_info.
>
> /*
> * This is the extent in-core structure.
> */
> struct ext4_extent_info {
> ext4_lblk_t ee_block; /* first logical block extent covers */
> int ee_len; /* number of blocks covered by extent */
> ext4_fsblk_t ee_start; /* first physical block extent covers */
> };
Would it be possible to use the FIEMAP interface to get the file mapping
data to userspace? That would avoid the need to have a second ioctl
with nearly the same data.
Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 1/5] ext4 online defrag header file changes
2008-03-14 12:58 ` Andreas Dilger
@ 2008-03-17 6:28 ` Akira Fujita
2008-03-17 17:08 ` Andreas Dilger
0 siblings, 1 reply; 15+ messages in thread
From: Akira Fujita @ 2008-03-17 6:28 UTC (permalink / raw)
To: Andreas Dilger; +Cc: cmm, tytso, linux-ext4, linux-fsdevel
Hi Andreas,
>> How about renaming this structure from ext4_extent_data to
>> ext4_extent_info which used the extent in-core structure
>> and put it into ext4_fs_extents.h?
>> Because in-core super_block is ext4_sb_info.
>>
>> /*
>> * This is the extent in-core structure.
>> */
>> struct ext4_extent_info {
>> ext4_lblk_t ee_block; /* first logical block extent covers */
>> int ee_len; /* number of blocks covered by extent */
>> ext4_fsblk_t ee_start; /* first physical block extent covers */
>> };
>>
>
> Would it be possible to use the FIEMAP interface to get the file mapping
> data to userspace? That would avoid the need to have a second ioctl
> with nearly the same data.
>
Reducing the number of ioctls makes sense.
But EXT4_IOC_EXTENTS_INFO needs to get the file mapping data
by inode number, not by file name.
So we should either keep the ext4_extents_info structure
or add an ino entry to the fiemap structure.
Regards, Akira
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 1/5] ext4 online defrag header file changes
2008-03-17 6:28 ` Akira Fujita
@ 2008-03-17 17:08 ` Andreas Dilger
2008-03-24 11:00 ` Akira Fujita
0 siblings, 1 reply; 15+ messages in thread
From: Andreas Dilger @ 2008-03-17 17:08 UTC (permalink / raw)
To: Akira Fujita; +Cc: cmm, tytso, linux-ext4, linux-fsdevel
On Mar 17, 2008 15:28 +0900, Akira Fujita wrote:
> Hi Andreas,
>>> How about renaming this structure from ext4_extent_data to
>>> ext4_extent_info which used the extent in-core structure
>>> and put it into ext4_fs_extents.h? Because in-core super_block is
>>> ext4_sb_info.
>>>
>>> /*
>>> * This is the extent in-core structure.
>>> */
>>> struct ext4_extent_info {
>>> ext4_lblk_t ee_block; /* first logical block extent covers */
>>> int ee_len; /* number of blocks covered by extent */
>>> ext4_fsblk_t ee_start; /* first physical block extent covers */
>>> };
>>
>> Would it be possible to use the FIEMAP interface to get the file mapping
>> data to userspace? That would avoid the need to have a second ioctl
>> with nearly the same data.
>
> Reducing ioctl makes sense.
> But EXT4_EXTENTS_INFO needs to get the file mapping data
> with inode number not file name.
> So we should keep ext4_extents_info structure
> or put ino entry in fiemap structure.
We introduced a special ioctl for this which would allow root to perform
ioctl() on any inode by inode number. We call this EXT4_IOC_WRAPPER, and
it is called on the root inode and specifies the inode number and real
ioctl number before the rest of the ioctl data.
struct ext4_ioctl_wrapper {
__u32 ioctl_cmd;
__u32 padding;
__u64 ioctl_ino;
char ioctl_data[0];
};
More patch is available, but it isn't in a self-contained patch.
Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 1/5] ext4 online defrag header file changes
2008-03-17 17:08 ` Andreas Dilger
@ 2008-03-24 11:00 ` Akira Fujita
2008-03-24 11:16 ` Aneesh Kumar K.V
2008-03-24 15:00 ` Eric Sandeen
0 siblings, 2 replies; 15+ messages in thread
From: Akira Fujita @ 2008-03-24 11:00 UTC (permalink / raw)
To: Andreas Dilger; +Cc: cmm, tytso, linux-ext4, linux-fsdevel, Akira Fujita
> On Mar 17, 2008 15:28 +0900, Akira Fujita wrote:
>
>> Hi Andreas,
>>
>>>> How about renaming this structure from ext4_extent_data to
>>>> ext4_extent_info which used the extent in-core structure
>>>> and put it into ext4_fs_extents.h? Because in-core super_block is
>>>> ext4_sb_info.
>>>>
>>>> /*
>>>> * This is the extent in-core structure.
>>>> */
>>>> struct ext4_extent_info {
>>>> ext4_lblk_t ee_block; /* first logical block extent covers */
>>>> int ee_len; /* number of blocks covered by extent */
>>>> ext4_fsblk_t ee_start; /* first physical block extent covers */
>>>> };
>>>>
>>> Would it be possible to use the FIEMAP interface to get the file mapping
>>> data to userspace? That would avoid the need to have a second ioctl
>>> with nearly the same data.
>>>
>>
>> Reducing ioctl makes sense.
>> But EXT4_EXTENTS_INFO needs to get the file mapping data
>> with inode number not file name.
>> So we should keep ext4_extents_info structure
>> or put ino entry in fiemap structure.
>>
>
> We introduced a special ioctl for this which would allow root to perform
> ioctl() on any inode by inode number. We call this EXT4_IOC_WRAPPER, and
> it is called on the root inode and specifies the inode number and real
> ioctl number before the rest of the ioctl data.
>
> struct ext4_ioctl_wrapper {
> __u32 ioctl_cmd;
> __u32 padding;
> __u64 ioctl_ino;
> char ioctl_data[0];
> };
>
> More patch is available, but it isn't in a self-contained patch.
>
>
Is EXT4_IOC_FIEMAP going to be added to the ext4 patch queue?
I will try to use EXT4_IOC_FIEMAP instead of EXT4_IOC_EXTENTS_INFO
in ext4 online defrag when kernel supports it.
Regards, Akira
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 1/5] ext4 online defrag header file changes
2008-03-24 11:00 ` Akira Fujita
@ 2008-03-24 11:16 ` Aneesh Kumar K.V
2008-03-24 15:00 ` Eric Sandeen
1 sibling, 0 replies; 15+ messages in thread
From: Aneesh Kumar K.V @ 2008-03-24 11:16 UTC (permalink / raw)
To: Akira Fujita; +Cc: Andreas Dilger, cmm, tytso, linux-ext4, linux-fsdevel
On Mon, Mar 24, 2008 at 08:00:54PM +0900, Akira Fujita wrote:
>>
>>
> Is EXT4_IOC_FIEMAP going to be added to the ext4 patch queue?
> I will try to use EXT4_IOC_FIEMAP instead of EXT4_IOC_EXTENTS_INFO
> in ext4 online defrag when kernel supports it.
Can you also look at the sparse warnings with the patches ?
-aneesh
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 1/5] ext4 online defrag header file changes
2008-03-24 11:00 ` Akira Fujita
2008-03-24 11:16 ` Aneesh Kumar K.V
@ 2008-03-24 15:00 ` Eric Sandeen
2008-03-25 2:21 ` Akira Fujita
1 sibling, 1 reply; 15+ messages in thread
From: Eric Sandeen @ 2008-03-24 15:00 UTC (permalink / raw)
To: Akira Fujita; +Cc: Andreas Dilger, cmm, tytso, linux-ext4, linux-fsdevel
Akira Fujita wrote:
> Is EXT4_IOC_FIEMAP going to be added to the ext4 patch queue?
> I will try to use EXT4_IOC_FIEMAP instead of EXT4_IOC_EXTENTS_INFO
> in ext4 online defrag when kernel supports it.
It's not, yet, but I could probably put it there.
It is not ready for upstream yet at all, but it does have a good amount
of functionality.
I'll try to get something submitted for the patch queue soon...
(I decided to slow down a bit on the fiemap work, and concentrate on
helping to get e2fsprogs ready with ext4 support)
-Eric
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 1/5] ext4 online defrag header file changes
2008-03-24 15:00 ` Eric Sandeen
@ 2008-03-25 2:21 ` Akira Fujita
0 siblings, 0 replies; 15+ messages in thread
From: Akira Fujita @ 2008-03-25 2:21 UTC (permalink / raw)
To: Eric Sandeen; +Cc: Andreas Dilger, cmm, tytso, linux-ext4, linux-fsdevel
Eric Sandeen wrote:
> Akira Fujita wrote:
>
>
>> Is EXT4_IOC_FIEMAP going to be added to the ext4 patch queue?
>> I will try to use EXT4_IOC_FIEMAP instead of EXT4_IOC_EXTENTS_INFO
>> in ext4 online defrag when kernel supports it.
>>
>
> It's not, yet, but I could probably put it there.
>
>
I see.
> It is not ready for upstream yet at all, but it does have a good amount
> of functionality.
>
> I'll try to get something submitted for the patch queue soon...
>
> (I decided to slow down a bit on the fiemap work, and concentrate on
> helping to get e2fsprogs ready with ext4 support)
>
>
No problem since e2fsprogs is one of the most important things for ext4.
Regards, Akira
^ permalink raw reply [flat|nested] 15+ messages in thread
[parent not found: <1204931337.14884.43.camel@localhost.localdomain>]
* Re: [PATCH 1/5] ext4 online defrag header file changes
[not found] <1204931337.14884.43.camel@localhost.localdomain>
@ 2008-03-08 0:18 ` Mingming Cao
0 siblings, 0 replies; 15+ messages in thread
From: Mingming Cao @ 2008-03-08 0:18 UTC (permalink / raw)
To: Akira Fujita, tytso, linux-ext4, linux-fsdevel
> [PATCH 1/5] ext4 online defrag header file changes
> - Header file changes used by online defrag.
> Header file changes used by online defrag.
>
Overall I think the header changes probably should go with the patches
that need those changes; that helps explain why we need these header
changes and makes each patch compile by itself. Right now this patch fails
to compile alone.
> Signed-off-by: Akira Fujita <a-fujita@rs.jp.nec.com>
> Signed-off-by: Takashi Sato <t-sato@yk.jp.nec.com>
> --
> fs/ext4/Makefile | 2 +-
> include/linux/ext4_fs.h | 68 +++++++++++++++++++++++++++++++++++++++
> include/linux/ext4_fs_extents.h | 10 ++++++
> 3 files changed, 79 insertions(+), 1 deletions(-)
>
> diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
> index ac6fa8c..8028102 100644
> --- a/fs/ext4/Makefile
> +++ b/fs/ext4/Makefile
> @@ -6,7 +6,7 @@ obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o
>
> ext4dev-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
> ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
> - ext4_jbd2.o migrate.o mballoc.o
> + ext4_jbd2.o migrate.o mballoc.o defrag.o
>
> ext4dev-$(CONFIG_EXT4DEV_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
> ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL) += acl.o
> diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
> index 893182e..03b4154 100644
> --- a/include/linux/ext4_fs.h
> +++ b/include/linux/ext4_fs.h
> @@ -95,6 +95,7 @@ struct ext4_allocation_request {
> unsigned long len;
> /* flags. see above EXT4_MB_HINT_* */
> unsigned long flags;
> + long long excepted_group;
> };
>
> /*
> @@ -299,6 +300,14 @@ struct ext4_new_group_data {
> #define EXT4_IOC_GETRSVSZ _IOR('f', 5, long)
> #define EXT4_IOC_SETRSVSZ _IOW('f', 6, long)
> #define EXT4_IOC_MIGRATE _IO('f', 7)
> +#define EXT4_IOC_FIBMAP _IOW('f', 9, ext4_fsblk_t)
> +#define EXT4_IOC_DEFRAG _IOW('f', 10, struct ext4_ext_defrag_data)
> +#define EXT4_IOC_GROUP_INFO _IOW('f', 11, struct ext4_group_data_info)
> +#define EXT4_IOC_FREE_BLOCKS_INFO _IOW('f', 12, struct ext4_extents_info)
> +#define EXT4_IOC_EXTENTS_INFO _IOW('f', 13, struct ext4_extents_info)
> +#define EXT4_IOC_RESERVE_BLOCK _IOW('f', 14, struct ext4_extents_info)
> +#define EXT4_IOC_MOVE_VICTIM _IOW('f', 15, struct ext4_extents_info)
> +#define EXT4_IOC_BLOCK_RELEASE _IO('f', 8)
These should go with the last patch in this series, where the ioctl
commands get implemented
> /*
> * ioctl commands in 32 bit emulation
> @@ -316,6 +325,42 @@ struct ext4_new_group_data {
> #define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION
> #define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION
>
Could you add more comments about the tunables below? What are they used
for?
> +/* Used for defrag */
> +#define DEFRAG_MAX_ENT 32
This means at most 32 free space extents per block group?
> +#define DEFRAG_FORCE_TRY 1
> +#define DEFRAG_FORCE_VICTIM 2
> +#define DEFRAG_FORCE_GATHER 3
> +
And these tunables are used in the last patch in this series, so it makes
sense to move them there too.
> +struct ext4_extent_data {
> + ext4_lblk_t block; /* start logical block number */
> + ext4_fsblk_t start; /* start physical block number */
> + int len; /* blocks count */
> +};
> +
Not related to defrag, but I would like to consider this as in-core
extent structure. Maybe we should use this structure in other extents.c,
instead of sharing the same on-disk extent structure, which needs to
worry about little endian?
> +struct ext4_ext_defrag_data {
> + ext4_lblk_t start_offset; /* start offset to defrag in blocks */
> + ext4_lblk_t defrag_size; /* size of defrag in blocks */
> + ext4_fsblk_t goal; /* block offset for allocation */
> + int flag; /* free space mode flag */
> + struct ext4_extent_data ext;
> +};
> +
> +struct ext4_group_data_info {
> + int s_blocks_per_group; /* blocks per group */
> + int s_inodes_per_group; /* inodes per group */
> +};
> +
> +struct ext4_extents_info {
> + unsigned long long ino; /* inode number */
> + int max_entries; /* maximum extents count */
> + int entries; /* extent number/count */
> + ext4_lblk_t f_offset; /* file offset */
> + ext4_grpblk_t g_offset; /* group offset */
> + ext4_fsblk_t goal; /* block offset for allocation */
> + struct ext4_extent_data ext[DEFRAG_MAX_ENT];
> +};
> +
> +#define EXT4_TRANS_META_BLOCKS 4 /* bitmap + group desc + sb + inode */
>
> /*
> * Mount options
> @@ -991,6 +1036,17 @@ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
> extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
> extern void ext4_init_block_alloc_info(struct inode *);
> extern void ext4_rsv_window_add(struct super_block *sb, struct ext4_reserve_window_node *rsv);
> +extern void try_to_extend_reservation(struct ext4_reserve_window_node *,
> + struct super_block *, int);
> +extern int alloc_new_reservation(struct ext4_reserve_window_node *,
> + ext4_grpblk_t, struct super_block *,
> + ext4_group_t, struct buffer_head *);
> +extern ext4_grpblk_t bitmap_search_next_usable_block(ext4_grpblk_t,
> + struct buffer_head *, ext4_grpblk_t);
> +extern int rsv_is_empty(struct ext4_reserve_window *rsv);
> +extern int goal_in_my_reservation(struct ext4_reserve_window *rsv,
> + ext4_grpblk_t grp_goal, ext4_group_t group,
> + struct super_block *sb);
>
Later I found out why you need to export those functions, but I think
this is not used with mballoc. I will comment later.
> /* dir.c */
> extern int ext4_check_dir_entry(const char *, struct inode *,
> @@ -1000,6 +1056,7 @@ extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
> __u32 minor_hash,
> struct ext4_dir_entry_2 *dirent);
> extern void ext4_htree_free_dir_info(struct dir_private_info *p);
> +extern sector_t ext4_bmap(struct address_space *mapping, sector_t block);
>
> /* fsync.c */
> extern int ext4_sync_file (struct file *, struct dentry *, int);
> @@ -1061,6 +1118,8 @@ extern int ext4_writepage_trans_blocks(struct inode *);
> extern int ext4_block_truncate_page(handle_t *handle, struct page *page,
> struct address_space *mapping, loff_t from);
> extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page);
> +extern int ext4_get_block(struct inode *inode, sector_t iblock,
> + struct buffer_head *bh_result, int create);
>
> /* ioctl.c */
> extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
> @@ -1109,6 +1168,14 @@ extern void ext4_inode_bitmap_set(struct super_block *sb,
> struct ext4_group_desc *bg, ext4_fsblk_t blk);
> extern void ext4_inode_table_set(struct super_block *sb,
> struct ext4_group_desc *bg, ext4_fsblk_t blk);
> +/* extents.c */
> +extern handle_t *ext4_ext_journal_restart(handle_t *handle, int needed);
> +/* defrag.c */
> +extern int ext4_defrag(struct file *filp, ext4_lblk_t block_start,
> + ext4_lblk_t defrag_size, ext4_fsblk_t goal,
> + int flag, struct ext4_extent_data *ext);
> +extern int ext4_defrag_ioctl(struct inode *, struct file *, unsigned int,
> + unsigned long);
>
> static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
> {
> @@ -1226,6 +1293,7 @@ extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode,
> sector_t block, unsigned long max_blocks,
> struct buffer_head *bh, int create,
> int extend_disksize);
> +extern int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start);
> #endif /* __KERNEL__ */
>
> #endif /* _LINUX_EXT4_FS_H */
> diff --git a/include/linux/ext4_fs_extents.h b/include/linux/ext4_fs_extents.h
> index 1285c58..6fb42b1 100644
> --- a/include/linux/ext4_fs_extents.h
> +++ b/include/linux/ext4_fs_extents.h
> @@ -228,5 +228,15 @@ extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *,
> extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *,
> ext4_lblk_t *, ext4_fsblk_t *);
> extern void ext4_ext_drop_refs(struct ext4_ext_path *);
> +extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex);
> +extern void ext4_ext_drop_refs(struct ext4_ext_path *path);
> +extern ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
> + struct ext4_ext_path *path,
> + ext4_lblk_t block);
> +extern int ext4_ext_insert_extent_defrag(handle_t *handle, struct inode *inode,
> + struct ext4_ext_path *path,
> + struct ext4_extent *newext, int defrag);
> +extern ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_ext_path *path);
> +
> #endif /* _LINUX_EXT4_EXTENTS */
>
>
^ permalink raw reply [flat|nested] 15+ messages in thread
end of thread, other threads:[~2008-03-31 8:46 UTC | newest]
Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-03-24 12:31 [PATCH 1/5] ext4 online defrag header file changes Akira Fujita
2008-03-25 17:48 ` Aneesh Kumar K.V
2008-03-27 10:15 ` Akira Fujita
2008-03-29 0:49 ` Mingming Cao
-- strict thread matches above, loose matches on Subject: below --
2008-03-31 8:46 Akira Fujita
2008-03-24 12:34 Akira Fujita
2008-03-14 12:02 Akira Fujita
2008-03-14 12:58 ` Andreas Dilger
2008-03-17 6:28 ` Akira Fujita
2008-03-17 17:08 ` Andreas Dilger
2008-03-24 11:00 ` Akira Fujita
2008-03-24 11:16 ` Aneesh Kumar K.V
2008-03-24 15:00 ` Eric Sandeen
2008-03-25 2:21 ` Akira Fujita
[not found] <1204931337.14884.43.camel@localhost.localdomain>
2008-03-08 0:18 ` Mingming Cao
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).