public inbox for linux-ext4@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] fiemap support for ext3
@ 2008-04-23 19:39 Josef Bacik
  2008-04-23 23:27 ` Andreas Dilger
  0 siblings, 1 reply; 9+ messages in thread
From: Josef Bacik @ 2008-04-23 19:39 UTC (permalink / raw)
  To: linux-ext4; +Cc: adilger, sandeen

Hello,

Here is my reworked fiemap patch for ext3.  The generic fiemap handler takes the
get_block_t from the filesystem and does the fiemap that way, so then adding
ext2 support should be a snap, as well as any other fs that wants to use this.
Thanks much,

Signed-off-by: Josef Bacik <jbacik@redhat.com>

Index: linux-2.6.25/fs/ext3/file.c
===================================================================
--- linux-2.6.25.orig/fs/ext3/file.c
+++ linux-2.6.25/fs/ext3/file.c
@@ -134,5 +134,6 @@ const struct inode_operations ext3_file_
 	.removexattr	= generic_removexattr,
 #endif
 	.permission	= ext3_permission,
+	.fiemap		= ext3_fiemap,
 };
 
Index: linux-2.6.25/fs/ext3/inode.c
===================================================================
--- linux-2.6.25.orig/fs/ext3/inode.c
+++ linux-2.6.25/fs/ext3/inode.c
@@ -36,6 +36,7 @@
 #include <linux/mpage.h>
 #include <linux/uio.h>
 #include <linux/bio.h>
+#include <linux/fiemap.h>
 #include "xattr.h"
 #include "acl.h"
 
@@ -981,6 +982,17 @@ out:
 	return ret;
 }
 
+/*
+ * ext3 entry point for the fiemap inode operation: hands the request to
+ * generic_block_fiemap() with ext3_get_block as the block lookup callback.
+ */
+int ext3_fiemap(struct inode *inode, unsigned long arg)
+{
+	int err = generic_block_fiemap(inode, arg, ext3_get_block);
+
+	return err;
+}
+
 /*
  * `handle' can be NULL if create is zero
  */
Index: linux-2.6.25/include/linux/ext3_fs.h
===================================================================
--- linux-2.6.25.orig/include/linux/ext3_fs.h
+++ linux-2.6.25/include/linux/ext3_fs.h
@@ -836,6 +836,7 @@ extern void ext3_truncate (struct inode 
 extern void ext3_set_inode_flags(struct inode *);
 extern void ext3_get_inode_flags(struct ext3_inode_info *);
 extern void ext3_set_aops(struct inode *inode);
+extern int ext3_fiemap(struct inode *inode, unsigned long arg);
 
 /* ioctl.c */
 extern int ext3_ioctl (struct inode *, struct file *, unsigned int,
Index: linux-2.6.25/fs/ioctl.c
===================================================================
--- linux-2.6.25.orig/fs/ioctl.c
+++ linux-2.6.25/fs/ioctl.c
@@ -13,6 +13,8 @@
 #include <linux/security.h>
 #include <linux/module.h>
 #include <linux/uaccess.h>
+#include <linux/writeback.h>
+#include <linux/buffer_head.h>
 
 #include <asm/ioctls.h>
 
@@ -99,6 +101,187 @@ static int ioctl_fiemap(struct file *fil
 	return error;
 }
 
+/**
+ * generic_block_fiemap - answer a fiemap request via a get_block_t callback
+ * @inode: inode whose block mapping is being queried
+ * @arg: user address of a struct fiemap; extent records are written
+ *       starting sizeof(struct fiemap) bytes past that address
+ * @get_block: filesystem callback used to look up each run of blocks
+ *
+ * Walks the file from the block containing fm_start, covering at most
+ * min(fm_length, i_size) bytes and writing at most fm_extent_count
+ * records.  Unmapped runs are reported as FIEMAP_EXTENT_HOLE records.
+ * On success the header is copied back with fm_start rounded down to a
+ * block boundary, fm_length set to the bytes covered by the returned
+ * records and fm_extent_count set to the number of records written.
+ *
+ * Returns 0 on success, -ENOMEM or -EFAULT on allocation or user-copy
+ * failure, or the error returned by sync_inode() or @get_block.
+ */
+int generic_block_fiemap(struct inode *inode, unsigned long arg,
+			 get_block_t *get_block)
+{
+	struct fiemap *fiemap_s;
+	struct fiemap_extent fiemap_e;
+	struct buffer_head tmp;
+	sector_t start_blk;
+	unsigned int num_of_extents;
+	long length;
+	const long blocksize = 1L << inode->i_blkbits;
+	char __user *cur_ext_ptr = (char __user *)arg + sizeof(struct fiemap);
+	int ret = 0, hole = 0;
+
+	fiemap_s = kmalloc(sizeof(*fiemap_s), GFP_KERNEL);
+	if (!fiemap_s)
+		return -ENOMEM;
+
+	if (copy_from_user(fiemap_s, (struct fiemap __user *)arg,
+			   sizeof(*fiemap_s))) {
+		ret = -EFAULT;
+		goto out_free;
+	}
+
+	/*
+	 * Round down so the block containing fm_start is part of the
+	 * returned mapping; the adjusted start is reported back in fm_start.
+	 */
+	start_blk = fiemap_s->fm_start >> inode->i_blkbits;
+
+	/*
+	 * fm_extent_count is read as the capacity of the user's array and
+	 * then reused as the count of records written, so restart it at 0.
+	 */
+	num_of_extents = fiemap_s->fm_extent_count;
+	fiemap_s->fm_extent_count = 0;
+
+	if (fiemap_s->fm_flags & FIEMAP_FLAG_SYNC) {
+		struct writeback_control wbc = {
+			.sync_mode = WB_SYNC_ALL,
+			.nr_to_write = 0,
+		};
+		ret = sync_inode(inode, &wbc);
+		if (ret)
+			goto out_free;
+	}
+
+	/* guard against change */
+	mutex_lock(&inode->i_mutex);
+
+	/*
+	 * Compare as unsigned so that an fm_length of -1 ("the whole file")
+	 * is clamped to i_size; keep the result signed because the last
+	 * mapping returned may run past the end of the range.
+	 */
+	length = (long)min_t(unsigned long, fiemap_s->fm_length,
+			     i_size_read(inode));
+
+	/* widen before shifting so offsets past 4GB are not truncated */
+	fiemap_s->fm_start = (u64)start_blk << inode->i_blkbits;
+	fiemap_s->fm_length = 0;
+
+	memset(&fiemap_e, 0, sizeof(fiemap_e));
+
+	/*
+	 * Test before the first lookup: with no room in the user's array
+	 * (or nothing to map) no record may be written at all.
+	 */
+	while (length > 0 && num_of_extents) {
+		/*
+		 * b_size is the most we are interested in, so get_block can
+		 * return as many contiguous blocks as it finds
+		 */
+		memset(&tmp, 0, sizeof(tmp));
+		tmp.b_size = length;
+
+		ret = get_block(inode, start_blk, &tmp, 0);
+		if (ret)
+			break;
+
+		if (!buffer_mapped(&tmp)) {
+			/* unmapped == hole; step block by block until data */
+			if (!hole) {
+				hole = 1;
+				fiemap_e.fe_flags |= FIEMAP_EXTENT_HOLE;
+				fiemap_e.fe_offset = (u64)start_blk <<
+					inode->i_blkbits;
+			}
+
+			fiemap_e.fe_length += blocksize;
+			length -= blocksize;
+			start_blk++;
+			continue;
+		}
+
+		if (hole) {
+			/* the hole ended here; emit it ahead of the data */
+			if (copy_to_user(cur_ext_ptr, &fiemap_e,
+					 sizeof(fiemap_e))) {
+				ret = -EFAULT;
+				break;
+			}
+			cur_ext_ptr += sizeof(fiemap_e);
+			fiemap_s->fm_extent_count++;
+			fiemap_s->fm_length += fiemap_e.fe_length;
+
+			hole = 0;
+			num_of_extents--;
+			memset(&fiemap_e, 0, sizeof(fiemap_e));
+
+			if (!num_of_extents)
+				break;
+		}
+
+		length -= tmp.b_size;
+		start_blk += tmp.b_size >> inode->i_blkbits;
+
+		fiemap_e.fe_offset = (u64)tmp.b_blocknr << inode->i_blkbits;
+		fiemap_e.fe_length = tmp.b_size;
+
+		if (length <= 0)
+			fiemap_e.fe_flags |= FIEMAP_EXTENT_LAST;
+
+		if (copy_to_user(cur_ext_ptr, &fiemap_e, sizeof(fiemap_e))) {
+			ret = -EFAULT;
+			break;
+		}
+		cur_ext_ptr += sizeof(fiemap_e);
+		num_of_extents--;
+
+		fiemap_s->fm_extent_count++;
+		fiemap_s->fm_length += fiemap_e.fe_length;
+
+		memset(&fiemap_e, 0, sizeof(fiemap_e));
+	}
+
+	mutex_unlock(&inode->i_mutex);
+
+	/*
+	 * The range ended inside a hole.  Flag the record before copying it
+	 * out so FIEMAP_EXTENT_LAST actually reaches user space, and do not
+	 * touch the user buffer again once an error has been recorded.
+	 */
+	if (!ret && hole) {
+		if (length <= 0)
+			fiemap_e.fe_flags |= FIEMAP_EXTENT_LAST;
+
+		if (copy_to_user(cur_ext_ptr, &fiemap_e, sizeof(fiemap_e))) {
+			ret = -EFAULT;
+		} else {
+			fiemap_s->fm_extent_count++;
+			fiemap_s->fm_length += fiemap_e.fe_length;
+		}
+	}
+
+	if (!ret && copy_to_user((struct fiemap __user *)arg, fiemap_s,
+				 sizeof(*fiemap_s)))
+		ret = -EFAULT;
+
+out_free:
+	kfree(fiemap_s);
+	return ret;
+}
+EXPORT_SYMBOL(generic_block_fiemap);
+
 static int file_ioctl(struct file *filp, unsigned int cmd,
 		unsigned long arg)
 {
Index: linux-2.6.25/include/linux/fs.h
===================================================================
--- linux-2.6.25.orig/include/linux/fs.h
+++ linux-2.6.25/include/linux/fs.h
@@ -1925,6 +1925,8 @@ extern int vfs_fstat(unsigned int, struc
 extern long vfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
 extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
 		    unsigned long arg);
+extern int generic_block_fiemap(struct inode *inode, unsigned long arg,
+				get_block_t *get_block);
 
 extern void get_filesystem(struct file_system_type *fs);
 extern void put_filesystem(struct file_system_type *fs);

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] fiemap support for ext3
  2008-04-23 19:39 [PATCH] fiemap support for ext3 Josef Bacik
@ 2008-04-23 23:27 ` Andreas Dilger
  2008-04-23 23:48   ` Eric Sandeen
  2008-04-24  2:58   ` Andreas Dilger
  0 siblings, 2 replies; 9+ messages in thread
From: Andreas Dilger @ 2008-04-23 23:27 UTC (permalink / raw)
  To: Josef Bacik; +Cc: linux-ext4, sandeen

On Apr 23, 2008  15:39 -0400, Josef Bacik wrote:
> Here is my reworked fiemap patch for ext3.  The generic fiemap handler takes the
> get_block_t from the filesystem and does the fiemap that way, so then adding
> ext2 support should be a snap, as well as any other fs that wants to use this.

The generic mechanism is really good, some minor issues still exist.

> +int generic_block_fiemap(struct inode *inode, unsigned long arg,
> +			 get_block_t *get_block)
> +{
> +	/*
> +	 * if fm_start is in the middle of the current block, get the next
> +	 * block so we don't end up returning a start thats before the given
> +	 * fm_start
> +	 */
> +	start_blk = (fiemap_s->fm_start + (1 << inode->i_blkbits) - 1) >>
> +		inode->i_blkbits;

This should return the start of the current block (so the requested fm_start
is included in the returned mapping), instead of the start of the next block.

> +	/*
> +	 * we want the comparisons to be unsigned, in case somebody passes -1,
> +	 * meaning they want they want the entire file, but the result has to be
> +	 * signed so we can handle the case where we get more blocks than the
> +	 * size of the file
> +	 */
> +	length = (long)min((unsigned long)fiemap_s->fm_length,
> +			   (unsigned long)i_size_read(inode));

This might be written as:

	length = (long)min_t(unsigned long, fiemap_s->fm_length, i_size_read(inode));

Also, what about files that have blocks mapped after i_size?

> +		if (!tmp.b_blocknr) {
> +
> +			if (!hole) {

No empty line above "if (!hole)".

> +			fiemap_e.fe_offset = tmp.b_blocknr <<
> +				inode->i_blkbits;

This doesn't need to wrap.

> +			fiemap_e.fe_length = tmp.b_size;
> +
> +			if (length <= 0)
> +				fiemap_e.fe_flags |= FIEMAP_EXTENT_LAST;

Should the exit condition be the first hole (block == 0) after i_size?

Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] fiemap support for ext3
  2008-04-23 23:27 ` Andreas Dilger
@ 2008-04-23 23:48   ` Eric Sandeen
  2008-04-24  2:56     ` Andreas Dilger
  2008-04-24  2:58   ` Andreas Dilger
  1 sibling, 1 reply; 9+ messages in thread
From: Eric Sandeen @ 2008-04-23 23:48 UTC (permalink / raw)
  To: Andreas Dilger; +Cc: Josef Bacik, linux-ext4

Andreas Dilger wrote:
> On Apr 23, 2008  15:39 -0400, Josef Bacik wrote:

>> +	/*
>> +	 * we want the comparisons to be unsigned, in case somebody passes -1,
>> +	 * meaning they want they want the entire file, but the result has to be
>> +	 * signed so we can handle the case where we get more blocks than the
>> +	 * size of the file
>> +	 */
>> +	length = (long)min((unsigned long)fiemap_s->fm_length,
>> +			   (unsigned long)i_size_read(inode));
> 
> This might be written as:
> 
> 	length = (long)min_t(unsigned long,fiemap_s->fm_len,i_size_read(inode));
> 
> Also, what about files that have blocks mapped after i_size?

That'll be tough for ext3, though I guess for a generic interface it
could happen, so I guess it needs to be handled.  Maybe check i_blocks
against i_size, see if i_blocks indicates blocks past EOF?  Hm, I guess
that's not going to work in general; you could be completely sparse up
to an EOF at 100G and have 100M of blocks past that...

I don't know how you'd handle this in the generic case...?

-Eric

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] fiemap support for ext3
  2008-04-23 23:48   ` Eric Sandeen
@ 2008-04-24  2:56     ` Andreas Dilger
  0 siblings, 0 replies; 9+ messages in thread
From: Andreas Dilger @ 2008-04-24  2:56 UTC (permalink / raw)
  To: Eric Sandeen; +Cc: Josef Bacik, linux-ext4

On Apr 23, 2008  18:48 -0500, Eric Sandeen wrote:
> Andreas Dilger wrote:
> > On Apr 23, 2008  15:39 -0400, Josef Bacik wrote:
> >> +	/*
> >> +	 * we want the comparisons to be unsigned, in case somebody passes -1,
> >> +	 * meaning they want they want the entire file, but the result has to be
> >> +	 * signed so we can handle the case where we get more blocks than the
> >> +	 * size of the file
> >> +	 */
> >> +	length = (long)min((unsigned long)fiemap_s->fm_length,
> >> +			   (unsigned long)i_size_read(inode));
> > 
> > This might be written as:
> > 
> > 	length = (long)min_t(unsigned long,fiemap_s->fm_len,i_size_read(inode));
> > 
> > Also, what about files that have blocks mapped after i_size?
> 
> That'll be tough for ext3, though I guess for a generic interface it
> could happen, so I guess it needs to be handled.  

Right, because some filesystems may preallocate blocks beyond i_size to
avoid fragmentation.

> Maybe check i_blocks
> against i_size, see if i_blocks indicates blocks past EOF?  Hm, I guess
> that's not going to work in general; you could be completely sparse up
> to an EOF at 100G and have 100M of blocks past that...

...and there are also indirect blocks, and EA blocks that are not counted
toward i_size.  The issue is that get_block() doesn't have any way of
reporting that it is beyond EOF.  If it was an ext2/ext3-specific mechanism
then it could check in the i_block[] array and in the end of the
{t,d,}indirect blocks to know conclusively whether there are any blocks
beyond EOF.

That said, I don't think the generic interface can know everything about
each filesystem.  My suggestion was that blocks beyond i_size continue
to be mapped until a hole (block == 0) is returned.  It isn't perfect,
but would likely cover 99.9% of the cases where some small number of blocks
(<= 64kB or whatever) were allocated beyond EOF to avoid fragmentation.

Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] fiemap support for ext3
  2008-04-23 23:27 ` Andreas Dilger
  2008-04-23 23:48   ` Eric Sandeen
@ 2008-04-24  2:58   ` Andreas Dilger
  2008-04-24 12:36     ` Josef Bacik
  1 sibling, 1 reply; 9+ messages in thread
From: Andreas Dilger @ 2008-04-24  2:58 UTC (permalink / raw)
  To: Josef Bacik; +Cc: linux-ext4, sandeen

On Apr 23, 2008  15:39 -0400, Josef Bacik wrote:
> Here is my reworked fiemap patch for ext3.  The generic fiemap handler
> takes the get_block_t from the filesystem and does the fiemap that way,
> so then adding ext2 support should be a snap, as well as any other fs
> that wants to use this.

Ah, one other point is that this should also be used by ext4 to map inodes
that are not in extents format, replacing the -EOPNOTSUPP return in that
case.

Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] fiemap support for ext3
  2008-04-24  2:58   ` Andreas Dilger
@ 2008-04-24 12:36     ` Josef Bacik
  2008-04-24 12:52       ` Eric Sandeen
  2008-04-24 14:59       ` Andreas Dilger
  0 siblings, 2 replies; 9+ messages in thread
From: Josef Bacik @ 2008-04-24 12:36 UTC (permalink / raw)
  To: Andreas Dilger; +Cc: Josef Bacik, linux-ext4, sandeen

On Wed, Apr 23, 2008 at 08:58:03PM -0600, Andreas Dilger wrote:
> On Apr 23, 2008  15:39 -0400, Josef Bacik wrote:
> > Here is my reworked fiemap patch for ext3.  The generic fiemap handler
> > takes the get_block_t from the filesystem and does the fiemap that way,
> > so then adding ext2 support should be a snap, as well as any other fs
> > that wants to use this.
> 
> Ah, one other point is that this should also be used by ext4 to map inodes
> that are not in extents format, replacing the -EOPNOTSUPP return in that
> case.
>

Sounds good, but I'm wondering if we should add a flag to the FIEMAP stuff to
specify whether the inode you are mapping truly has extents or if it's just
more of a block map.  Do you think something like that would be useful?  Thank
you,

Josef 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] fiemap support for ext3
  2008-04-24 12:52       ` Eric Sandeen
@ 2008-04-24 12:46         ` Josef Bacik
  0 siblings, 0 replies; 9+ messages in thread
From: Josef Bacik @ 2008-04-24 12:46 UTC (permalink / raw)
  To: Eric Sandeen; +Cc: Josef Bacik, Andreas Dilger, linux-ext4

On Thu, Apr 24, 2008 at 07:52:12AM -0500, Eric Sandeen wrote:
> Josef Bacik wrote:
> > On Wed, Apr 23, 2008 at 08:58:03PM -0600, Andreas Dilger wrote:
> >> On Apr 23, 2008  15:39 -0400, Josef Bacik wrote:
> >>> Here is my reworked fiemap patch for ext3.  The generic fiemap handler
> >>> takes the get_block_t from the filesystem and does the fiemap that way,
> >>> so then adding ext2 support should be a snap, as well as any other fs
> >>> that wants to use this.
> >> Ah, one other point is that this should also be used by ext4 to map inodes
> >> that are not in extents format, replacing the -EOPNOTSUPP return in that
> >> case.
> >>
> > 
> > Sounds good, but I'm wondering if we should add a flag to the FIEMAP stuff to
> > specify wether the inode you are mapping does truly have extents or if its just
> > more of a block map.  Do you think something like that would be usefull?  Thank
> > you,
> > 
> > Josef 
> 
> I don't see that in itself as useful; you care about file layout, but
> why would you care about the filesystem's mechanism for tracking that
> layout?
>

idk, just thought it may be useful in the case of ext4 where you potentially
have both formats.  Mostly thinking out loud while we can still change things.
Thanks,

Josef 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] fiemap support for ext3
  2008-04-24 12:36     ` Josef Bacik
@ 2008-04-24 12:52       ` Eric Sandeen
  2008-04-24 12:46         ` Josef Bacik
  2008-04-24 14:59       ` Andreas Dilger
  1 sibling, 1 reply; 9+ messages in thread
From: Eric Sandeen @ 2008-04-24 12:52 UTC (permalink / raw)
  To: Josef Bacik; +Cc: Andreas Dilger, linux-ext4

Josef Bacik wrote:
> On Wed, Apr 23, 2008 at 08:58:03PM -0600, Andreas Dilger wrote:
>> On Apr 23, 2008  15:39 -0400, Josef Bacik wrote:
>>> Here is my reworked fiemap patch for ext3.  The generic fiemap handler
>>> takes the get_block_t from the filesystem and does the fiemap that way,
>>> so then adding ext2 support should be a snap, as well as any other fs
>>> that wants to use this.
>> Ah, one other point is that this should also be used by ext4 to map inodes
>> that are not in extents format, replacing the -EOPNOTSUPP return in that
>> case.
>>
> 
> Sounds good, but I'm wondering if we should add a flag to the FIEMAP stuff to
> specify wether the inode you are mapping does truly have extents or if its just
> more of a block map.  Do you think something like that would be usefull?  Thank
> you,
> 
> Josef 

I don't see that in itself as useful; you care about file layout, but
why would you care about the filesystem's mechanism for tracking that
layout?

-Eric

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] fiemap support for ext3
  2008-04-24 12:36     ` Josef Bacik
  2008-04-24 12:52       ` Eric Sandeen
@ 2008-04-24 14:59       ` Andreas Dilger
  1 sibling, 0 replies; 9+ messages in thread
From: Andreas Dilger @ 2008-04-24 14:59 UTC (permalink / raw)
  To: Josef Bacik; +Cc: linux-ext4, sandeen

On Apr 24, 2008  08:36 -0400, Josef Bacik wrote:
> I'm wondering if we should add a flag to the FIEMAP stuff to specify
> wether the inode you are mapping does truly have extents or if its just
> more of a block map.  Do you think something like that would be usefull?

You can already do lsattr (EXT*_IOC_GETFLAGS) on a file to determine if
it is using extents:

# lsattr /mnt/tmp/O/0/d0/459808 
-------------e /mnt/tmp/O/0/d0/459808


Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2008-04-24 15:00 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-04-23 19:39 [PATCH] fiemap support for ext3 Josef Bacik
2008-04-23 23:27 ` Andreas Dilger
2008-04-23 23:48   ` Eric Sandeen
2008-04-24  2:56     ` Andreas Dilger
2008-04-24  2:58   ` Andreas Dilger
2008-04-24 12:36     ` Josef Bacik
2008-04-24 12:52       ` Eric Sandeen
2008-04-24 12:46         ` Josef Bacik
2008-04-24 14:59       ` Andreas Dilger

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox