* [PATCH] fiemap support for ext3
@ 2008-04-23 19:39 Josef Bacik
2008-04-23 23:27 ` Andreas Dilger
0 siblings, 1 reply; 9+ messages in thread
From: Josef Bacik @ 2008-04-23 19:39 UTC (permalink / raw)
To: linux-ext4; +Cc: adilger, sandeen
Hello,
Here is my reworked fiemap patch for ext3. The generic fiemap handler takes the
get_block_t from the filesystem and does the fiemap that way, so then adding
ext2 support should be a snap, as well as any other fs that wants to use this.
Thanks much,
Signed-off-by: Josef Bacik <jbacik@redhat.com>
Index: linux-2.6.25/fs/ext3/file.c
===================================================================
--- linux-2.6.25.orig/fs/ext3/file.c
+++ linux-2.6.25/fs/ext3/file.c
@@ -134,5 +134,6 @@ const struct inode_operations ext3_file_
.removexattr = generic_removexattr,
#endif
.permission = ext3_permission,
+ .fiemap = ext3_fiemap,
};
Index: linux-2.6.25/fs/ext3/inode.c
===================================================================
--- linux-2.6.25.orig/fs/ext3/inode.c
+++ linux-2.6.25/fs/ext3/inode.c
@@ -36,6 +36,7 @@
#include <linux/mpage.h>
#include <linux/uio.h>
#include <linux/bio.h>
+#include <linux/fiemap.h>
#include "xattr.h"
#include "acl.h"
@@ -981,6 +982,11 @@ out:
return ret;
}
+int ext3_fiemap(struct inode *inode, unsigned long arg)
+{
+ return generic_block_fiemap(inode, arg, ext3_get_block);
+}
+
/*
* `handle' can be NULL if create is zero
*/
Index: linux-2.6.25/include/linux/ext3_fs.h
===================================================================
--- linux-2.6.25.orig/include/linux/ext3_fs.h
+++ linux-2.6.25/include/linux/ext3_fs.h
@@ -836,6 +836,7 @@ extern void ext3_truncate (struct inode
extern void ext3_set_inode_flags(struct inode *);
extern void ext3_get_inode_flags(struct ext3_inode_info *);
extern void ext3_set_aops(struct inode *inode);
+extern int ext3_fiemap(struct inode *inode, unsigned long arg);
/* ioctl.c */
extern int ext3_ioctl (struct inode *, struct file *, unsigned int,
Index: linux-2.6.25/fs/ioctl.c
===================================================================
--- linux-2.6.25.orig/fs/ioctl.c
+++ linux-2.6.25/fs/ioctl.c
@@ -13,6 +13,8 @@
#include <linux/security.h>
#include <linux/module.h>
#include <linux/uaccess.h>
+#include <linux/writeback.h>
+#include <linux/buffer_head.h>
#include <asm/ioctls.h>
@@ -99,6 +101,166 @@ static int ioctl_fiemap(struct file *fil
return error;
}
+int generic_block_fiemap(struct inode *inode, unsigned long arg,
+ get_block_t *get_block)
+{
+ struct fiemap *fiemap_s;
+ struct fiemap_extent fiemap_e;
+ struct buffer_head tmp;
+ unsigned int start_blk;
+ unsigned int num_of_extents;
+ long length;
+ char *cur_ext_ptr = (char *)(arg + sizeof(struct fiemap));
+ int ret, hole = 0;
+
+ fiemap_s = kmalloc(sizeof(struct fiemap), GFP_KERNEL);
+ if (!fiemap_s)
+ return -ENOMEM;
+
+ if (copy_from_user(fiemap_s, (struct fiemap __user *)arg,
+ sizeof(struct fiemap))) {
+ ret = -EFAULT;
+ goto out_free;
+ }
+
+ /*
+ * if fm_start is in the middle of the current block, get the next
+ * block so we don't end up returning a start thats before the given
+ * fm_start
+ */
+ start_blk = (fiemap_s->fm_start + (1 << inode->i_blkbits) - 1) >>
+ inode->i_blkbits;
+ num_of_extents = fiemap_s->fm_extent_count;
+
+ if (fiemap_s->fm_flags & FIEMAP_FLAG_SYNC) {
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL,
+ .nr_to_write = 0,
+ };
+ ret = sync_inode(inode, &wbc);
+ if (ret)
+ goto out_free;
+ }
+
+ /* guard against change */
+ mutex_lock(&inode->i_mutex);
+
+ /*
+ * we want the comparisons to be unsigned, in case somebody passes -1,
+ * meaning they want they want the entire file, but the result has to be
+ * signed so we can handle the case where we get more blocks than the
+ * size of the file
+ */
+ length = (long)min((unsigned long)fiemap_s->fm_length,
+ (unsigned long)i_size_read(inode));
+
+ fiemap_s->fm_start = start_blk << inode->i_blkbits;
+ fiemap_s->fm_length = 0;
+
+ memset(&fiemap_e, 0, sizeof(struct fiemap_extent));
+ do {
+ /*
+ * we set this to length because we want ext3_get_block to
+ * find as many contiguous blocks as it can
+ */
+ memset(&tmp, 0, sizeof(struct buffer_head));
+ tmp.b_size = length;
+
+ ret = get_block(inode, start_blk, &tmp, 0);
+ if (ret)
+ break;
+
+ /*
+ * Hole, we're going to have to walk the inodes blocks until
+ * find data
+ */
+ if (!tmp.b_blocknr) {
+
+ if (!hole) {
+ hole = 1;
+ fiemap_e.fe_flags |= FIEMAP_EXTENT_HOLE;
+ fiemap_e.fe_offset = start_blk <<
+ inode->i_blkbits;
+ }
+
+ fiemap_e.fe_length += 1 << inode->i_blkbits;
+ length -= 1 << inode->i_blkbits;
+ start_blk++;
+ } else {
+ if (hole &&
+ !copy_to_user(cur_ext_ptr, &fiemap_e,
+ sizeof(struct fiemap_extent))) {
+ cur_ext_ptr += sizeof(struct fiemap_extent);
+ fiemap_s->fm_extent_count++;
+ fiemap_s->fm_length += fiemap_e.fe_length;
+
+ hole = 0;
+ num_of_extents--;
+ memset(&fiemap_e, 0,
+ sizeof(struct fiemap_extent));
+
+ if (!num_of_extents || length <= 0)
+ break;
+ } else if (hole) {
+ /* copy_to_user failed */
+ ret = -EFAULT;
+ break;
+ }
+
+ length -= tmp.b_size;
+ start_blk += tmp.b_size >> inode->i_blkbits;
+
+ fiemap_e.fe_offset = tmp.b_blocknr <<
+ inode->i_blkbits;
+ fiemap_e.fe_length = tmp.b_size;
+
+ if (length <= 0)
+ fiemap_e.fe_flags |= FIEMAP_EXTENT_LAST;
+
+ if (!copy_to_user(cur_ext_ptr, &fiemap_e,
+ sizeof(struct fiemap_extent))) {
+ cur_ext_ptr += sizeof(struct fiemap_extent);
+ num_of_extents--;
+ } else {
+ ret = -EFAULT;
+ break;
+ }
+
+ fiemap_s->fm_extent_count++;
+ fiemap_s->fm_length += fiemap_e.fe_length;
+
+ memset(&fiemap_e, 0, sizeof(struct fiemap_extent));
+ }
+ } while (length > 0 && num_of_extents);
+
+ mutex_unlock(&inode->i_mutex);
+
+ /* hole at the end of the file */
+ if (hole && !copy_to_user(cur_ext_ptr, &fiemap_e,
+ sizeof(struct fiemap_extent))) {
+ fiemap_s->fm_extent_count++;
+ fiemap_s->fm_length += fiemap_e.fe_length;
+
+ if (length <= 0)
+ fiemap_e.fe_flags |= FIEMAP_EXTENT_LAST;
+
+ } else if (hole) {
+ /* copy to user failed */
+ ret = -EFAULT;
+ }
+
+ if (!ret) {
+ if (copy_to_user((char *)arg, fiemap_s,
+ sizeof(struct fiemap)))
+ ret = -EFAULT;
+ }
+
+out_free:
+ kfree(fiemap_s);
+ return ret;
+}
+EXPORT_SYMBOL(generic_block_fiemap);
+
static int file_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg)
{
Index: linux-2.6.25/include/linux/fs.h
===================================================================
--- linux-2.6.25.orig/include/linux/fs.h
+++ linux-2.6.25/include/linux/fs.h
@@ -1925,6 +1925,8 @@ extern int vfs_fstat(unsigned int, struc
extern long vfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
unsigned long arg);
+extern int generic_block_fiemap(struct inode *inode, unsigned long arg,
+ get_block_t *get_block);
extern void get_filesystem(struct file_system_type *fs);
extern void put_filesystem(struct file_system_type *fs);
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] fiemap support for ext3
2008-04-23 19:39 [PATCH] fiemap support for ext3 Josef Bacik
@ 2008-04-23 23:27 ` Andreas Dilger
2008-04-23 23:48 ` Eric Sandeen
2008-04-24 2:58 ` Andreas Dilger
0 siblings, 2 replies; 9+ messages in thread
From: Andreas Dilger @ 2008-04-23 23:27 UTC (permalink / raw)
To: Josef Bacik; +Cc: linux-ext4, sandeen
On Apr 23, 2008 15:39 -0400, Josef Bacik wrote:
> Here is my reworked fiemap patch for ext3. The generic fiemap handler takes the
> get_block_t from the filesystem and does the fiemap that way, so then adding
> ext2 support should be a snap, as well as any other fs that wants to use this.
The generic mechanism is really good, some minor issues still exist.
> +int generic_block_fiemap(struct inode *inode, unsigned long arg,
> + get_block_t *get_block)
> +{
> + /*
> + * if fm_start is in the middle of the current block, get the next
> + * block so we don't end up returning a start thats before the given
> + * fm_start
> + */
> + start_blk = (fiemap_s->fm_start + (1 << inode->i_blkbits) - 1) >>
> + inode->i_blkbits;
This should return the start of the current block (so the requested fm_start
is included in the returned mapping), instead of the start of the next block.
> + /*
> + * we want the comparisons to be unsigned, in case somebody passes -1,
> + * meaning they want they want the entire file, but the result has to be
> + * signed so we can handle the case where we get more blocks than the
> + * size of the file
> + */
> + length = (long)min((unsigned long)fiemap_s->fm_length,
> + (unsigned long)i_size_read(inode));
This might be written as:
length = (long)min_t(unsigned long,fiemap_s->fm_len,i_size_read(inode));
Also, what about files that have blocks mapped after i_size?
> + if (!tmp.b_blocknr) {
> +
> + if (!hole) {
No empty line above "if (!hole)".
> + fiemap_e.fe_offset = tmp.b_blocknr <<
> + inode->i_blkbits;
This doesn't need to wrap.
> + fiemap_e.fe_length = tmp.b_size;
> +
> + if (length <= 0)
> + fiemap_e.fe_flags |= FIEMAP_EXTENT_LAST;
Should the exit condition should be the first hole (block == 0) after i_size?
Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] fiemap support for ext3
2008-04-23 23:27 ` Andreas Dilger
@ 2008-04-23 23:48 ` Eric Sandeen
2008-04-24 2:56 ` Andreas Dilger
2008-04-24 2:58 ` Andreas Dilger
1 sibling, 1 reply; 9+ messages in thread
From: Eric Sandeen @ 2008-04-23 23:48 UTC (permalink / raw)
To: Andreas Dilger; +Cc: Josef Bacik, linux-ext4
Andreas Dilger wrote:
> On Apr 23, 2008 15:39 -0400, Josef Bacik wrote:
>> + /*
>> + * we want the comparisons to be unsigned, in case somebody passes -1,
>> + * meaning they want they want the entire file, but the result has to be
>> + * signed so we can handle the case where we get more blocks than the
>> + * size of the file
>> + */
>> + length = (long)min((unsigned long)fiemap_s->fm_length,
>> + (unsigned long)i_size_read(inode));
>
> This might be written as:
>
> length = (long)min_t(unsigned long,fiemap_s->fm_len,i_size_read(inode));
>
> Also, what about files that have blocks mapped after i_size?
That'll be tough for ext3, though I guess for a generic interface it
could happen, so I guess it needs to be handled. Maybe check i_blocks
against i_size, see if i_blocks indicates blocks past EOF? Hm, I guess
that's not going to work in general; you could be completely sparse up
to an EOF at 100G and have 100M of blocks past that...
I don't know how you'd handle this in the generic case...?
-Eric
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] fiemap support for ext3
2008-04-23 23:48 ` Eric Sandeen
@ 2008-04-24 2:56 ` Andreas Dilger
0 siblings, 0 replies; 9+ messages in thread
From: Andreas Dilger @ 2008-04-24 2:56 UTC (permalink / raw)
To: Eric Sandeen; +Cc: Josef Bacik, linux-ext4
On Apr 23, 2008 18:48 -0500, Eric Sandeen wrote:
> Andreas Dilger wrote:
> > On Apr 23, 2008 15:39 -0400, Josef Bacik wrote:
> >> + /*
> >> + * we want the comparisons to be unsigned, in case somebody passes -1,
> >> + * meaning they want they want the entire file, but the result has to be
> >> + * signed so we can handle the case where we get more blocks than the
> >> + * size of the file
> >> + */
> >> + length = (long)min((unsigned long)fiemap_s->fm_length,
> >> + (unsigned long)i_size_read(inode));
> >
> > This might be written as:
> >
> > length = (long)min_t(unsigned long,fiemap_s->fm_len,i_size_read(inode));
> >
> > Also, what about files that have blocks mapped after i_size?
>
> That'll be tough for ext3, though I guess for a generic interface it
> could happen, so I guess it needs to be handled.
Right, because some filesystems may preallocate blocks beyond i_size to
avoid fragmentation.
> Maybe check i_blocks
> against i_size, see if i_blocks indicates blocks past EOF? Hm, I guess
> that's not going to work in general; you could be completely sparse up
> to an EOF at 100G and have 100M of blocks past that...
...and there are also indirect blocks, and EA blocks that are not counted
toward i_size. The issue is that getblock() doesn't have any way of
reporting that it is beyond EOF. If it was an ext2/ext3-specific mechanism
then it could check in the i_block[] array and in the end of the
{t,d,}indirect blocks to know conclusively whether there are any blocks
beyond EOF.
That said, I don't think the generic interface can know everything about
each filesystem. My suggestion was that blocks beyond i_size continue
to be mapped until a hole (block == 0) is returned. It isn't perfect,
but would likely cover 99.9% of the cases where some small number of blocks
(<= 64kB or whatever) were allocated beyond EOF to avoid fragmentation.
Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] fiemap support for ext3
2008-04-23 23:27 ` Andreas Dilger
2008-04-23 23:48 ` Eric Sandeen
@ 2008-04-24 2:58 ` Andreas Dilger
2008-04-24 12:36 ` Josef Bacik
1 sibling, 1 reply; 9+ messages in thread
From: Andreas Dilger @ 2008-04-24 2:58 UTC (permalink / raw)
To: Josef Bacik; +Cc: linux-ext4, sandeen
On Apr 23, 2008 15:39 -0400, Josef Bacik wrote:
> Here is my reworked fiemap patch for ext3. The generic fiemap handler
> takes the get_block_t from the filesystem and does the fiemap that way,
> so then adding ext2 support should be a snap, as well as any other fs
> that wants to use this.
Ah, one other point is that this should also be used by ext4 to map inodes
that are not in extents format, replacing the -EOPNOTSUPP return in that
case.
Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] fiemap support for ext3
2008-04-24 2:58 ` Andreas Dilger
@ 2008-04-24 12:36 ` Josef Bacik
2008-04-24 12:52 ` Eric Sandeen
2008-04-24 14:59 ` Andreas Dilger
0 siblings, 2 replies; 9+ messages in thread
From: Josef Bacik @ 2008-04-24 12:36 UTC (permalink / raw)
To: Andreas Dilger; +Cc: Josef Bacik, linux-ext4, sandeen
On Wed, Apr 23, 2008 at 08:58:03PM -0600, Andreas Dilger wrote:
> On Apr 23, 2008 15:39 -0400, Josef Bacik wrote:
> > Here is my reworked fiemap patch for ext3. The generic fiemap handler
> > takes the get_block_t from the filesystem and does the fiemap that way,
> > so then adding ext2 support should be a snap, as well as any other fs
> > that wants to use this.
>
> Ah, one other point is that this should also be used by ext4 to map inodes
> that are not in extents format, replacing the -EOPNOTSUPP return in that
> case.
>
Sounds good, but I'm wondering if we should add a flag to the FIEMAP stuff to
specify wether the inode you are mapping does truly have extents or if its just
more of a block map. Do you think something like that would be usefull? Thank
you,
Josef
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] fiemap support for ext3
2008-04-24 12:52 ` Eric Sandeen
@ 2008-04-24 12:46 ` Josef Bacik
0 siblings, 0 replies; 9+ messages in thread
From: Josef Bacik @ 2008-04-24 12:46 UTC (permalink / raw)
To: Eric Sandeen; +Cc: Josef Bacik, Andreas Dilger, linux-ext4
On Thu, Apr 24, 2008 at 07:52:12AM -0500, Eric Sandeen wrote:
> Josef Bacik wrote:
> > On Wed, Apr 23, 2008 at 08:58:03PM -0600, Andreas Dilger wrote:
> >> On Apr 23, 2008 15:39 -0400, Josef Bacik wrote:
> >>> Here is my reworked fiemap patch for ext3. The generic fiemap handler
> >>> takes the get_block_t from the filesystem and does the fiemap that way,
> >>> so then adding ext2 support should be a snap, as well as any other fs
> >>> that wants to use this.
> >> Ah, one other point is that this should also be used by ext4 to map inodes
> >> that are not in extents format, replacing the -EOPNOTSUPP return in that
> >> case.
> >>
> >
> > Sounds good, but I'm wondering if we should add a flag to the FIEMAP stuff to
> > specify wether the inode you are mapping does truly have extents or if its just
> > more of a block map. Do you think something like that would be usefull? Thank
> > you,
> >
> > Josef
>
> I don't see that in itself as useful; you care about file layout, but
> why would you care about the filesystem's mechanism for tracking that
> layout?
>
idk, just thought it may be usefull in the case of ext4 where you potentially
have both formats. Mostly thinking out loud while we can still change things.
Thanks,
Josef
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] fiemap support for ext3
2008-04-24 12:36 ` Josef Bacik
@ 2008-04-24 12:52 ` Eric Sandeen
2008-04-24 12:46 ` Josef Bacik
2008-04-24 14:59 ` Andreas Dilger
1 sibling, 1 reply; 9+ messages in thread
From: Eric Sandeen @ 2008-04-24 12:52 UTC (permalink / raw)
To: Josef Bacik; +Cc: Andreas Dilger, linux-ext4
Josef Bacik wrote:
> On Wed, Apr 23, 2008 at 08:58:03PM -0600, Andreas Dilger wrote:
>> On Apr 23, 2008 15:39 -0400, Josef Bacik wrote:
>>> Here is my reworked fiemap patch for ext3. The generic fiemap handler
>>> takes the get_block_t from the filesystem and does the fiemap that way,
>>> so then adding ext2 support should be a snap, as well as any other fs
>>> that wants to use this.
>> Ah, one other point is that this should also be used by ext4 to map inodes
>> that are not in extents format, replacing the -EOPNOTSUPP return in that
>> case.
>>
>
> Sounds good, but I'm wondering if we should add a flag to the FIEMAP stuff to
> specify wether the inode you are mapping does truly have extents or if its just
> more of a block map. Do you think something like that would be usefull? Thank
> you,
>
> Josef
I don't see that in itself as useful; you care about file layout, but
why would you care about the filesystem's mechanism for tracking that
layout?
-Eric
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] fiemap support for ext3
2008-04-24 12:36 ` Josef Bacik
2008-04-24 12:52 ` Eric Sandeen
@ 2008-04-24 14:59 ` Andreas Dilger
1 sibling, 0 replies; 9+ messages in thread
From: Andreas Dilger @ 2008-04-24 14:59 UTC (permalink / raw)
To: Josef Bacik; +Cc: linux-ext4, sandeen
On Apr 24, 2008 08:36 -0400, Josef Bacik wrote:
> I'm wondering if we should add a flag to the FIEMAP stuff to specify
> wether the inode you are mapping does truly have extents or if its just
> more of a block map. Do you think something like that would be usefull?
You can already do lsattr (EXT*_IOC_GETFLAGS) on a file to determine if
it is using extents:
# lsattr /mnt/tmp/O/0/d0/459808
-------------e /mnt/tmp/O/0/d0/459808
Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2008-04-24 15:00 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-04-23 19:39 [PATCH] fiemap support for ext3 Josef Bacik
2008-04-23 23:27 ` Andreas Dilger
2008-04-23 23:48 ` Eric Sandeen
2008-04-24 2:56 ` Andreas Dilger
2008-04-24 2:58 ` Andreas Dilger
2008-04-24 12:36 ` Josef Bacik
2008-04-24 12:52 ` Eric Sandeen
2008-04-24 12:46 ` Josef Bacik
2008-04-24 14:59 ` Andreas Dilger
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox