* [PATCH] fs: Add hooks for get_hole_size to generic_block_fiemap
[not found] <998022701.4933159.1407775414413.JavaMail.zimbra@redhat.com>
@ 2014-08-11 17:01 ` Bob Peterson
2014-08-12 23:03 ` Dave Chinner
2014-08-13 17:53 ` [PATCH][TRY #2] fs: Add hooks for get_hole_size to __generic_block_fiemap Bob Peterson
0 siblings, 2 replies; 3+ messages in thread
From: Bob Peterson @ 2014-08-11 17:01 UTC (permalink / raw)
To: linux-fsdevel
Hi,
I'm just tossing this proof-of-concept patch out there to get some feedback
from the community. The problem relates to the performance of fiemap on
sparse files.
If you have a very big sparse file with huge holes, when those holes are
encountered, function __generic_block_fiemap iterates for every block
with "start_blk++;". This is extremely slow, inefficient and time consuming.
A simple command like:
dd if=/dev/zero of=/mnt/gfs2/filler-P bs=1 count=1 seek=1P
will cause some file systems to run continuously for days or weeks given
a filefrag command, even though the file contains only a single byte.
I encountered it with GFS2.
Sure, GFS2 does not need to call the generic fiemap. I can (and did)
easily implement a GFS2-specific block_fiemap that detects and skips holes.
My question is: Does it make sense to extend this to other file systems?
This patch just adds a hook in function generic_block_fiemap to call a
fs-specific function to return a hole size. That way, the function
doesn't have to do a block-by-block search when a hole is encountered.
This, of course, would be followed up with a GFS2 patch to take advantage
of the new hook.
I realize not all file systems can make use of this concept, so I don't
know if this is valuable or not. I thought I'd toss it out there to see
what people think.
Regards,
Bob Peterson
Red Hat File Systems
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
---
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 36d35c3..359615ee 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -778,7 +778,7 @@ int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
return generic_block_fiemap(inode, fieinfo, start, len,
- ext2_get_block);
+ ext2_get_block, NULL);
}
static int ext2_writepage(struct page *page, struct writeback_control *wbc)
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 2c6ccc4..03e0a51 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1053,7 +1053,7 @@ int ext3_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
return generic_block_fiemap(inode, fieinfo, start, len,
- ext3_get_block);
+ ext3_get_block, NULL);
}
/*
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 4da228a..9943b81 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5153,7 +5153,7 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
/* fallback to generic here if not in extents fmt */
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
return generic_block_fiemap(inode, fieinfo, start, len,
- ext4_get_block);
+ ext4_get_block, NULL);
if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS))
return -EBADR;
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index f8cf619..3788474 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -724,7 +724,7 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
return generic_block_fiemap(inode, fieinfo,
- start, len, get_data_block_fiemap);
+ start, len, get_data_block_fiemap, NULL);
}
static int f2fs_read_data_page(struct file *file, struct page *page)
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index e62e594..e93a3bd 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1936,7 +1936,7 @@ static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
ret = 0;
} else {
ret = __generic_block_fiemap(inode, fieinfo, start, len,
- gfs2_block_map);
+ gfs2_block_map, NULL);
}
gfs2_glock_dq_uninit(&gh);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 8ac3fad..5821a80 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -234,6 +234,7 @@ static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
* @start: where to start mapping in the inode
* @len: how much space to map
* @get_block: the fs's get_block function
+ * @get_hole_size: the fs's get_hole_size function
*
* This does FIEMAP for block based inodes. Basically it will just loop
* through get_block until we hit the number of extents we want to map, or we
@@ -249,7 +250,8 @@ static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
int __generic_block_fiemap(struct inode *inode,
struct fiemap_extent_info *fieinfo, loff_t start,
- loff_t len, get_block_t *get_block)
+ loff_t len, get_block_t *get_block,
+ get_hole_size_t *get_hole_size)
{
struct buffer_head map_bh;
sector_t start_blk, last_blk;
@@ -258,6 +260,7 @@ int __generic_block_fiemap(struct inode *inode,
u32 flags = FIEMAP_EXTENT_MERGED;
bool past_eof = false, whole_file = false;
int ret = 0;
+ u64 holesize = 1;
ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
if (ret)
@@ -297,7 +300,12 @@ int __generic_block_fiemap(struct inode *inode,
/* HOLE */
if (!buffer_mapped(&map_bh)) {
- start_blk++;
+ if (get_hole_size) {
+ holesize = get_hole_size(inode, start_blk);
+ BUG_ON(!holesize);
+ }
+
+ start_blk += holesize;
/*
* We want to handle the case where there is an
@@ -403,11 +411,13 @@ EXPORT_SYMBOL(__generic_block_fiemap);
int generic_block_fiemap(struct inode *inode,
struct fiemap_extent_info *fieinfo, u64 start,
- u64 len, get_block_t *get_block)
+ u64 len, get_block_t *get_block,
+ get_hole_size_t *get_hole_size)
{
int ret;
mutex_lock(&inode->i_mutex);
- ret = __generic_block_fiemap(inode, fieinfo, start, len, get_block);
+ ret = __generic_block_fiemap(inode, fieinfo, start, len, get_block,
+ get_hole_size);
mutex_unlock(&inode->i_mutex);
return ret;
}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e11d60c..48c9b67 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -65,6 +65,7 @@ extern int sysctl_protected_hardlinks;
struct buffer_head;
typedef int (get_block_t)(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create);
+typedef u64 (get_hole_size_t)(struct inode *inode, sector_t lblock);
typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
ssize_t bytes, void *private);
@@ -2545,10 +2546,12 @@ extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
extern int __generic_block_fiemap(struct inode *inode,
struct fiemap_extent_info *fieinfo,
loff_t start, loff_t len,
- get_block_t *get_block);
+ get_block_t *get_block,
+ get_hole_size_t *get_hole_size);
extern int generic_block_fiemap(struct inode *inode,
struct fiemap_extent_info *fieinfo, u64 start,
- u64 len, get_block_t *get_block);
+ u64 len, get_block_t *get_block,
+ get_hole_size_t *get_hole_size);
extern void get_filesystem(struct file_system_type *fs);
extern void put_filesystem(struct file_system_type *fs);
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH] fs: Add hooks for get_hole_size to generic_block_fiemap
2014-08-11 17:01 ` [PATCH] fs: Add hooks for get_hole_size to generic_block_fiemap Bob Peterson
@ 2014-08-12 23:03 ` Dave Chinner
2014-08-13 17:53 ` [PATCH][TRY #2] fs: Add hooks for get_hole_size to __generic_block_fiemap Bob Peterson
1 sibling, 0 replies; 3+ messages in thread
From: Dave Chinner @ 2014-08-12 23:03 UTC (permalink / raw)
To: Bob Peterson; +Cc: linux-fsdevel
On Mon, Aug 11, 2014 at 01:01:04PM -0400, Bob Peterson wrote:
> Hi,
>
> I'm just tossing this proof-of-concept patch out there to get some feedback
> from the community. The problem relates to the performance of fiemap on
> sparse files.
>
> If you have a very big sparse file with huge holes, when those holes are
> encountered, function __generic_block_fiemap iterates for every block
> with "start_blk++;". This is extremely slow, inefficient and time consuming.
> A simple command like:
>
> dd if=/dev/zero of=/mnt/gfs2/filler-P bs=1 count=1 seek=1P
>
> will cause some file systems to run continuously for days or weeks given
> a filefrag command, even though the file contains only a single byte.
> I encountered it with GFS2.
>
> Sure, GFS2 does not need to call the generic fiemap. I can (and did)
> easily implement a GFS2-specific block_fiemap that detects and skips holes.
> My question is: Does it make sense to extend this to other file systems?
>
> This patch just adds a hook in function generic_block_fiemap to call a
> fs-specific function to return a hole size. That way, the function
> doesn't have to do a block-by-block search when a hole is encountered.
Perhaps it would be better to create a new helper that can return
the hole size rather than extend the helper everyone is using. The
implementation can be shared, but then other filesystems can make
the choice of which implementation they use and you don't need to
touch them at all here. i.e. add generic_block_fiemap_holesize()
rather than modify the generic_block_fiemap() API.
> I realize not all file systems can make use of this concept, so I don't
> know if this is valuable or not. I thought I'd toss it out there to see
> what people think.
If you make it a separate interface, it doesn't matter whether other
filesystems can make use of it or not ;)
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
^ permalink raw reply [flat|nested] 3+ messages in thread
* [PATCH][TRY #2] fs: Add hooks for get_hole_size to __generic_block_fiemap
2014-08-11 17:01 ` [PATCH] fs: Add hooks for get_hole_size to generic_block_fiemap Bob Peterson
2014-08-12 23:03 ` Dave Chinner
@ 2014-08-13 17:53 ` Bob Peterson
1 sibling, 0 replies; 3+ messages in thread
From: Bob Peterson @ 2014-08-13 17:53 UTC (permalink / raw)
To: linux-fsdevel
Hi,
This version of the patch implements Dave Chinner's suggestion, which
simplifies it (since GFS2 is the only direct caller of __generic_block_fiemap).
Background:
If you have a very big sparse file with huge holes, when those holes are
encountered, function __generic_block_fiemap iterates for every block
with "start_blk++;". This is extremely slow, inefficient and time consuming.
A simple command like:
dd if=/dev/zero of=/mnt/point/filler-P bs=1 count=1 seek=1P
will cause some file systems to run continuously for days or weeks given
a filefrag command, even though the file contains only a single byte.
I encountered it with GFS2.
Sure, I can (and did) easily implement a GFS2-specific block_fiemap that
detects and skips holes. But this patch extends the capability to other
file systems as well: They need only write their own get_hole_size()
function and call __generic_block_fiemap with it (plus the appropriate
inode mutex locking).
This patch just adds a hook in function __generic_block_fiemap to call a
fs-specific function to return a hole size. That way, the function
doesn't have to do a block-by-block search when a hole is encountered.
This, of course, would be followed up with a GFS2 patch to take advantage
of the new hook.
Regards,
Bob Peterson
Red Hat File Systems
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
---
fs: Add hooks for get_hole_size to __generic_block_fiemap
This patch adds a hook in function __generic_block_fiemap to call a
fs-specific function to return a hole size. That way, the function
doesn't have to do a block-by-block search when a hole is encountered.
---
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index e62e594..e93a3bd 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1936,7 +1936,7 @@ static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
ret = 0;
} else {
ret = __generic_block_fiemap(inode, fieinfo, start, len,
- gfs2_block_map);
+ gfs2_block_map, NULL);
}
gfs2_glock_dq_uninit(&gh);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 8ac3fad..1c97425 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -234,6 +234,7 @@ static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
* @start: where to start mapping in the inode
* @len: how much space to map
* @get_block: the fs's get_block function
+ * @get_hole_size: the fs's get_hole_size function
*
* This does FIEMAP for block based inodes. Basically it will just loop
* through get_block until we hit the number of extents we want to map, or we
@@ -249,7 +250,8 @@ static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
int __generic_block_fiemap(struct inode *inode,
struct fiemap_extent_info *fieinfo, loff_t start,
- loff_t len, get_block_t *get_block)
+ loff_t len, get_block_t *get_block,
+ get_hole_size_t *get_hole_size)
{
struct buffer_head map_bh;
sector_t start_blk, last_blk;
@@ -258,6 +260,7 @@ int __generic_block_fiemap(struct inode *inode,
u32 flags = FIEMAP_EXTENT_MERGED;
bool past_eof = false, whole_file = false;
int ret = 0;
+ u64 holesize = 1;
ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
if (ret)
@@ -297,7 +300,12 @@ int __generic_block_fiemap(struct inode *inode,
/* HOLE */
if (!buffer_mapped(&map_bh)) {
- start_blk++;
+ if (get_hole_size) {
+ holesize = get_hole_size(inode, start_blk);
+ BUG_ON(!holesize);
+ }
+
+ start_blk += holesize;
/*
* We want to handle the case where there is an
@@ -407,7 +415,8 @@ int generic_block_fiemap(struct inode *inode,
{
int ret;
mutex_lock(&inode->i_mutex);
- ret = __generic_block_fiemap(inode, fieinfo, start, len, get_block);
+ ret = __generic_block_fiemap(inode, fieinfo, start, len, get_block,
+ NULL);
mutex_unlock(&inode->i_mutex);
return ret;
}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e11d60c..eef1777 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -65,6 +65,7 @@ extern int sysctl_protected_hardlinks;
struct buffer_head;
typedef int (get_block_t)(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create);
+typedef u64 (get_hole_size_t)(struct inode *inode, sector_t lblock);
typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
ssize_t bytes, void *private);
@@ -2545,7 +2546,8 @@ extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
extern int __generic_block_fiemap(struct inode *inode,
struct fiemap_extent_info *fieinfo,
loff_t start, loff_t len,
- get_block_t *get_block);
+ get_block_t *get_block,
+ get_hole_size_t *get_hole_size);
extern int generic_block_fiemap(struct inode *inode,
struct fiemap_extent_info *fieinfo, u64 start,
u64 len, get_block_t *get_block);
^ permalink raw reply related [flat|nested] 3+ messages in thread
end of thread, other threads:[~2014-08-13 17:53 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <998022701.4933159.1407775414413.JavaMail.zimbra@redhat.com>
2014-08-11 17:01 ` [PATCH] fs: Add hooks for get_hole_size to generic_block_fiemap Bob Peterson
2014-08-12 23:03 ` Dave Chinner
2014-08-13 17:53 ` [PATCH][TRY #2] fs: Add hooks for get_hole_size to __generic_block_fiemap Bob Peterson
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).