From: Bob Peterson <rpeterso@redhat.com>
To: linux-fsdevel@vger.kernel.org
Subject: [PATCH] fs: Add hooks for get_hole_size to generic_block_fiemap
Date: Mon, 11 Aug 2014 13:01:04 -0400 (EDT) [thread overview]
Message-ID: <526123861.4943408.1407776464284.JavaMail.zimbra@redhat.com> (raw)
In-Reply-To: <998022701.4933159.1407775414413.JavaMail.zimbra@redhat.com>
Hi,
I'm just tossing this proof-of-concept patch out there to get some feedback
from the community. The problem relates to the performance of fiemap on
sparse files.
If you have a very big sparse file with huge holes, when those holes are
encountered, function __generic_block_fiemap iterates for every block
with "start_blk++;". This is extremely slow, inefficient and time consuming.
A simple command like:
dd if=/dev/zero of=/mnt/gfs2/filler-P bs=1 count=1 seek=1P
creates a file on which a later filefrag command will run continuously for
days or weeks on some file systems, even though the file contains only a
single byte.
I encountered it with GFS2.
Sure, GFS2 does not need to call the generic fiemap. I can (and did)
easily implement a GFS2-specific block_fiemap that detects and skips holes.
My question is: Does it make sense to extend this to other file systems?
This patch just adds a hook to generic_block_fiemap (and to
__generic_block_fiemap) that calls an optional fs-specific function to
return the size of the hole, in blocks, starting at a given block. That way,
the function doesn't have to do a block-by-block search when a hole is
encountered.
This, of course, would be followed up with a GFS2 patch to take advantage
of the new hook.
I realize not all file systems can make use of this concept, so I don't
know if this is valuable or not. I thought I'd toss it out there to see
what people think.
Regards,
Bob Peterson
Red Hat File Systems
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
---
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 36d35c3..359615ee 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -778,7 +778,7 @@ int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
return generic_block_fiemap(inode, fieinfo, start, len,
- ext2_get_block);
+ ext2_get_block, NULL);
}
static int ext2_writepage(struct page *page, struct writeback_control *wbc)
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 2c6ccc4..03e0a51 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1053,7 +1053,7 @@ int ext3_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
return generic_block_fiemap(inode, fieinfo, start, len,
- ext3_get_block);
+ ext3_get_block, NULL);
}
/*
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 4da228a..9943b81 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5153,7 +5153,7 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
/* fallback to generic here if not in extents fmt */
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
return generic_block_fiemap(inode, fieinfo, start, len,
- ext4_get_block);
+ ext4_get_block, NULL);
if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS))
return -EBADR;
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index f8cf619..3788474 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -724,7 +724,7 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
return generic_block_fiemap(inode, fieinfo,
- start, len, get_data_block_fiemap);
+ start, len, get_data_block_fiemap, NULL);
}
static int f2fs_read_data_page(struct file *file, struct page *page)
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index e62e594..e93a3bd 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1936,7 +1936,7 @@ static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
ret = 0;
} else {
ret = __generic_block_fiemap(inode, fieinfo, start, len,
- gfs2_block_map);
+ gfs2_block_map, NULL);
}
gfs2_glock_dq_uninit(&gh);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 8ac3fad..5821a80 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -234,6 +234,7 @@ static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
* @start: where to start mapping in the inode
* @len: how much space to map
* @get_block: the fs's get_block function
+ * @get_hole_size: the fs's get_hole_size function
*
* This does FIEMAP for block based inodes. Basically it will just loop
* through get_block until we hit the number of extents we want to map, or we
@@ -249,7 +250,8 @@ static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
int __generic_block_fiemap(struct inode *inode,
struct fiemap_extent_info *fieinfo, loff_t start,
- loff_t len, get_block_t *get_block)
+ loff_t len, get_block_t *get_block,
+ get_hole_size_t *get_hole_size)
{
struct buffer_head map_bh;
sector_t start_blk, last_blk;
@@ -258,6 +260,7 @@ int __generic_block_fiemap(struct inode *inode,
u32 flags = FIEMAP_EXTENT_MERGED;
bool past_eof = false, whole_file = false;
int ret = 0;
+ u64 holesize = 1;
ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
if (ret)
@@ -297,7 +300,12 @@ int __generic_block_fiemap(struct inode *inode,
/* HOLE */
if (!buffer_mapped(&map_bh)) {
- start_blk++;
+ if (get_hole_size) {
+ holesize = get_hole_size(inode, start_blk);
+ BUG_ON(!holesize);
+ }
+
+ start_blk += holesize;
/*
* We want to handle the case where there is an
@@ -403,11 +411,13 @@ EXPORT_SYMBOL(__generic_block_fiemap);
int generic_block_fiemap(struct inode *inode,
struct fiemap_extent_info *fieinfo, u64 start,
- u64 len, get_block_t *get_block)
+ u64 len, get_block_t *get_block,
+ get_hole_size_t *get_hole_size)
{
int ret;
mutex_lock(&inode->i_mutex);
- ret = __generic_block_fiemap(inode, fieinfo, start, len, get_block);
+ ret = __generic_block_fiemap(inode, fieinfo, start, len, get_block,
+ get_hole_size);
mutex_unlock(&inode->i_mutex);
return ret;
}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e11d60c..48c9b67 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -65,6 +65,7 @@ extern int sysctl_protected_hardlinks;
struct buffer_head;
typedef int (get_block_t)(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create);
+typedef u64 (get_hole_size_t)(struct inode *inode, sector_t lblock);
typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
ssize_t bytes, void *private);
@@ -2545,10 +2546,12 @@ extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
extern int __generic_block_fiemap(struct inode *inode,
struct fiemap_extent_info *fieinfo,
loff_t start, loff_t len,
- get_block_t *get_block);
+ get_block_t *get_block,
+ get_hole_size_t *get_hole_size);
extern int generic_block_fiemap(struct inode *inode,
struct fiemap_extent_info *fieinfo, u64 start,
- u64 len, get_block_t *get_block);
+ u64 len, get_block_t *get_block,
+ get_hole_size_t *get_hole_size);
extern void get_filesystem(struct file_system_type *fs);
extern void put_filesystem(struct file_system_type *fs);
next parent reply other threads:[~2014-08-11 17:01 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <998022701.4933159.1407775414413.JavaMail.zimbra@redhat.com>
2014-08-11 17:01 ` Bob Peterson [this message]
2014-08-12 23:03 ` [PATCH] fs: Add hooks for get_hole_size to generic_block_fiemap Dave Chinner
2014-08-13 17:53 ` [PATCH][TRY #2] fs: Add hooks for get_hole_size to __generic_block_fiemap Bob Peterson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=526123861.4943408.1407776464284.JavaMail.zimbra@redhat.com \
--to=rpeterso@redhat.com \
--cc=linux-fsdevel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.