From: Bob Peterson <rpeterso@redhat.com>
To: linux-fsdevel@vger.kernel.org
Subject: [PATCH] fs: Add hooks for get_hole_size to generic_block_fiemap
Date: Mon, 11 Aug 2014 13:01:04 -0400 (EDT) [thread overview]
Message-ID: <526123861.4943408.1407776464284.JavaMail.zimbra@redhat.com> (raw)
In-Reply-To: <998022701.4933159.1407775414413.JavaMail.zimbra@redhat.com>
Hi,
I'm just tossing this proof-of-concept patch out there to get some feedback
from the community. The problem relates to the performance of fiemap on
sparse files.
If you have a very big sparse file with huge holes, function
__generic_block_fiemap steps through each hole one block at a time
with "start_blk++;". This is extremely slow, inefficient and time-consuming.
A simple command like:
dd if=/dev/zero of=/mnt/gfs2/filler-P bs=1 count=1 seek=1P
creates a file on which a filefrag command will run continuously for days
or weeks on some file systems, even though the file contains only a single
byte.
I encountered it with GFS2.
Sure, GFS2 does not need to call the generic fiemap. I can (and did)
easily implement a GFS2-specific block_fiemap that detects and skips holes.
My question is: Does it make sense to extend this to other file systems?
This patch just adds a hook to generic_block_fiemap (and to
__generic_block_fiemap, which does the actual work) to call a
fs-specific function that returns the size of a hole in blocks. That way,
the function doesn't have to do a block-by-block search when a hole is
encountered.
This, of course, would be followed up with a GFS2 patch to take advantage
of the new hook.
I realize not all file systems can make use of this concept, so I don't
know if this is valuable or not. I thought I'd toss it out there to see
what people think.
Regards,
Bob Peterson
Red Hat File Systems
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
---
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 36d35c3..359615ee 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -778,7 +778,7 @@ int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
return generic_block_fiemap(inode, fieinfo, start, len,
- ext2_get_block);
+ ext2_get_block, NULL);
}
static int ext2_writepage(struct page *page, struct writeback_control *wbc)
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 2c6ccc4..03e0a51 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1053,7 +1053,7 @@ int ext3_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
return generic_block_fiemap(inode, fieinfo, start, len,
- ext3_get_block);
+ ext3_get_block, NULL);
}
/*
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 4da228a..9943b81 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5153,7 +5153,7 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
/* fallback to generic here if not in extents fmt */
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
return generic_block_fiemap(inode, fieinfo, start, len,
- ext4_get_block);
+ ext4_get_block, NULL);
if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS))
return -EBADR;
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index f8cf619..3788474 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -724,7 +724,7 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
return generic_block_fiemap(inode, fieinfo,
- start, len, get_data_block_fiemap);
+ start, len, get_data_block_fiemap, NULL);
}
static int f2fs_read_data_page(struct file *file, struct page *page)
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index e62e594..e93a3bd 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1936,7 +1936,7 @@ static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
ret = 0;
} else {
ret = __generic_block_fiemap(inode, fieinfo, start, len,
- gfs2_block_map);
+ gfs2_block_map, NULL);
}
gfs2_glock_dq_uninit(&gh);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 8ac3fad..5821a80 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -234,6 +234,7 @@ static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
* @start: where to start mapping in the inode
* @len: how much space to map
* @get_block: the fs's get_block function
+ * @get_hole_size: the fs's get_hole_size function
*
* This does FIEMAP for block based inodes. Basically it will just loop
* through get_block until we hit the number of extents we want to map, or we
@@ -249,7 +250,8 @@ static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
int __generic_block_fiemap(struct inode *inode,
struct fiemap_extent_info *fieinfo, loff_t start,
- loff_t len, get_block_t *get_block)
+ loff_t len, get_block_t *get_block,
+ get_hole_size_t *get_hole_size)
{
struct buffer_head map_bh;
sector_t start_blk, last_blk;
@@ -258,6 +260,7 @@ int __generic_block_fiemap(struct inode *inode,
u32 flags = FIEMAP_EXTENT_MERGED;
bool past_eof = false, whole_file = false;
int ret = 0;
+ u64 holesize = 1;
ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
if (ret)
@@ -297,7 +300,12 @@ int __generic_block_fiemap(struct inode *inode,
/* HOLE */
if (!buffer_mapped(&map_bh)) {
- start_blk++;
+ if (get_hole_size) {
+ holesize = get_hole_size(inode, start_blk);
+ BUG_ON(!holesize);
+ }
+
+ start_blk += holesize;
/*
* We want to handle the case where there is an
@@ -403,11 +411,13 @@ EXPORT_SYMBOL(__generic_block_fiemap);
int generic_block_fiemap(struct inode *inode,
struct fiemap_extent_info *fieinfo, u64 start,
- u64 len, get_block_t *get_block)
+ u64 len, get_block_t *get_block,
+ get_hole_size_t *get_hole_size)
{
int ret;
mutex_lock(&inode->i_mutex);
- ret = __generic_block_fiemap(inode, fieinfo, start, len, get_block);
+ ret = __generic_block_fiemap(inode, fieinfo, start, len, get_block,
+ get_hole_size);
mutex_unlock(&inode->i_mutex);
return ret;
}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e11d60c..48c9b67 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -65,6 +65,7 @@ extern int sysctl_protected_hardlinks;
struct buffer_head;
typedef int (get_block_t)(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create);
+typedef u64 (get_hole_size_t)(struct inode *inode, sector_t lblock);
typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
ssize_t bytes, void *private);
@@ -2545,10 +2546,12 @@ extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
extern int __generic_block_fiemap(struct inode *inode,
struct fiemap_extent_info *fieinfo,
loff_t start, loff_t len,
- get_block_t *get_block);
+ get_block_t *get_block,
+ get_hole_size_t *get_hole_size);
extern int generic_block_fiemap(struct inode *inode,
struct fiemap_extent_info *fieinfo, u64 start,
- u64 len, get_block_t *get_block);
+ u64 len, get_block_t *get_block,
+ get_hole_size_t *get_hole_size);
extern void get_filesystem(struct file_system_type *fs);
extern void put_filesystem(struct file_system_type *fs);
next parent reply other threads:[~2014-08-11 17:01 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <998022701.4933159.1407775414413.JavaMail.zimbra@redhat.com>
2014-08-11 17:01 ` Bob Peterson [this message]
2014-08-12 23:03 ` [PATCH] fs: Add hooks for get_hole_size to generic_block_fiemap Dave Chinner
2014-08-13 17:53 ` [PATCH][TRY #2] fs: Add hooks for get_hole_size to __generic_block_fiemap Bob Peterson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=526123861.4943408.1407776464284.JavaMail.zimbra@redhat.com \
--to=rpeterso@redhat.com \
--cc=linux-fsdevel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).