From: Bob Peterson <rpeterso@redhat.com>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] [GFS2 PATCH] GFS2: Add function gfs2_get_iomap
Date: Thu, 11 Aug 2016 10:59:36 -0400 (EDT) [thread overview]
Message-ID: <1935983818.2525484.1470927576083.JavaMail.zimbra@redhat.com> (raw)
In-Reply-To: <1311582922.2518701.1470926538032.JavaMail.zimbra@redhat.com>
Hi,
This patch replaces the GFS2 fiemap implementation, which used the VFS
function __generic_block_fiemap, with a new implementation that uses
the new iomap-based fiemap interface. This allows GFS2's fiemap to
skip holes. The time to run filefrag on a file with a 1 petabyte hole
is reduced from several days or weeks to milliseconds. Note that
there are Kconfig changes that affect everyone.
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
---
diff --git a/fs/Kconfig b/fs/Kconfig
index 2bc7ad7..d601aeb 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -11,7 +11,7 @@ config DCACHE_WORD_ACCESS
if BLOCK
config FS_IOMAP
- bool
+ bool "File System IOMAP Support"
source "fs/ext2/Kconfig"
source "fs/ext4/Kconfig"
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index 90c6a8f..f8fa955 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -25,6 +25,7 @@ config GFS2_FS
config GFS2_FS_LOCKING_DLM
bool "GFS2 DLM locking"
depends on (GFS2_FS!=n) && NET && INET && (IPV6 || IPV6=n) && \
+ FS_IOMAP && \
CONFIGFS_FS && SYSFS && (DLM=y || DLM=GFS2_FS)
help
Multiple node locking module for GFS2
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 6e2bec1..685f1ed 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -588,6 +588,155 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
}
/**
+ * hole_size - figure out the size of a hole
+ * @inode: The inode
+ * @lblock: The logical starting block number
+ * @mp: The metapath
+ *
+ * Walks the metadata tree from the bottom height upwards, counting the
+ * zero pointers that follow the position described by @mp at each height.
+ * A zero pointer at a given height is worth sd_inptrs times as many blocks
+ * as one at the height below it.  The walk stops at the first non-zero
+ * pointer, or as soon as the hole reaches the block holding the last byte
+ * of the file; the result never extends past that block.
+ *
+ * The caller must ensure that @lblock lies within i_size (i_size != 0 and
+ * @lblock is not beyond the last block).  Entries of @mp->mp_list are
+ * modified during the walk.
+ *
+ * Returns: The hole size in bytes
+ *
+ */
+static u64 hole_size(struct inode *inode, sector_t lblock, struct metapath *mp)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_sbd *sdp = GFS2_SB(inode);
+	struct metapath mp_eof;
+	u64 factor = 1;
+	u64 holesz = 0;
+	const __be64 *first, *end, *ptr;
+	const struct buffer_head *bh;
+	/* Last logical block of the file; all clamping is done in blocks */
+	u64 lblock_stop = (i_size_read(inode) - 1) >> inode->i_blkbits;
+	/* Number of blocks from lblock up to and including lblock_stop */
+	u64 blocks_left = lblock_stop - lblock + 1;
+	int zeroptrs;
+	int hgt;
+	bool done = false;
+
+	/* Get another metapath, to the very last byte */
+	find_metapath(sdp, lblock_stop, &mp_eof, ip->i_height);
+	for (hgt = ip->i_height - 1; hgt >= 0 && !done; hgt--) {
+		bh = mp->mp_bh[hgt];
+		if (bh) {
+			zeroptrs = 0;
+			first = metapointer(hgt, mp);
+			end = (const __be64 *)(bh->b_data + bh->b_size);
+
+			/* Count zero pointers up to the first allocated one */
+			for (ptr = first; ptr < end; ptr++) {
+				if (*ptr) {
+					done = true;
+					break;
+				}
+				zeroptrs++;
+			}
+		} else {
+			/*
+			 * NOTE(review): with no buffer at this height a full
+			 * block of pointers is counted, regardless of
+			 * mp->mp_list[hgt]; confirm this cannot overshoot
+			 * into mapped data.
+			 */
+			zeroptrs = sdp->sd_inptrs;
+		}
+
+		/* holesz and blocks_left are both in blocks: clamp at EOF */
+		holesz += factor * zeroptrs;
+		if (holesz >= blocks_left) {
+			holesz = blocks_left;
+			break;
+		}
+
+		factor *= sdp->sd_inptrs;
+		/* Continue at the next pointer one height up, but not past EOF */
+		if (hgt && (mp->mp_list[hgt - 1] < mp_eof.mp_list[hgt - 1]))
+			(mp->mp_list[hgt - 1])++;
+	}
+	return holesz << inode->i_blkbits;
+}
+
+/**
+ * gfs2_get_iomap - Map blocks from an inode to disk blocks
+ * @inode: The inode
+ * @pos: Starting position in bytes
+ * @length: Length to map, in bytes
+ * @iomap: The iomap structure
+ *
+ * Fills in @iomap->offset, @iomap->blkno, @iomap->type and @iomap->length
+ * for the extent starting at @pos.  The mapping defaults to an IOMAP_HOLE
+ * of @length bytes at @pos, which is what is reported when @pos is at or
+ * beyond i_size.  Otherwise the metadata tree is looked up under
+ * bmap_lock(): if the lookup does not reach the bottom height the hole
+ * length comes from hole_size(); if it does, the extent is IOMAP_MAPPED
+ * or IOMAP_HOLE depending on the bottom-level pointer, and its length
+ * comes from gfs2_extent_length().
+ *
+ * This function does not acquire the inode glock itself.
+ *
+ * Returns: 0 on success, -EINVAL if @length is zero, if the inode is
+ *          stuffed or if the required tree height exceeds the inode's
+ *          height, or the error returned by gfs2_meta_inode_buffer() or
+ *          lookup_metapath()
+ */
+
+int gfs2_get_iomap(struct inode *inode, loff_t pos, ssize_t length,
+ struct iomap *iomap)
+{
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+ unsigned int bsize = sdp->sd_sb.sb_bsize;
+ const u64 *arr = sdp->sd_heightsize;
+ __be64 *ptr;
+ sector_t lblock = pos >> sdp->sd_sb.sb_bsize_shift;
+ u64 size;
+ struct metapath mp;
+ int ret, eob;
+ unsigned int len;
+ struct buffer_head *bh;
+ u8 height;
+ loff_t isize = i_size_read(inode);
+
+ if (length == 0)
+ return -EINVAL;
+
+ iomap->offset = pos;
+ iomap->blkno = 0;
+ iomap->type = IOMAP_HOLE; /* default answer until a mapping is found */
+ iomap->length = length;
+
+ if (pos >= isize) /* at or beyond EOF: report the default hole */
+ return 0;
+
+ memset(mp.mp_bh, 0, sizeof(mp.mp_bh)); /* so release_metapath() is safe on every exit path */
+ bmap_lock(ip, 0);
+ if (gfs2_is_dir(ip)) { /* directories use the sd_jbsize / sd_jheightsize geometry */
+ bsize = sdp->sd_jbsize;
+ arr = sdp->sd_jheightsize;
+ }
+
+ ret = gfs2_meta_inode_buffer(ip, &mp.mp_bh[0]);
+ if (ret)
+ goto out_release;
+
+ height = ip->i_height;
+ size = (lblock + 1) * bsize;
+ while (size > arr[height]) /* find the tree height needed to address lblock */
+ height++;
+ find_metapath(sdp, lblock, &mp, height);
+ if (height > ip->i_height || gfs2_is_stuffed(ip)) {
+ ret = -EINVAL;
+ goto out_release;
+ }
+ ret = lookup_metapath(ip, &mp);
+ if (ret < 0)
+ goto out_release;
+
+ if (ret != ip->i_height) { /* presumably the lookup stopped early at an unallocated pointer - confirm against lookup_metapath() */
+ iomap->length = hole_size(inode, lblock, &mp);
+ goto out_meta_hole;
+ }
+
+ ptr = metapointer(ip->i_height - 1, &mp);
+ iomap->blkno = be64_to_cpu(*ptr); /* NOTE(review): stored as read from the pointer; confirm the unit struct iomap expects for blkno */
+ if (*ptr)
+ iomap->type = IOMAP_MAPPED;
+ else
+ iomap->type = IOMAP_HOLE;
+
+ bh = mp.mp_bh[ip->i_height - 1];
+ len = gfs2_extent_length(bh->b_data, bh->b_size, ptr,
+ length >> inode->i_blkbits, &eob);
+ iomap->length = len << sdp->sd_sb.sb_bsize_shift;
+ /* If the extent reaches or passes eof, trim it to eof rounded up to a bsize boundary */
+ if (iomap->offset + iomap->length >= isize)
+ iomap->length = (((isize - iomap->offset) + (bsize - 1)) &
+ ~(bsize - 1));
+
+out_meta_hole:
+ ret = 0;
+out_release:
+ release_metapath(&mp);
+ bmap_unlock(ip, 0);
+ return ret;
+}
+
+/**
* gfs2_block_map - Map a block from an inode to a disk block
* @inode: The inode
* @lblock: The logical block number
diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h
index 81ded5e..8da2429 100644
--- a/fs/gfs2/bmap.h
+++ b/fs/gfs2/bmap.h
@@ -10,6 +10,8 @@
#ifndef __BMAP_DOT_H__
#define __BMAP_DOT_H__
+#include <linux/iomap.h>
+
#include "inode.h"
struct inode;
@@ -47,6 +49,8 @@ static inline void gfs2_write_calc_reserv(const struct gfs2_inode *ip,
extern int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page);
extern int gfs2_block_map(struct inode *inode, sector_t lblock,
struct buffer_head *bh, int create);
+extern int gfs2_get_iomap(struct inode *inode, loff_t pos,
+ ssize_t length, struct iomap *iomap);
extern int gfs2_extent_map(struct inode *inode, u64 lblock, int *new,
u64 *dblock, unsigned *extlen);
extern int gfs2_setattr_size(struct inode *inode, u64 size);
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index e4da0ec..0d705ef 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -17,7 +17,7 @@
#include <linux/posix_acl.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
-#include <linux/fiemap.h>
+#include <linux/iomap.h>
#include <linux/security.h>
#include <asm/uaccess.h>
@@ -1990,28 +1990,65 @@ static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
return 0;
}
+/**
+ * gfs2_iomap_fiemap_begin - take the inode glock and map one extent
+ * @inode: The inode
+ * @offset: Starting position in bytes
+ * @length: Length to map, in bytes
+ * @flags: iomap flags (unused here)
+ * @iomap: The iomap structure to fill in
+ *
+ * Allocates a glock holder, acquires the inode glock in shared mode and
+ * calls gfs2_get_iomap().  On success the holder is left queued on the
+ * glock; gfs2_iomap_fiemap_end() looks it up and releases it.  On any
+ * failure the glock is dropped (if it was taken) and the holder is freed
+ * here, since presumably the end callback is not run when begin fails.
+ *
+ * Returns: 0 on success, -ENOMEM if the holder cannot be allocated, or the
+ *          error from gfs2_glock_nq_init() or gfs2_get_iomap()
+ */
+static int gfs2_iomap_fiemap_begin(struct inode *inode, loff_t offset,
+				   loff_t length, unsigned flags,
+				   struct iomap *iomap)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_holder *gh;
+	int ret;
+
+	gh = kzalloc(sizeof(*gh), GFP_NOFS);
+	if (!gh)
+		return -ENOMEM;
+
+	ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, gh);
+	if (ret)
+		goto out_free;
+
+	ret = gfs2_get_iomap(inode, offset, length, iomap);
+	if (ret)
+		goto out_dq;
+
+	/* Success: the holder stays queued until the end callback runs */
+	return 0;
+
+out_dq:
+	gfs2_glock_dq_uninit(gh);
+out_free:
+	/* The holder is heap-allocated; dequeueing alone does not free it */
+	kfree(gh);
+	return ret;
+}
+
+/**
+ * gfs2_iomap_fiemap_end - drop the inode glock taken for one extent mapping
+ * @inode: The inode
+ * @offset: Starting position in bytes (unused here)
+ * @length: Length that was mapped, in bytes (unused here)
+ * @written: Bytes processed (unused here)
+ * @flags: iomap flags (unused here)
+ * @iomap: The iomap structure (unused here)
+ *
+ * Finds the holder through which the current task holds the inode glock,
+ * dequeues it and frees it.  The holder is expected to be the one that
+ * gfs2_iomap_fiemap_begin() allocated with kzalloc(); without the kfree()
+ * it would be leaked on every mapping call.
+ *
+ * NOTE(review): assumes the holder returned by
+ * gfs2_glock_is_locked_by_me() is always the heap-allocated one from the
+ * begin callback - confirm that no other holder owned by this task can be
+ * queued on the glock at this point.
+ *
+ * Returns: 0
+ */
+static int gfs2_iomap_fiemap_end(struct inode *inode, loff_t offset,
+				 loff_t length, ssize_t written,
+				 unsigned flags, struct iomap *iomap)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_holder *gh;
+
+	gh = gfs2_glock_is_locked_by_me(ip->i_gl);
+	BUG_ON(gh == NULL);
+	gfs2_glock_dq_uninit(gh);
+	/* Dequeueing does not free the holder's memory */
+	kfree(gh);
+	return 0;
+}
+
static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_holder gh;
int ret;
+ struct iomap_ops gfs2_iomap_fiemap_ops = {
+ .iomap_begin = gfs2_iomap_fiemap_begin,
+ .iomap_end = gfs2_iomap_fiemap_end,
+ };
- ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
- if (ret)
- return ret;
inode_lock(inode);
- ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
- if (ret)
- goto out;
-
if (gfs2_is_stuffed(ip)) {
+ struct gfs2_holder gh;
u64 phys = ip->i_no_addr << inode->i_blkbits;
u64 size = i_size_read(inode);
u32 flags = FIEMAP_EXTENT_LAST|FIEMAP_EXTENT_NOT_ALIGNED|
FIEMAP_EXTENT_DATA_INLINE;
+ ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
+ if (ret)
+ goto out;
+
phys += sizeof(struct gfs2_dinode);
phys += start;
if (start + len > size)
@@ -2021,12 +2058,12 @@ static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
len, flags);
if (ret == 1)
ret = 0;
+ gfs2_glock_dq_uninit(&gh);
} else {
- ret = __generic_block_fiemap(inode, fieinfo, start, len,
- gfs2_block_map);
+ ret = iomap_fiemap(inode, fieinfo, start, len,
+ &gfs2_iomap_fiemap_ops);
}
- gfs2_glock_dq_uninit(&gh);
out:
inode_unlock(inode);
return ret;
next parent reply other threads:[~2016-08-11 14:59 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <1311582922.2518701.1470926538032.JavaMail.zimbra@redhat.com>
2016-08-11 14:59 ` Bob Peterson [this message]
2016-08-11 15:26 ` [Cluster-devel] [GFS2 PATCH] GFS2: Add function gfs2_get_iomap Steven Whitehouse
2016-08-12 19:57 ` [Cluster-devel] [GFS2 PATCH][v2] " Bob Peterson
2016-08-15 8:41 ` Steven Whitehouse
2016-08-18 18:21 ` Christoph Hellwig
2016-08-11 17:17 ` [Cluster-devel] [GFS2 PATCH] " Christoph Hellwig
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1935983818.2525484.1470927576083.JavaMail.zimbra@redhat.com \
--to=rpeterso@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.