[Cluster-devel] [GFS2 PATCH] GFS2: Add function gfs2_get_iomap

cluster-devel.redhat.com archive mirror
 help / color / mirror / Atom feed

* [Cluster-devel] [GFS2 PATCH] GFS2: Add function gfs2_get_iomap
       [not found] <1311582922.2518701.1470926538032.JavaMail.zimbra@redhat.com>
@ 2016-08-11 14:59 ` Bob Peterson
  2016-08-11 15:26   ` Steven Whitehouse
  2016-08-11 17:17   ` [Cluster-devel] [GFS2 PATCH] " Christoph Hellwig
  0 siblings, 2 replies; 6+ messages in thread
From: Bob Peterson @ 2016-08-11 14:59 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

This patch replaces the GFS2 fiemap implementation that used vfs
function __generic_block_fiemap with a new implementation that uses
the new iomap-based fiemap interface. This allows GFS2's fiemap to
skip holes. The time to do filefrag on a file with a 1 petabyte hole
is reduced from several days or weeks, to milliseconds. Note that
there are Kconfig changes that affect everyone.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
---
diff --git a/fs/Kconfig b/fs/Kconfig
index 2bc7ad7..d601aeb 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -11,7 +11,7 @@ config DCACHE_WORD_ACCESS
 if BLOCK
 
 config FS_IOMAP
-	bool
+	bool "File System IOMAP Support"
 
 source "fs/ext2/Kconfig"
 source "fs/ext4/Kconfig"
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index 90c6a8f..f8fa955 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -25,6 +25,7 @@ config GFS2_FS
 config GFS2_FS_LOCKING_DLM
 	bool "GFS2 DLM locking"
 	depends on (GFS2_FS!=n) && NET && INET && (IPV6 || IPV6=n) && \
+		FS_IOMAP && \
 		CONFIGFS_FS && SYSFS && (DLM=y || DLM=GFS2_FS)
 	help
 	  Multiple node locking module for GFS2
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 6e2bec1..685f1ed 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -588,6 +588,155 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
 }
 
 /**
+ * hole_size - figure out the size of a hole
+ * @inode: The inode
+ * @lblock: The logical starting block number
+ * @mp: The metapath (its mp_list entries may be advanced by this function)
+ *
+ * Returns: The hole size in bytes
+ * NOTE(review): isize is bytes, lblock/holesz are blocks; the checks mix them
+ */
+static u64 hole_size(struct inode *inode, sector_t lblock, struct metapath *mp)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_sbd *sdp = GFS2_SB(inode);
+	struct metapath mp_eof;
+	unsigned int end_of_metadata = ip->i_height - 1;
+	u64 factor = 1;
+	int hgt = end_of_metadata;
+	u64 holesz = 0, holestep;
+	const __be64 *first, *end, *ptr;
+	const struct buffer_head *bh;
+	u64 isize = i_size_read(inode);
+	int zeroptrs;
+	bool done = false;
+
+	/* Get another metapath, to the very last byte */
+	find_metapath(sdp, (isize - 1) >> inode->i_blkbits, &mp_eof,
+		      ip->i_height);
+	for (hgt = end_of_metadata; hgt >= 0 && !done; hgt--) {
+		bh = mp->mp_bh[hgt];
+		if (bh) {
+			zeroptrs = 0;
+			first = metapointer(hgt, mp);
+			end = (const __be64 *)(bh->b_data + bh->b_size);
+			/* count zero pointers up to the first one in use */
+			for (ptr = first; ptr < end; ptr++) {
+				if (*ptr) {
+					done = true;
+					break;
+				} else {
+					zeroptrs++;
+				}
+			}
+		} else {
+			zeroptrs = sdp->sd_inptrs;
+		}
+		holestep = min(factor * zeroptrs,
+			       isize - (lblock + (zeroptrs * holesz)));
+		holesz += holestep;
+		if (lblock + holesz >= isize)
+			return holesz << inode->i_blkbits;
+		/* each pointer one level up covers sd_inptrs times more */
+		factor *= sdp->sd_inptrs;
+		if (hgt && (mp->mp_list[hgt - 1] < mp_eof.mp_list[hgt - 1]))
+			(mp->mp_list[hgt - 1])++;
+	}
+	return holesz << inode->i_blkbits;
+}
+
+/**
+ * gfs2_get_iomap - Map blocks from an inode to disk blocks
+ * @inode: The inode
+ * @pos: Starting position in bytes
+ * @length: Length to map, in bytes
+ * @iomap: The iomap structure to fill in (offset, blkno, type, length)
+ *
+ * Returns: 0 on success, otherwise an error code (e.g. -EINVAL)
+ */
+
+int gfs2_get_iomap(struct inode *inode, loff_t pos, ssize_t length,
+		   struct iomap *iomap)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+	unsigned int bsize = sdp->sd_sb.sb_bsize;
+	const u64 *arr = sdp->sd_heightsize;
+	__be64 *ptr;
+	sector_t lblock = pos >> sdp->sd_sb.sb_bsize_shift;
+	u64 size;
+	struct metapath mp;
+	int ret, eob;
+	unsigned int len;
+	struct buffer_head *bh;
+	u8 height;
+	loff_t isize = i_size_read(inode);
+
+	if (length == 0)
+		return -EINVAL;
+	/* Default result: a hole spanning the whole request */
+	iomap->offset = pos;
+	iomap->blkno = 0;
+	iomap->type = IOMAP_HOLE;
+	iomap->length = length;
+	/* At or beyond i_size the default hole is returned unchanged */
+	if (pos >= isize)
+		return 0;
+
+	memset(mp.mp_bh, 0, sizeof(mp.mp_bh));
+	bmap_lock(ip, 0);
+	if (gfs2_is_dir(ip)) {
+		bsize = sdp->sd_jbsize;
+		arr = sdp->sd_jheightsize;
+	}
+
+	ret = gfs2_meta_inode_buffer(ip, &mp.mp_bh[0]);
+	if (ret)
+		goto out_release;
+
+	height = ip->i_height;
+	size = (lblock + 1) * bsize;
+	while (size > arr[height])
+		height++;
+	find_metapath(sdp, lblock, &mp, height);
+	if (height > ip->i_height || gfs2_is_stuffed(ip)) {
+		ret = -EINVAL;
+		goto out_release;
+	}
+	ret = lookup_metapath(ip, &mp);
+	if (ret < 0)
+		goto out_release;
+	/* Lookup stopped short of the full height: size the hole instead */
+	if (ret != ip->i_height) {
+		iomap->length = hole_size(inode, lblock, &mp);
+		goto out_meta_hole;
+	}
+
+	ptr = metapointer(ip->i_height - 1, &mp);
+	iomap->blkno = be64_to_cpu(*ptr);
+	if (*ptr)
+		iomap->type = IOMAP_MAPPED;
+	else
+		iomap->type = IOMAP_HOLE;
+
+	bh = mp.mp_bh[ip->i_height - 1];
+	len = gfs2_extent_length(bh->b_data, bh->b_size, ptr,
+				 length >> inode->i_blkbits, &eob);
+	iomap->length = len << sdp->sd_sb.sb_bsize_shift;
+	/* If we go past eof, round up to the nearest block */
+	if (iomap->offset + iomap->length >= isize)
+		iomap->length = (((isize - iomap->offset) + (bsize - 1)) &
+				 ~(bsize - 1));
+	/* NOTE(review): mask assumes bsize is a power of 2; sd_jbsize too? */
+out_meta_hole:
+	ret = 0;
+out_release:
+	release_metapath(&mp);
+	bmap_unlock(ip, 0);
+	return ret;
+}
+
+/**
  * gfs2_block_map - Map a block from an inode to a disk block
  * @inode: The inode
  * @lblock: The logical block number
diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h
index 81ded5e..8da2429 100644
--- a/fs/gfs2/bmap.h
+++ b/fs/gfs2/bmap.h
@@ -10,6 +10,8 @@
 #ifndef __BMAP_DOT_H__
 #define __BMAP_DOT_H__
 
+#include <linux/iomap.h>
+
 #include "inode.h"
 
 struct inode;
@@ -47,6 +49,8 @@ static inline void gfs2_write_calc_reserv(const struct gfs2_inode *ip,
 extern int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page);
 extern int gfs2_block_map(struct inode *inode, sector_t lblock,
 			  struct buffer_head *bh, int create);
+extern int gfs2_get_iomap(struct inode *inode, loff_t pos,
+			  ssize_t length, struct iomap *iomap);
 extern int gfs2_extent_map(struct inode *inode, u64 lblock, int *new,
 			   u64 *dblock, unsigned *extlen);
 extern int gfs2_setattr_size(struct inode *inode, u64 size);
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index e4da0ec..0d705ef 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -17,7 +17,7 @@
 #include <linux/posix_acl.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/crc32.h>
-#include <linux/fiemap.h>
+#include <linux/iomap.h>
 #include <linux/security.h>
 #include <asm/uaccess.h>
 
@@ -1990,28 +1990,65 @@ static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
 	return 0;
 }
 
+/* fiemap ->iomap_begin: take the inode glock shared and map one extent */
+static int gfs2_iomap_fiemap_begin(struct inode *inode, loff_t offset,
+				   loff_t length, unsigned flags,
+				   struct iomap *iomap)
+{
+	struct gfs2_holder *gh = kzalloc(sizeof(*gh), GFP_NOFS);
+	int ret;
+
+	if (!gh)
+		return -ENOMEM;
+
+	ret = gfs2_glock_nq_init(GFS2_I(inode)->i_gl, LM_ST_SHARED, 0, gh);
+	if (ret)
+		goto out_free;
+	ret = gfs2_get_iomap(inode, offset, length, iomap);
+	if (!ret)
+		return 0;	/* gh stays queued; the end callback drops it */
+	gfs2_glock_dq_uninit(gh);
+out_free:
+	kfree(gh);	/* error paths own the holder allocated above */
+	return ret;
+}
+
+static int gfs2_iomap_fiemap_end(struct inode *inode, loff_t offset,
+				 loff_t length, ssize_t written,
+				 unsigned flags, struct iomap *iomap)
+{
+	struct gfs2_holder *gh;	/* queued by gfs2_iomap_fiemap_begin() */
+
+	gh = gfs2_glock_is_locked_by_me(GFS2_I(inode)->i_gl);
+	BUG_ON(gh == NULL);
+	gfs2_glock_dq_uninit(gh);
+	kfree(gh);	/* begin() kzalloc'ed it; nothing else frees it */
+	return 0;
+}
+
 static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		       u64 start, u64 len)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_holder gh;
 	int ret;
+	struct iomap_ops gfs2_iomap_fiemap_ops = {
+		.iomap_begin = gfs2_iomap_fiemap_begin,
+		.iomap_end = gfs2_iomap_fiemap_end,
+	};
 
-	ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
-	if (ret)
-		return ret;
 
 	inode_lock(inode);
 
-	ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
-	if (ret)
-		goto out;
-
 	if (gfs2_is_stuffed(ip)) {
+		struct gfs2_holder gh;
 		u64 phys = ip->i_no_addr << inode->i_blkbits;
 		u64 size = i_size_read(inode);
 		u32 flags = FIEMAP_EXTENT_LAST|FIEMAP_EXTENT_NOT_ALIGNED|
 			    FIEMAP_EXTENT_DATA_INLINE;
+		ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
+		if (ret)
+			goto out;
+
 		phys += sizeof(struct gfs2_dinode);
 		phys += start;
 		if (start + len > size)
@@ -2021,12 +2058,12 @@ static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 						      len, flags);
 		if (ret == 1)
 			ret = 0;
+		gfs2_glock_dq_uninit(&gh);
 	} else {
-		ret = __generic_block_fiemap(inode, fieinfo, start, len,
-					     gfs2_block_map);
+		ret = iomap_fiemap(inode, fieinfo, start, len,
+				   &gfs2_iomap_fiemap_ops);
 	}
 
-	gfs2_glock_dq_uninit(&gh);
 out:
 	inode_unlock(inode);
 	return ret;



^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [Cluster-devel] [GFS2 PATCH] GFS2: Add function gfs2_get_iomap
  2016-08-11 14:59 ` [Cluster-devel] [GFS2 PATCH] GFS2: Add function gfs2_get_iomap Bob Peterson
@ 2016-08-11 15:26   ` Steven Whitehouse
  2016-08-12 19:57     ` [Cluster-devel] [GFS2 PATCH][v2] " Bob Peterson
  2016-08-11 17:17   ` [Cluster-devel] [GFS2 PATCH] " Christoph Hellwig
  1 sibling, 1 reply; 6+ messages in thread
From: Steven Whitehouse @ 2016-08-11 15:26 UTC (permalink / raw)
  To: cluster-devel.redhat.com


Hi,

On 11/08/16 15:59, Bob Peterson wrote:
> Hi,
>
> This patch replaces the GFS2 fiemap implementation that used vfs
> function __generic_block_fiemap with a new implementation that uses
> the new iomap-based fiemap interface. This allows GFS2's fiemap to
> skip holes. The time to do filefrag on a file with a 1 petabyte hole
> is reduced from several days or weeks, to milliseconds. Note that
> there are Kconfig changes that affect everyone.
>
> Signed-off-by: Bob Peterson <rpeterso@redhat.com>
> ---
> diff --git a/fs/Kconfig b/fs/Kconfig
> index 2bc7ad7..d601aeb 100644
> --- a/fs/Kconfig
> +++ b/fs/Kconfig
> @@ -11,7 +11,7 @@ config DCACHE_WORD_ACCESS
>   if BLOCK
>   
>   config FS_IOMAP
> -	bool
> +	bool "File System IOMAP Support"
>   
>   source "fs/ext2/Kconfig"
>   source "fs/ext4/Kconfig"
> diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
> index 90c6a8f..f8fa955 100644
> --- a/fs/gfs2/Kconfig
> +++ b/fs/gfs2/Kconfig
> @@ -25,6 +25,7 @@ config GFS2_FS
>   config GFS2_FS_LOCKING_DLM
>   	bool "GFS2 DLM locking"
>   	depends on (GFS2_FS!=n) && NET && INET && (IPV6 || IPV6=n) && \
> +		FS_IOMAP && \
>   		CONFIGFS_FS && SYSFS && (DLM=y || DLM=GFS2_FS)
>   	help
>   	  Multiple node locking module for GFS2
> diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
> index 6e2bec1..685f1ed 100644
> --- a/fs/gfs2/bmap.c
> +++ b/fs/gfs2/bmap.c
> @@ -588,6 +588,155 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
>   }
>   
>   /**
> + * hole_size - figure out the size of a hole
> + * @ip: The inode
> + * @lblock: The logical starting block number
> + * @mp: The metapath
> + *
> + * Returns: The hole size in bytes
> + *
> + */
> +static u64 hole_size(struct inode *inode, sector_t lblock, struct metapath *mp)
> +{
> +	struct gfs2_inode *ip = GFS2_I(inode);
> +	struct gfs2_sbd *sdp = GFS2_SB(inode);
> +	struct metapath mp_eof;
> +	unsigned int end_of_metadata = ip->i_height - 1;
> +	u64 factor = 1;
> +	int hgt = end_of_metadata;
> +	u64 holesz = 0, holestep;
> +	const __be64 *first, *end, *ptr;
> +	const struct buffer_head *bh;
> +	u64 isize = i_size_read(inode);
> +	int zeroptrs;
> +	bool done = false;
> +
> +	/* Get another metapath, to the very last byte */
> +	find_metapath(sdp, (isize - 1) >> inode->i_blkbits, &mp_eof,
> +		      ip->i_height);
> +	for (hgt = end_of_metadata; hgt >= 0 && !done; hgt--) {
> +		bh = mp->mp_bh[hgt];
> +		if (bh) {
> +			zeroptrs = 0;
> +			first = metapointer(hgt, mp);
> +			end = (const __be64 *)(bh->b_data + bh->b_size);
> +
> +			for (ptr = first; ptr < end; ptr++) {
> +				if (*ptr) {
> +					done = true;
> +					break;
> +				} else {
> +					zeroptrs++;
> +				}
> +			}
> +		} else {
> +			zeroptrs = sdp->sd_inptrs;
> +		}
> +		holestep = min(factor * zeroptrs,
> +			       isize - (lblock + (zeroptrs * holesz)));
> +		holesz += holestep;
> +		if (lblock + holesz >= isize)
> +			return holesz << inode->i_blkbits;
> +
> +		factor *= sdp->sd_inptrs;
> +		if (hgt && (mp->mp_list[hgt - 1] < mp_eof.mp_list[hgt - 1]))
> +			(mp->mp_list[hgt - 1])++;
> +	}
> +	return holesz << inode->i_blkbits;
> +}
> +
> +/**
> + * gfs2_get_iomap - Map blocks from an inode to disk blocks
> + * @mapping: The address space
> + * @pos: Starting position in bytes
> + * @length: Length to map, in bytes
> + * @iomap: The iomap structure
> + *
> + * Returns: errno
> + */
> +
> +int gfs2_get_iomap(struct inode *inode, loff_t pos, ssize_t length,
> +		   struct iomap *iomap)
This function should be merged with gfs2_block_map() I think, so that 
gfs2_block_map just becomes a wrapper (which will eventually go away) 
for this function. Otherwise we will have two parallel but slightly 
different implementations of block mapping to maintain.

[snip]
> diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
> index e4da0ec..0d705ef 100644
> --- a/fs/gfs2/inode.c
> +++ b/fs/gfs2/inode.c
> @@ -17,7 +17,7 @@
>   #include <linux/posix_acl.h>
>   #include <linux/gfs2_ondisk.h>
>   #include <linux/crc32.h>
> -#include <linux/fiemap.h>
> +#include <linux/iomap.h>
>   #include <linux/security.h>
>   #include <asm/uaccess.h>
>   
> @@ -1990,28 +1990,65 @@ static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
>   	return 0;
>   }
>   
> +static int gfs2_iomap_fiemap_begin(struct inode *inode, loff_t offset,
> +				   loff_t length, unsigned flags,
> +				   struct iomap *iomap)
> +{
> +	struct gfs2_inode *ip = GFS2_I(inode);
> +	struct gfs2_holder *gh;
> +	int ret;
> +
> +	gh = kzalloc(sizeof(struct gfs2_holder), GFP_NOFS);
> +	if (!gh)
> +		return -ENOMEM;
> +
> +	ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, gh);
> +	if (ret) {
> +		kfree(gh);
> +		return ret;
> +	}
> +	ret = gfs2_get_iomap(inode, offset, length, iomap);
> +	if (ret)
> +		gfs2_glock_dq_uninit(gh);
> +	return ret;
> +}
> +
> +static int gfs2_iomap_fiemap_end(struct inode *inode, loff_t offset,
> +				 loff_t length, ssize_t written,
> +				 unsigned flags, struct iomap *iomap)
> +{
> +	struct gfs2_inode *ip = GFS2_I(inode);
> +	struct gfs2_holder *gh;
> +
> +	gh = gfs2_glock_is_locked_by_me(ip->i_gl);
Is there a better way to pass the gh from the begin function to the end 
function? I'm sure that will work, but it is the kind of thing that 
might trip up the unwary in the future,

Otherwise I think it looks good,

Steve.



^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Cluster-devel] [GFS2 PATCH] GFS2: Add function gfs2_get_iomap
  2016-08-11 14:59 ` [Cluster-devel] [GFS2 PATCH] GFS2: Add function gfs2_get_iomap Bob Peterson
  2016-08-11 15:26   ` Steven Whitehouse
@ 2016-08-11 17:17   ` Christoph Hellwig
  1 sibling, 0 replies; 6+ messages in thread
From: Christoph Hellwig @ 2016-08-11 17:17 UTC (permalink / raw)
  To: cluster-devel.redhat.com

>  config FS_IOMAP
> -	bool
> +	bool "File System IOMAP Support"

This shouldn't be a user visible option...

> diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
> index 90c6a8f..f8fa955 100644
> --- a/fs/gfs2/Kconfig
> +++ b/fs/gfs2/Kconfig
> @@ -25,6 +25,7 @@ config GFS2_FS
>  config GFS2_FS_LOCKING_DLM
>  	bool "GFS2 DLM locking"
>  	depends on (GFS2_FS!=n) && NET && INET && (IPV6 || IPV6=n) && \
> +		FS_IOMAP && \
>  		CONFIGFS_FS && SYSFS && (DLM=y || DLM=GFS2_FS)

... just use select here



^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Cluster-devel] [GFS2 PATCH][v2] GFS2: Add function gfs2_get_iomap
  2016-08-11 15:26   ` Steven Whitehouse
@ 2016-08-12 19:57     ` Bob Peterson
  2016-08-15  8:41       ` Steven Whitehouse
  2016-08-18 18:21       ` Christoph Hellwig
  0 siblings, 2 replies; 6+ messages in thread
From: Bob Peterson @ 2016-08-12 19:57 UTC (permalink / raw)
  To: cluster-devel.redhat.com

----- Original Message -----
| > +int gfs2_get_iomap(struct inode *inode, loff_t pos, ssize_t length,
| > +		   struct iomap *iomap)
| This function should be merged with gfs2_block_map() I think, so that
| gfs2_block_map just becomes a wrapper (which will eventually go away)
| for this function. Otherwise we will have two parallel but slightly
| different implementations of block mapping to maintain.

Yes, I guess that's worth doing. I did this and included the revised
patch below.

| Is there a better way to pass the gh from the begin function to the end
| function? I'm sure that will work, but it is the kind of thing that
| might trip up the unwary in the future,

Yeah, I would have passed the gh another way, but Christoph didn't
leave a good way in the iomap interface or other bits to do it.
At least not that I can see. It might be worth changing the interface to
add that flexibility for other file systems that might want to do this
as well. It would be easy enough to add a (void *)private pointer or
something that can be passed around. Christoph and Steve: Would you
prefer I plumb something before I do the GFS2 bits?

For now, here's a replacement patch which still does it the only way possible.

Bob Peterson
Red Hat File Systems
---
This patch replaces the GFS2 fiemap implementation that used vfs
function __generic_block_fiemap with a new implementation that uses
the new iomap-based fiemap interface. This allows GFS2's fiemap to
skip holes. The time to do filefrag on a file with a 1 petabyte hole
is reduced from several days or weeks, to milliseconds. Note that
there are Kconfig changes that affect everyone.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
---
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index 90c6a8f..f8fa955 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -25,6 +25,7 @@ config GFS2_FS
 config GFS2_FS_LOCKING_DLM
 	bool "GFS2 DLM locking"
 	depends on (GFS2_FS!=n) && NET && INET && (IPV6 || IPV6=n) && \
+		FS_IOMAP && \
 		CONFIGFS_FS && SYSFS && (DLM=y || DLM=GFS2_FS)
 	help
 	  Multiple node locking module for GFS2
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 6e2bec1..839f2a6 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -588,6 +588,66 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
 }
 
 /**
+ * hole_size - figure out the size of a hole
+ * @inode: The inode
+ * @lblock: The logical starting block number
+ * @mp: The metapath (its mp_list entries may be advanced by this function)
+ *
+ * Returns: The hole size in bytes
+ * NOTE(review): isize is bytes, lblock/holesz are blocks; the checks mix them
+ */
+static u64 hole_size(struct inode *inode, sector_t lblock, struct metapath *mp)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_sbd *sdp = GFS2_SB(inode);
+	struct metapath mp_eof;
+	unsigned int end_of_metadata = ip->i_height - 1;
+	u64 factor = 1;
+	int hgt = end_of_metadata;
+	u64 holesz = 0, holestep;
+	const __be64 *first, *end, *ptr;
+	const struct buffer_head *bh;
+	u64 isize = i_size_read(inode);
+	int zeroptrs;
+	bool done = false;
+
+	/* Get another metapath, to the very last byte */
+	find_metapath(sdp, (isize - 1) >> inode->i_blkbits, &mp_eof,
+		      ip->i_height);
+	for (hgt = end_of_metadata; hgt >= 0 && !done; hgt--) {
+		bh = mp->mp_bh[hgt];
+		if (bh) {
+			zeroptrs = 0;
+			first = metapointer(hgt, mp);
+			end = (const __be64 *)(bh->b_data + bh->b_size);
+			/* count zero pointers up to the first one in use */
+			for (ptr = first; ptr < end; ptr++) {
+				if (*ptr) {
+					done = true;
+					break;
+				} else {
+					zeroptrs++;
+				}
+			}
+		} else {
+			zeroptrs = sdp->sd_inptrs;
+		}
+		holestep = min(factor * zeroptrs,
+			       isize - (lblock + (zeroptrs * holesz)));
+		holesz += holestep;
+		if (lblock + holesz >= isize)
+			return holesz << inode->i_blkbits;
+		/* each pointer one level up covers sd_inptrs times more */
+		factor *= sdp->sd_inptrs;
+		if (hgt && (mp->mp_list[hgt - 1] < mp_eof.mp_list[hgt - 1]))
+			(mp->mp_list[hgt - 1])++;
+	}
+	return holesz << inode->i_blkbits;
+}
+/* Selects what _gfs2_block_map()'s iomap_or_bh argument points to */
+enum gfs2_map_type { maptype_iomap = 1, maptype_bhmap = 2, };
+
+/**
  * gfs2_block_map - Map a block from an inode to a disk block
  * @inode: The inode
  * @lblock: The logical block number
@@ -601,13 +661,14 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
  * Returns: errno
  */
 
-int gfs2_block_map(struct inode *inode, sector_t lblock,
-		   struct buffer_head *bh_map, int create)
+static int _gfs2_block_map(struct inode *inode, loff_t pos, ssize_t length,
+			   int create, enum gfs2_map_type maptype,
+			   void *iomap_or_bh)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
 	unsigned int bsize = sdp->sd_sb.sb_bsize;
-	const size_t maxlen = bh_map->b_size >> inode->i_blkbits;
+	size_t maxlen = length >> inode->i_blkbits;
 	const u64 *arr = sdp->sd_heightsize;
 	__be64 *ptr;
 	u64 size;
@@ -617,14 +678,33 @@ int gfs2_block_map(struct inode *inode, sector_t lblock,
 	unsigned int len;
 	struct buffer_head *bh;
 	u8 height;
-
-	BUG_ON(maxlen == 0);
+	sector_t lblock = pos >> sdp->sd_sb.sb_bsize_shift;
+	struct buffer_head *bh_map = NULL;
+	struct iomap *iomap = NULL;
+	loff_t isize = i_size_read(inode);
+
+	if (maxlen == 0)
+		return -EINVAL;
+
+	if (maptype == maptype_bhmap) {
+		bh_map = (struct buffer_head *)iomap_or_bh;
+		clear_buffer_mapped(bh_map);
+		clear_buffer_new(bh_map);
+		clear_buffer_boundary(bh_map);
+	} else if (maptype == maptype_iomap) {
+		iomap = (struct iomap *)iomap_or_bh;
+		iomap->offset = pos;
+		iomap->blkno = 0;
+		iomap->type = IOMAP_HOLE;
+		iomap->length = length;
+		if (pos >= isize)
+			return 0;
+	} else {
+		BUG();
+	}
 
 	memset(mp.mp_bh, 0, sizeof(mp.mp_bh));
 	bmap_lock(ip, create);
-	clear_buffer_mapped(bh_map);
-	clear_buffer_new(bh_map);
-	clear_buffer_boundary(bh_map);
 	trace_gfs2_bmap(ip, bh_map, lblock, create, 1);
 	if (gfs2_is_dir(ip)) {
 		bsize = sdp->sd_jbsize;
@@ -640,23 +720,47 @@ int gfs2_block_map(struct inode *inode, sector_t lblock,
 	while (size > arr[height])
 		height++;
 	find_metapath(sdp, lblock, &mp, height);
-	ret = 1;
-	if (height > ip->i_height || gfs2_is_stuffed(ip))
+	if (height > ip->i_height || gfs2_is_stuffed(ip)) {
+		ret = (maptype == maptype_iomap ? -EINVAL : 1);
 		goto do_alloc;
+	}
 	ret = lookup_metapath(ip, &mp);
 	if (ret < 0)
 		goto out;
-	if (ret != ip->i_height)
-		goto do_alloc;
+	if (ret != ip->i_height) {
+		if (maptype == maptype_bhmap)
+			goto do_alloc;
+
+		iomap->length = hole_size(inode, lblock, &mp);
+		goto out_meta_hole;
+	}
 	ptr = metapointer(ip->i_height - 1, &mp);
-	if (*ptr == 0)
-		goto do_alloc;
-	map_bh(bh_map, inode->i_sb, be64_to_cpu(*ptr));
+
+	if (maptype == maptype_bhmap) {
+		if (*ptr == 0)
+			goto do_alloc;
+		map_bh(bh_map, inode->i_sb, be64_to_cpu(*ptr));
+	} else {
+		iomap->blkno = be64_to_cpu(*ptr);
+		if (*ptr)
+			iomap->type = IOMAP_MAPPED;
+		else
+			iomap->type = IOMAP_HOLE;
+	}
 	bh = mp.mp_bh[ip->i_height - 1];
 	len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, maxlen, &eob);
-	bh_map->b_size = (len << inode->i_blkbits);
-	if (eob)
-		set_buffer_boundary(bh_map);
+	if (maptype == maptype_bhmap) {
+		bh_map->b_size = (len << inode->i_blkbits);
+		if (eob)
+			set_buffer_boundary(bh_map);
+	} else {
+		iomap->length = len << sdp->sd_sb.sb_bsize_shift;
+		/* If we go past eof, round up to the nearest block */
+		if (iomap->offset + iomap->length >= isize)
+			iomap->length = (((isize - iomap->offset) + (bsize - 1)) &
+					 ~(bsize - 1));
+	}
+out_meta_hole:
 	ret = 0;
 out:
 	release_metapath(&mp);
@@ -665,18 +769,61 @@ out:
 	return ret;
 
 do_alloc:
-	/* All allocations are done here, firstly check create flag */
-	if (!create) {
-		BUG_ON(gfs2_is_stuffed(ip));
-		ret = 0;
-		goto out;
-	}
+	if (maptype == maptype_bhmap) {
+		/* All allocations are done here, firstly check create flag */
+		if (!create) {
+			BUG_ON(gfs2_is_stuffed(ip));
+			ret = 0;
+			goto out;
+		}
 
-	/* At this point ret is the tree depth of already allocated blocks */
-	ret = gfs2_bmap_alloc(inode, lblock, bh_map, &mp, ret, height, maxlen);
+		/* At this point ret is the tree depth of already allocated
+		   blocks */
+		ret = gfs2_bmap_alloc(inode, lblock, bh_map, &mp, ret, height,
+				      maxlen);
+	}
 	goto out;
 }
 
+/**
+ * gfs2_get_iomap - Map blocks from an inode to disk blocks
+ * @inode: The inode
+ * @pos: Starting position in bytes
+ * @length: Length to map, in bytes
+ * @iomap: The iomap structure to fill in
+ *
+ * Returns: errno (create is passed as 0 to _gfs2_block_map())
+ */
+
+int gfs2_get_iomap(struct inode *inode, loff_t pos, ssize_t length,
+		   struct iomap *iomap)
+{
+	return _gfs2_block_map(inode, pos, length, 0, maptype_iomap, iomap);
+}
+
+/**
+ * gfs2_block_map - Map a block from an inode to a disk block
+ * @inode: The inode
+ * @lblock: The logical block number (widened to loff_t before shifting)
+ * @bh_map: The bh to be mapped
+ * @create: True if it's ok to alloc blocks to satisfy the request
+ *
+ * Sets buffer_mapped() if successful, sets buffer_boundary() if a
+ * read of metadata will be required before the next block can be
+ * mapped. Sets buffer_new() if new blocks were allocated.
+ *
+ * Returns: errno
+ */
+
+int gfs2_block_map(struct inode *inode, sector_t lblock,
+		   struct buffer_head *bh_map, int create)
+{
+	loff_t pos = (loff_t)lblock << GFS2_SB(inode)->sd_sb.sb_bsize_shift;
+
+	return _gfs2_block_map(inode, pos, bh_map->b_size, create,
+			       maptype_bhmap, bh_map);
+}
+
 /*
  * Deprecated: do not use in new code
  */
diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h
index 81ded5e..8da2429 100644
--- a/fs/gfs2/bmap.h
+++ b/fs/gfs2/bmap.h
@@ -10,6 +10,8 @@
 #ifndef __BMAP_DOT_H__
 #define __BMAP_DOT_H__
 
+#include <linux/iomap.h>
+
 #include "inode.h"
 
 struct inode;
@@ -47,6 +49,8 @@ static inline void gfs2_write_calc_reserv(const struct gfs2_inode *ip,
 extern int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page);
 extern int gfs2_block_map(struct inode *inode, sector_t lblock,
 			  struct buffer_head *bh, int create);
+extern int gfs2_get_iomap(struct inode *inode, loff_t pos,
+			  ssize_t length, struct iomap *iomap);
 extern int gfs2_extent_map(struct inode *inode, u64 lblock, int *new,
 			   u64 *dblock, unsigned *extlen);
 extern int gfs2_setattr_size(struct inode *inode, u64 size);
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index e4da0ec..0d705ef 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -17,7 +17,7 @@
 #include <linux/posix_acl.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/crc32.h>
-#include <linux/fiemap.h>
+#include <linux/iomap.h>
 #include <linux/security.h>
 #include <asm/uaccess.h>
 
@@ -1990,28 +1990,65 @@ static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
 	return 0;
 }
 
+/* fiemap ->iomap_begin: take the inode glock shared and map one extent */
+static int gfs2_iomap_fiemap_begin(struct inode *inode, loff_t offset,
+				   loff_t length, unsigned flags,
+				   struct iomap *iomap)
+{
+	struct gfs2_holder *gh = kzalloc(sizeof(*gh), GFP_NOFS);
+	int ret;
+
+	if (!gh)
+		return -ENOMEM;
+
+	ret = gfs2_glock_nq_init(GFS2_I(inode)->i_gl, LM_ST_SHARED, 0, gh);
+	if (ret)
+		goto out_free;
+	ret = gfs2_get_iomap(inode, offset, length, iomap);
+	if (!ret)
+		return 0;	/* gh stays queued; the end callback drops it */
+	gfs2_glock_dq_uninit(gh);
+out_free:
+	kfree(gh);	/* error paths own the holder allocated above */
+	return ret;
+}
+
+static int gfs2_iomap_fiemap_end(struct inode *inode, loff_t offset,
+				 loff_t length, ssize_t written,
+				 unsigned flags, struct iomap *iomap)
+{
+	struct gfs2_holder *gh;	/* queued by gfs2_iomap_fiemap_begin() */
+
+	gh = gfs2_glock_is_locked_by_me(GFS2_I(inode)->i_gl);
+	BUG_ON(gh == NULL);
+	gfs2_glock_dq_uninit(gh);
+	kfree(gh);	/* begin() kzalloc'ed it; nothing else frees it */
+	return 0;
+}
+
 static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		       u64 start, u64 len)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_holder gh;
 	int ret;
+	struct iomap_ops gfs2_iomap_fiemap_ops = {
+		.iomap_begin = gfs2_iomap_fiemap_begin,
+		.iomap_end = gfs2_iomap_fiemap_end,
+	};
 
-	ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
-	if (ret)
-		return ret;
 
 	inode_lock(inode);
 
-	ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
-	if (ret)
-		goto out;
-
 	if (gfs2_is_stuffed(ip)) {
+		struct gfs2_holder gh;
 		u64 phys = ip->i_no_addr << inode->i_blkbits;
 		u64 size = i_size_read(inode);
 		u32 flags = FIEMAP_EXTENT_LAST|FIEMAP_EXTENT_NOT_ALIGNED|
 			    FIEMAP_EXTENT_DATA_INLINE;
+		ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
+		if (ret)
+			goto out;
+
 		phys += sizeof(struct gfs2_dinode);
 		phys += start;
 		if (start + len > size)
@@ -2021,12 +2058,12 @@ static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 						      len, flags);
 		if (ret == 1)
 			ret = 0;
+		gfs2_glock_dq_uninit(&gh);
 	} else {
-		ret = __generic_block_fiemap(inode, fieinfo, start, len,
-					     gfs2_block_map);
+		ret = iomap_fiemap(inode, fieinfo, start, len,
+				   &gfs2_iomap_fiemap_ops);
 	}
 
-	gfs2_glock_dq_uninit(&gh);
 out:
 	inode_unlock(inode);
 	return ret;



^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [Cluster-devel] [GFS2 PATCH][v2] GFS2: Add function gfs2_get_iomap
  2016-08-12 19:57     ` [Cluster-devel] [GFS2 PATCH][v2] " Bob Peterson
@ 2016-08-15  8:41       ` Steven Whitehouse
  2016-08-18 18:21       ` Christoph Hellwig
  1 sibling, 0 replies; 6+ messages in thread
From: Steven Whitehouse @ 2016-08-15  8:41 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,


On 12/08/16 20:57, Bob Peterson wrote:
> ----- Original Message -----
> | > +int gfs2_get_iomap(struct inode *inode, loff_t pos, ssize_t length,
> | > +		   struct iomap *iomap)
> | This function should be merged with gfs2_block_map() I think, so that
> | gfs2_block_map just becomes a wrapper (which will eventually go away)
> | for this function. Otherwise we will have two parallel but slightly
> | different implementations of block mapping to maintain.
>
> Yes, I guess that's worth doing. I did this and included the revised
> patch below.
That wasn't quite what I was thinking of... comments below

> | Is there a better way to pass the gh from the begin function to the end
> | function? I'm sure that will work, but it is the kind of thing that
> | might trip up the unwary in the future,
>
> Yeah, I would have passed the gh another way, but Christoph didn't
> leave a good way in the iomap interface or other bits to do it.
> At least not that I can see. It might be worth changing the interface to
> add that flexibility for other file systems that might want to do this
> as well. It would be easy enough to add a (void *)private pointer or
> something that can be passed around. Christoph and Steve: Would you
> prefer I plumb something before I do the GFS2 bits?
>
> For now, here's a replacement patch which still does it the only way possible.
I think there is a possible race in case a single process opens the same 
file twice, unless I'm missing something there? It is an unlikely thing 
for someone to do, but we should allow for it anyway.

> Bob Peterson
> Red Hat File Systems
> ---
> This patch replaces the GFS2 fiemap implementation that used vfs
> function __generic_block_fiemap with a new implementation that uses
> the new iomap-based fiemap interface. This allows GFS2's fiemap to
> skip holes. The time to do filefrag on a file with a 1 petabyte hole
> is reduced from several days or weeks, to milliseconds. Note that
> there are Kconfig changes that affect everyone.
>
> Signed-off-by: Bob Peterson <rpeterso@redhat.com>
> ---
> diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
> index 90c6a8f..f8fa955 100644
> --- a/fs/gfs2/Kconfig
> +++ b/fs/gfs2/Kconfig
> @@ -25,6 +25,7 @@ config GFS2_FS
>   config GFS2_FS_LOCKING_DLM
>   	bool "GFS2 DLM locking"
>   	depends on (GFS2_FS!=n) && NET && INET && (IPV6 || IPV6=n) && \
> +		FS_IOMAP && \
>   		CONFIGFS_FS && SYSFS && (DLM=y || DLM=GFS2_FS)
>   	help
>   	  Multiple node locking module for GFS2
> diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
> index 6e2bec1..839f2a6 100644
> --- a/fs/gfs2/bmap.c
> +++ b/fs/gfs2/bmap.c
> @@ -588,6 +588,66 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
>   }
>   
>   /**
> + * hole_size - figure out the size of a hole
> + * @ip: The inode
> + * @lblock: The logical starting block number
> + * @mp: The metapath
> + *
> + * Returns: The hole size in bytes
> + *
> + */
> +static u64 hole_size(struct inode *inode, sector_t lblock, struct metapath *mp)
> +{
> +	struct gfs2_inode *ip = GFS2_I(inode);
> +	struct gfs2_sbd *sdp = GFS2_SB(inode);
> +	struct metapath mp_eof;
> +	unsigned int end_of_metadata = ip->i_height - 1;
> +	u64 factor = 1;
> +	int hgt = end_of_metadata;
> +	u64 holesz = 0, holestep;
> +	const __be64 *first, *end, *ptr;
> +	const struct buffer_head *bh;
> +	u64 isize = i_size_read(inode);
> +	int zeroptrs;
> +	bool done = false;
> +
> +	/* Get another metapath, to the very last byte */
> +	find_metapath(sdp, (isize - 1) >> inode->i_blkbits, &mp_eof,
> +		      ip->i_height);
> +	for (hgt = end_of_metadata; hgt >= 0 && !done; hgt--) {
> +		bh = mp->mp_bh[hgt];
> +		if (bh) {
> +			zeroptrs = 0;
> +			first = metapointer(hgt, mp);
> +			end = (const __be64 *)(bh->b_data + bh->b_size);
> +
> +			for (ptr = first; ptr < end; ptr++) {
> +				if (*ptr) {
> +					done = true;
> +					break;
> +				} else {
> +					zeroptrs++;
> +				}
> +			}
> +		} else {
> +			zeroptrs = sdp->sd_inptrs;
> +		}
> +		holestep = min(factor * zeroptrs,
> +			       isize - (lblock + (zeroptrs * holesz)));
> +		holesz += holestep;
> +		if (lblock + holesz >= isize)
> +			return holesz << inode->i_blkbits;
> +
> +		factor *= sdp->sd_inptrs;
> +		if (hgt && (mp->mp_list[hgt - 1] < mp_eof.mp_list[hgt - 1]))
> +			(mp->mp_list[hgt - 1])++;
> +	}
> +	return holesz << inode->i_blkbits;
> +}
> +
> +enum gfs2_map_type { maptype_iomap = 1, maptype_bhmap = 2, };
> +
> +/**
>    * gfs2_block_map - Map a block from an inode to a disk block
>    * @inode: The inode
>    * @lblock: The logical block number
> @@ -601,13 +661,14 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
>    * Returns: errno
>    */
>   
> -int gfs2_block_map(struct inode *inode, sector_t lblock,
> -		   struct buffer_head *bh_map, int create)
> +static int _gfs2_block_map(struct inode *inode, loff_t pos, ssize_t length,
> +			   int create, enum gfs2_map_type maptype,
> +			   void *iomap_or_bh)
>   {
Let's just call this gfs2_iomap() and make it take a struct iomap only. 
Then we'd have a new function with
the original name:

int gfs2_block_map(struct inode *inode, sector_t lblock,
		   struct buffer_head *bh_map, int create)

{
     struct iomap *iom;

     /* Set up iomap based on bh */

     ret = gfs2_iomap(inode, pos, length, create, &iom);

     /* Copy iomap data back into bh */

    return ret;
}

That way we use iomap as the main structure through our block mapping 
functions, removing the bh from those code paths entirely, and 
gfs2_block_map() becomes a function for backwards compatibility that 
we'd eventually be able to remove. It also means that you don't need the 
gfs2_map_type enum either.

Steve.




^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Cluster-devel] [GFS2 PATCH][v2] GFS2: Add function gfs2_get_iomap
  2016-08-12 19:57     ` [Cluster-devel] [GFS2 PATCH][v2] " Bob Peterson
  2016-08-15  8:41       ` Steven Whitehouse
@ 2016-08-18 18:21       ` Christoph Hellwig
  1 sibling, 0 replies; 6+ messages in thread
From: Christoph Hellwig @ 2016-08-18 18:21 UTC (permalink / raw)
  To: cluster-devel.redhat.com

On Fri, Aug 12, 2016 at 03:57:59PM -0400, Bob Peterson wrote:
> | Is there a better way to pass the gh from the begin function to the end
> | function? I'm sure that will work, but it is the kind of thing that
> | might trip up the unwary in the future,
> 
> Yeah, I would have passed the gh another way, but Christoph didn't
> leave a good way in the iomap interface or other bits to do it.
> At least not that I can see. It might be worth changing the interface to
> add that flexibility for other file systems that might want to do this
> as well. It would be easy enough to add a (void *)private pointer or
> something that can be passed around. Christoph and Steve: Would you
> prefer I plumb something before I do the GFS2 bits?

The interface is not set in stone.  If you have a good suggestion
to improve it for your use case, send a patch.



^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2016-08-18 18:21 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <1311582922.2518701.1470926538032.JavaMail.zimbra@redhat.com>
2016-08-11 14:59 ` [Cluster-devel] [GFS2 PATCH] GFS2: Add function gfs2_get_iomap Bob Peterson
2016-08-11 15:26   ` Steven Whitehouse
2016-08-12 19:57     ` [Cluster-devel] [GFS2 PATCH][v2] " Bob Peterson
2016-08-15  8:41       ` Steven Whitehouse
2016-08-18 18:21       ` Christoph Hellwig
2016-08-11 17:17   ` [Cluster-devel] [GFS2 PATCH] " Christoph Hellwig

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).