From: Steven Whitehouse <swhiteho@redhat.com>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] [PATCH v2 2/3] GFS2: Implement iomap for block_map
Date: Sat, 29 Oct 2016 10:24:45 +0100 [thread overview]
Message-ID: <58146ADD.9010702@redhat.com> (raw)
In-Reply-To: <1477682971-4517-3-git-send-email-rpeterso@redhat.com>
Hi,
Is there a missing patch here? I can see 0/3, 2/3 and 3/3, but not a 1/3
Overall this is looking a lot cleaner I think, but some comments below....
On 28/10/16 20:29, Bob Peterson wrote:
> This patch implements iomap for block mapping, and switches the
> block_map function to use it under the covers.
>
> Signed-off-by: Bob Peterson <rpeterso@redhat.com>
> ---
> fs/gfs2/bmap.c | 248 ++++++++++++++++++++++++++++++++++++++++++---------------
> fs/gfs2/bmap.h | 4 +
> 2 files changed, 189 insertions(+), 63 deletions(-)
>
> diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
> index 59b45b8..a654f67 100644
> --- a/fs/gfs2/bmap.c
> +++ b/fs/gfs2/bmap.c
> @@ -13,6 +13,7 @@
> #include <linux/blkdev.h>
> #include <linux/gfs2_ondisk.h>
> #include <linux/crc32.h>
> +#include <linux/iomap.h>
>
> #include "gfs2.h"
> #include "incore.h"
> @@ -454,10 +455,8 @@ enum alloc_state {
> * Returns: errno on error
> */
>
> -static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
> - bool zero_new, struct metapath *mp,
> - const size_t maxlen, sector_t *dblock,
> - unsigned *dblks)
> +static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
> + unsigned flags, struct metapath *mp)
> {
> struct gfs2_inode *ip = GFS2_I(inode);
> struct gfs2_sbd *sdp = GFS2_SB(inode);
> @@ -465,6 +464,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
> struct buffer_head *dibh = mp->mp_bh[0];
> u64 bn;
> unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
> + unsigned dblks = 0;
> unsigned ptrs_per_blk;
> const unsigned end_of_metadata = mp->mp_fheight - 1;
> int ret;
> @@ -472,12 +472,11 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
> enum alloc_state state;
> __be64 *ptr;
> __be64 zero_bn = 0;
> + size_t maxlen = iomap->length >> inode->i_blkbits;
>
> BUG_ON(mp->mp_aheight < 1);
> BUG_ON(dibh == NULL);
>
> - *dblock = 0;
> - *dblks = 0;
> gfs2_trans_add_meta(ip->i_gl, dibh);
>
> if (mp->mp_fheight == mp->mp_aheight) {
> @@ -485,16 +484,16 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
> /* Bottom indirect block exists, find unalloced extent size */
> ptr = metapointer(end_of_metadata, mp);
> bh = mp->mp_bh[end_of_metadata];
> - *dblks = gfs2_extent_length(bh->b_data, bh->b_size, ptr,
> - maxlen, &eob);
> - BUG_ON(*dblks < 1);
> + dblks = gfs2_extent_length(bh->b_data, bh->b_size, ptr,
> + maxlen, &eob);
> + BUG_ON(dblks < 1);
> state = ALLOC_DATA;
> } else {
> /* Need to allocate indirect blocks */
> ptrs_per_blk = mp->mp_fheight > 1 ? sdp->sd_inptrs :
> sdp->sd_diptrs;
> - *dblks = min(maxlen, (size_t)(ptrs_per_blk -
> - mp->mp_list[end_of_metadata]));
> + dblks = min(maxlen, (size_t)(ptrs_per_blk -
> + mp->mp_list[end_of_metadata]));
> if (mp->mp_fheight == ip->i_height) {
> /* Writing into existing tree, extend tree down */
> iblks = mp->mp_fheight - mp->mp_aheight;
> @@ -510,7 +509,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
>
> /* start of the second part of the function (state machine) */
>
> - blks = *dblks + iblks;
> + blks = dblks + iblks;
> i = mp->mp_aheight;
> do {
> int error;
> @@ -567,26 +566,28 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
> break;
> /* Tree complete, adding data blocks */
> case ALLOC_DATA:
> - BUG_ON(n > *dblks);
> + BUG_ON(n > dblks);
> BUG_ON(mp->mp_bh[end_of_metadata] == NULL);
> gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[end_of_metadata]);
> - *dblks = n;
> + dblks = n;
> ptr = metapointer(end_of_metadata, mp);
> - *dblock = bn;
> + iomap->blkno = bn;
> while (n-- > 0)
> *ptr++ = cpu_to_be64(bn++);
> - if (zero_new) {
> - ret = sb_issue_zeroout(sb, *dblock, *dblks,
> - GFP_NOFS);
> + if (flags & IOMAP_ZERO) {
> + ret = sb_issue_zeroout(sb, iomap->blkno,
> + dblks, GFP_NOFS);
> if (ret) {
> fs_err(sdp,
> "Failed to zero data buffers\n");
> + flags &= ~IOMAP_ZERO;
> }
> }
> break;
> }
> - } while ((state != ALLOC_DATA) || !(*dblock));
> + } while (iomap->blkno == IOMAP_NULL_BLOCK);
>
> + iomap->length = dblks << inode->i_blkbits;
> ip->i_height = mp->mp_fheight;
> gfs2_add_inode_blocks(&ip->i_inode, alloced);
> gfs2_dinode_out(ip, mp->mp_bh[0]->b_data);
> @@ -594,47 +595,103 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
> }
>
> /**
> - * gfs2_block_map - Map a block from an inode to a disk block
> - * @inode: The inode
> - * @lblock: The logical block number
> - * @bh_map: The bh to be mapped
> - * @create: True if its ok to alloc blocks to satify the request
> + * hole_size - figure out the size of a hole
> + * @ip: The inode
> + * @lblock: The logical starting block number
> + * @mp: The metapath
> *
> - * Sets buffer_mapped() if successful, sets buffer_boundary() if a
> - * read of metadata will be required before the next block can be
> - * mapped. Sets buffer_new() if new blocks were allocated.
> + * Returns: The hole size in bytes
> *
> - * Returns: errno
> */
> +static u64 hole_size(struct inode *inode, sector_t lblock, struct metapath *mp)
> +{
> + struct gfs2_inode *ip = GFS2_I(inode);
> + struct gfs2_sbd *sdp = GFS2_SB(inode);
> + struct metapath mp_eof;
> + unsigned int end_of_metadata = ip->i_height - 1;
> + u64 factor = 1;
> + int hgt = end_of_metadata;
> + u64 holesz = 0, holestep;
> + const __be64 *first, *end, *ptr;
> + const struct buffer_head *bh;
> + u64 isize_blks = (i_size_read(inode) - 1) >> inode->i_blkbits;
> + int zeroptrs;
> + bool done = false;
> +
> + /* Get another metapath, to the very last byte */
> + find_metapath(sdp, isize_blks, &mp_eof, ip->i_height);
> + for (hgt = end_of_metadata; hgt >= 0 && !done; hgt--) {
> + bh = mp->mp_bh[hgt];
> + if (bh) {
> + zeroptrs = 0;
> + first = metapointer(hgt, mp);
> + end = (const __be64 *)(bh->b_data + bh->b_size);
> +
> + for (ptr = first; ptr < end; ptr++) {
> + if (*ptr) {
> + done = true;
> + break;
> + } else {
> + zeroptrs++;
> + }
> + }
> + } else {
> + zeroptrs = sdp->sd_inptrs;
> + }
> + holestep = min(factor * zeroptrs,
> + isize_blks - (lblock + (zeroptrs * holesz)));
> + holesz += holestep;
> + if (lblock + holesz >= isize_blks) {
> + holesz = isize_blks - lblock;
> + break;
> + }
>
> -int gfs2_block_map(struct inode *inode, sector_t lblock,
> - struct buffer_head *bh_map, int create)
> + factor *= sdp->sd_inptrs;
> + if (hgt && (mp->mp_list[hgt - 1] < mp_eof.mp_list[hgt - 1]))
> + (mp->mp_list[hgt - 1])++;
> + }
> + return holesz << inode->i_blkbits;
> +}
> +
> +/**
> + * _gfs2_get_iomap - Map blocks from an inode to disk blocks
> + * @mapping: The address space
> + * @pos: Starting position in bytes
> + * @length: Length to map, in bytes
> + * @iomap: The iomap structure
> + * @mp: An uninitialized metapath structure
> + * @release_mpath: if true, we need to release the metapath when done
> + *
> + * Returns: errno
> + */
> +static int _gfs2_get_iomap(struct inode *inode, loff_t pos, ssize_t length,
> + struct iomap *iomap, unsigned flags)
> {
> struct gfs2_inode *ip = GFS2_I(inode);
> struct gfs2_sbd *sdp = GFS2_SB(inode);
> + struct metapath mp = { .mp_aheight = 1, };
> unsigned int bsize = sdp->sd_sb.sb_bsize;
> - const size_t maxlen = bh_map->b_size >> inode->i_blkbits;
> + const size_t maxlen = length >> inode->i_blkbits;
> const u64 *arr = sdp->sd_heightsize;
> __be64 *ptr;
> - u64 size;
> - struct metapath mp;
> + sector_t lblock = pos >> sdp->sd_sb.sb_bsize_shift;
> int ret;
> int eob;
> unsigned int len;
> struct buffer_head *bh;
> u8 height;
> - bool zero_new = false;
> - sector_t dblock = 0;
> - unsigned dblks;
> + loff_t isize = i_size_read(inode);
>
> - BUG_ON(maxlen == 0);
> + if (maxlen == 0)
> + return -EINVAL;
> +
> + iomap->offset = pos;
> + iomap->blkno = IOMAP_NULL_BLOCK;
> + iomap->type = IOMAP_HOLE;
> + iomap->length = length;
> + iomap->flags = 0;
> + bmap_lock(ip, 0);
>
> - memset(mp.mp_bh, 0, sizeof(mp.mp_bh));
> - bmap_lock(ip, create);
> - clear_buffer_mapped(bh_map);
> - clear_buffer_new(bh_map);
> - clear_buffer_boundary(bh_map);
> - trace_gfs2_bmap(ip, bh_map, lblock, create, 1);
> if (gfs2_is_dir(ip)) {
> bsize = sdp->sd_jbsize;
> arr = sdp->sd_jheightsize;
> @@ -645,53 +702,118 @@ int gfs2_block_map(struct inode *inode, sector_t lblock,
> goto out;
>
> height = ip->i_height;
> - size = (lblock + 1) * bsize;
> - while (size > arr[height])
> + while ((lblock + 1) * bsize > arr[height])
> height++;
> find_metapath(sdp, lblock, &mp, height);
> - mp.mp_aheight = 1;
> if (height > ip->i_height || gfs2_is_stuffed(ip))
> goto do_alloc;
> +
> ret = lookup_metapath(ip, &mp);
> if (ret < 0)
> goto out;
> +
> if (mp.mp_aheight != ip->i_height)
> goto do_alloc;
> +
> ptr = metapointer(ip->i_height - 1, &mp);
> if (*ptr == 0)
> goto do_alloc;
> - map_bh(bh_map, inode->i_sb, be64_to_cpu(*ptr));
> +
> + iomap->type = IOMAP_MAPPED;
> + iomap->blkno = be64_to_cpu(*ptr);
> +
> bh = mp.mp_bh[ip->i_height - 1];
> len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, maxlen, &eob);
> - bh_map->b_size = (len << inode->i_blkbits);
> - if (eob)
> - set_buffer_boundary(bh_map);
> + iomap->length = (len << inode->i_blkbits);
> +
> + if (iomap->offset + iomap->length >= isize)
> + iomap->length = isize - iomap->offset;
> +
> ret = 0;
> +
> out:
> release_metapath(&mp);
> - trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
> - bmap_unlock(ip, create);
> + bmap_unlock(ip, 0);
> return ret;
>
> do_alloc:
> - /* All allocations are done here, firstly check create flag */
> - if (!create) {
> + if (!(flags & IOMAP_WRITE)) {
> + if (pos >= isize) {
> + ret = -ENOENT;
> + goto out;
> + }
> BUG_ON(gfs2_is_stuffed(ip));
> ret = 0;
> + iomap->length = hole_size(inode, lblock, &mp);
> goto out;
> }
>
> - /* At this point ret is the tree depth of already allocated blocks */
> + ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
> + if (ret == 0)
> + iomap->flags = IOMAP_WRITE;
> + goto out;
> +}
> +
> +int gfs2_get_iomap(struct inode *inode, loff_t pos, ssize_t length,
> + struct iomap *iomap)
> +{
> + return _gfs2_get_iomap(inode, pos, length, iomap, 0);
> +}
> +
> +/**
> + * gfs2_block_map - Map a block from an inode to a disk block
> + * @inode: The inode
> + * @lblock: The logical block number
> + * @bh_map: The bh to be mapped
> + * @create: True if its ok to alloc blocks to satify the request
> + *
> + * Sets buffer_mapped() if successful, sets buffer_boundary() if a
> + * read of metadata will be required before the next block can be
> + * mapped. Sets buffer_new() if new blocks were allocated.
> + *
> + * Returns: errno
> + */
> +
> +int gfs2_block_map(struct inode *inode, sector_t lblock,
> + struct buffer_head *bh_map, int create)
> +{
> + struct gfs2_inode *ip = GFS2_I(inode);
> + struct gfs2_sbd *sdp = GFS2_SB(inode);
> + const size_t maxlen = bh_map->b_size >> inode->i_blkbits;
> + struct iomap iomap;
> + int ret, flags = 0;
> +
> + BUG_ON(maxlen == 0);
> +
> + clear_buffer_mapped(bh_map);
> + clear_buffer_new(bh_map);
> + clear_buffer_boundary(bh_map);
> + trace_gfs2_bmap(ip, bh_map, lblock, create, 1);
> +
We will need to have the ability to trace the iomap operation, since
that is now the important one. Since this tracepoint exposes the bh's
internal flags, I don't think we can just move this one, so perhaps best
to add a new trace_gfs2_iomap() tracepoint in _gfs2_get_iomap() plus we
need to let Paul know so he can update the PCP pmda too in due course.
> + if (create)
> + flags |= IOMAP_WRITE;
Hmm, I wonder why IOMAP_WRITE and IOMAP_ZERO are separate flags from the
iomap.flags field... Christoph, was there a specific reason for that? If
we could just use iomap.flags here it would save having the
gfs2_get_iomap() wrapper function for _gfs2_get_iomap(), and in our case
we need to update the zeroing flag based on the iomap result anyway. So
we'd just have additional flags IOMAP_F_ZERO and IOMAP_F_CREATE (or
IOMAP_F_WRITE) depending on which name is preferred.
> if (buffer_zeronew(bh_map))
> - zero_new = true;
> - ret = gfs2_bmap_alloc(inode, lblock, zero_new, &mp, maxlen, &dblock,
> - &dblks);
> - if (ret == 0) {
> - map_bh(bh_map, inode->i_sb, dblock);
> - bh_map->b_size = dblks << inode->i_blkbits;
> + flags |= IOMAP_ZERO;
> + ret = _gfs2_get_iomap(inode, lblock << sdp->sd_sb.sb_bsize_shift,
> + bh_map->b_size, &iomap, flags);
> +
> + iomap.length = round_up(iomap.length, sdp->sd_sb.sb_bsize);
> + bh_map->b_size = iomap.length;
> + if (maxlen >= iomap.length)
> + set_buffer_boundary(bh_map);
That seems to imply setting the boundary on every single mapping, since
the mapped length will always be equal to or less than the max length we
requested. Either we need to add a new IOMAP_F_BOUNDARY flag from which
to set the bh boundary flag, or if we are not getting any performance
benefit from it, then we could just drop support for this feature.
Either way we definitely don't want to set it for every mapping.
> +
> + if (ret)
> + goto out;
> +
> + if (iomap.blkno != IOMAP_NULL_BLOCK)
> + map_bh(bh_map, inode->i_sb, iomap.blkno);
> +
> + bh_map->b_size = iomap.length;
> + if (iomap.flags & IOMAP_WRITE)
> set_buffer_new(bh_map);
This should be:
if (iomap.flags & IOMAP_F_NEW)
set_buffer_new(bh_map);
and gfs2_get_iomap() should be setting that flag when the extent is
newly allocated. We should not be setting buffer new for every write,
but only when we've allocated new blocks. Also missing is something
along the lines of:
clear_buffer_zeronew(bh_map);
if (iomap.flags & IOMAP_F_ZERO)
set_buffer_zeronew(bh_map);
The zeronew flag is used both to request zeroing of a new extent and
also to report errors (i.e. did the zeroing request succeed) so please
check that fallocate() is still working correctly with the new
bmap/iomap code.
Steve.
> - }
> - goto out;
> +out:
> + trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
> + return ret;
> }
>
> /*
> diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h
> index 81ded5e..8da2429 100644
> --- a/fs/gfs2/bmap.h
> +++ b/fs/gfs2/bmap.h
> @@ -10,6 +10,8 @@
> #ifndef __BMAP_DOT_H__
> #define __BMAP_DOT_H__
>
> +#include <linux/iomap.h>
> +
> #include "inode.h"
>
> struct inode;
> @@ -47,6 +49,8 @@ static inline void gfs2_write_calc_reserv(const struct gfs2_inode *ip,
> extern int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page);
> extern int gfs2_block_map(struct inode *inode, sector_t lblock,
> struct buffer_head *bh, int create);
> +extern int gfs2_get_iomap(struct inode *inode, loff_t pos,
> + ssize_t length, struct iomap *iomap);
> extern int gfs2_extent_map(struct inode *inode, u64 lblock, int *new,
> u64 *dblock, unsigned *extlen);
> extern int gfs2_setattr_size(struct inode *inode, u64 size);
next prev parent reply other threads:[~2016-10-29 9:24 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-10-28 19:29 [Cluster-devel] [PATCH v2 0/3] Implement iomap for fiemap in GFS2 Bob Peterson
2016-10-28 19:29 ` [Cluster-devel] [PATCH v2 1/3] GFS2: Make height info part of metapath Bob Peterson
2016-10-28 19:29 ` [Cluster-devel] [PATCH v2 2/3] GFS2: Implement iomap for block_map Bob Peterson
2016-10-29 9:24 ` Steven Whitehouse [this message]
2016-10-31 12:07 ` Bob Peterson
2016-10-31 20:07 ` Dave Chinner
2016-11-02 9:37 ` Steven Whitehouse
2016-11-02 21:01 ` Dave Chinner
2016-11-03 9:45 ` Steven Whitehouse
2016-11-04 13:44 ` Christoph Hellwig
2016-11-07 12:03 ` Steven Whitehouse
2016-10-28 19:29 ` [Cluster-devel] [PATCH v2 3/3] GFS2: Switch fiemap implementation to use iomap Bob Peterson
2016-10-29 9:33 ` Steven Whitehouse
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=58146ADD.9010702@redhat.com \
--to=swhiteho@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.