* [PATCH] xfs: rewrite getbmap using the xfs_iext_* helpers
@ 2017-08-28 15:06 Christoph Hellwig
2017-08-28 18:31 ` Darrick J. Wong
2017-08-28 21:20 ` Darrick J. Wong
0 siblings, 2 replies; 7+ messages in thread
From: Christoph Hellwig @ 2017-08-28 15:06 UTC (permalink / raw)
To: linux-xfs
Currently getbmap uses xfs_bmapi_read to query the extent map, and then
fixes up various bits that are eventually reported to userspace.
This patch instead rewrites it to use xfs_iext_lookup_extent and
xfs_iext_get_extent to iteratively process the extent map. This not
only avoids the need to allocate a map for the returned xfs_bmbt_irec
structures but also greatly simplified the code.
There are two intentional behavior changes compared to the old code:
- the current code reports unwritten extents that don't directly border
a written one as unwritten even when not passing the BMV_IF_PREALLOC
option, contrary to the documentation. The new code requires the
BMV_IF_PREALLOC flag to report the unwrittent extent bit.
- The new code does never merges consecutive extents, unlike the old
code that sometimes does it based on the boundaries of the
xfs_bmapi_read calls. Note that the extent merging behavior was
entirely undocumented.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
fs/xfs/xfs_bmap_util.c | 499 ++++++++++++++++++-------------------------------
1 file changed, 185 insertions(+), 314 deletions(-)
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 93e955262d07..5962f119d4ff 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -404,125 +404,69 @@ xfs_bmap_count_blocks(
return 0;
}
-/*
- * returns 1 for success, 0 if we failed to map the extent.
- */
-STATIC int
-xfs_getbmapx_fix_eof_hole(
- xfs_inode_t *ip, /* xfs incore inode pointer */
- int whichfork,
- struct getbmapx *out, /* output structure */
- int prealloced, /* this is a file with
- * preallocated data space */
- int64_t end, /* last block requested */
- xfs_fsblock_t startblock,
- bool moretocome)
+static void
+xfs_getbmap_report_one(
+ struct xfs_inode *ip,
+ struct getbmapx *bmv,
+ int64_t bmv_end,
+ struct xfs_bmbt_irec *got,
+ struct getbmapx *p)
{
- int64_t fixlen;
- xfs_mount_t *mp; /* file system mount point */
- xfs_ifork_t *ifp; /* inode fork pointer */
- xfs_extnum_t lastx; /* last extent pointer */
- xfs_fileoff_t fileblock;
-
- if (startblock == HOLESTARTBLOCK) {
- mp = ip->i_mount;
- out->bmv_block = -1;
- fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, XFS_ISIZE(ip)));
- fixlen -= out->bmv_offset;
- if (prealloced && out->bmv_offset + out->bmv_length == end) {
- /* Came to hole at EOF. Trim it. */
- if (fixlen <= 0)
- return 0;
- out->bmv_length = fixlen;
- }
+ if (isnullstartblock(got->br_startblock) ||
+ got->br_startblock == DELAYSTARTBLOCK) {
+ /*
+ * Delalloc extents that start beyond EOF can occur due to
+ * speculative EOF allocation when the delalloc extent is larger
+ * than the largest freespace extent at conversion time. These
+ * extents cannot be converted by data writeback, so can exist
+ * here even if we are not supposed to be finding delalloc
+ * extents.
+ */
+ if (got->br_startoff < XFS_B_TO_FSB(ip->i_mount, XFS_ISIZE(ip)))
+ ASSERT((bmv->bmv_iflags & BMV_IF_DELALLOC) != 0);
+
+ p->bmv_oflags |= BMV_OF_DELALLOC;
+ p->bmv_block = -2;
} else {
- if (startblock == DELAYSTARTBLOCK)
- out->bmv_block = -2;
- else
- out->bmv_block = xfs_fsb_to_db(ip, startblock);
- fileblock = XFS_BB_TO_FSB(ip->i_mount, out->bmv_offset);
- ifp = XFS_IFORK_PTR(ip, whichfork);
- if (!moretocome &&
- xfs_iext_bno_to_ext(ifp, fileblock, &lastx) &&
- (lastx == xfs_iext_count(ifp) - 1))
- out->bmv_oflags |= BMV_OF_LAST;
+ p->bmv_block = xfs_fsb_to_db(ip, got->br_startblock);
}
- return 1;
+ if (got->br_state == XFS_EXT_UNWRITTEN &&
+ (bmv->bmv_iflags & BMV_IF_PREALLOC))
+ p->bmv_oflags |= BMV_OF_PREALLOC;
+
+ p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, got->br_startoff);
+ p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, got->br_blockcount);
+
+ bmv->bmv_offset = p->bmv_offset + p->bmv_length;
+ bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset);
+ bmv->bmv_entries++;
}
-/* Adjust the reported bmap around shared/unshared extent transitions. */
-STATIC int
-xfs_getbmap_adjust_shared(
+static void
+xfs_getbmap_report_hole(
struct xfs_inode *ip,
- int whichfork,
- struct xfs_bmbt_irec *map,
- struct getbmapx *out,
- struct xfs_bmbt_irec *next_map)
+ struct getbmapx *bmv,
+ int64_t bmv_end,
+ xfs_fileoff_t bno,
+ xfs_fileoff_t end,
+ struct getbmapx *p)
{
- struct xfs_mount *mp = ip->i_mount;
- xfs_agnumber_t agno;
- xfs_agblock_t agbno;
- xfs_agblock_t ebno;
- xfs_extlen_t elen;
- xfs_extlen_t nlen;
- int error;
+ if (bmv->bmv_iflags & BMV_IF_NO_HOLES)
+ return;
- next_map->br_startblock = NULLFSBLOCK;
- next_map->br_startoff = NULLFILEOFF;
- next_map->br_blockcount = 0;
+ p->bmv_block = -1;
+ p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, bno);
+ p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, end - bno);
- /* Only written data blocks can be shared. */
- if (!xfs_is_reflink_inode(ip) ||
- whichfork != XFS_DATA_FORK ||
- !xfs_bmap_is_real_extent(map))
- return 0;
-
- agno = XFS_FSB_TO_AGNO(mp, map->br_startblock);
- agbno = XFS_FSB_TO_AGBNO(mp, map->br_startblock);
- error = xfs_reflink_find_shared(mp, NULL, agno, agbno,
- map->br_blockcount, &ebno, &elen, true);
- if (error)
- return error;
-
- if (ebno == NULLAGBLOCK) {
- /* No shared blocks at all. */
- return 0;
- } else if (agbno == ebno) {
- /*
- * Shared extent at (agbno, elen). Shrink the reported
- * extent length and prepare to move the start of map[i]
- * to agbno+elen, with the aim of (re)formatting the new
- * map[i] the next time through the inner loop.
- */
- out->bmv_length = XFS_FSB_TO_BB(mp, elen);
- out->bmv_oflags |= BMV_OF_SHARED;
- if (elen != map->br_blockcount) {
- *next_map = *map;
- next_map->br_startblock += elen;
- next_map->br_startoff += elen;
- next_map->br_blockcount -= elen;
- }
- map->br_blockcount -= elen;
- } else {
- /*
- * There's an unshared extent (agbno, ebno - agbno)
- * followed by shared extent at (ebno, elen). Shrink
- * the reported extent length to cover only the unshared
- * extent and prepare to move up the start of map[i] to
- * ebno, with the aim of (re)formatting the new map[i]
- * the next time through the inner loop.
- */
- *next_map = *map;
- nlen = ebno - agbno;
- out->bmv_length = XFS_FSB_TO_BB(mp, nlen);
- next_map->br_startblock += nlen;
- next_map->br_startoff += nlen;
- next_map->br_blockcount -= nlen;
- map->br_blockcount -= nlen;
- }
+ bmv->bmv_offset = p->bmv_offset + p->bmv_length;
+ bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset);
+ bmv->bmv_entries++;
+}
- return 0;
+static inline bool xfs_getbmap_full(struct getbmapx *bmv, int nr_entries)
+{
+ return bmv->bmv_length == 0 || nr_entries >= bmv->bmv_count - 1;
}
/*
@@ -539,119 +483,72 @@ xfs_getbmap(
xfs_bmap_format_t formatter, /* format to user */
void *arg) /* formatter arg */
{
- int64_t bmvend; /* last block requested */
- int error = 0; /* return value */
- int64_t fixlen; /* length for -1 case */
- int i; /* extent number */
- int lock; /* lock state */
- xfs_bmbt_irec_t *map; /* buffer for user's data */
- xfs_mount_t *mp; /* file system mount point */
- int nex; /* # of user extents can do */
- int subnex; /* # of bmapi's can do */
- int nmap; /* number of map entries */
- struct getbmapx *out; /* output structure */
- int whichfork; /* data or attr fork */
- int prealloced; /* this is a file with
- * preallocated data space */
- int iflags; /* interface flags */
- int bmapi_flags; /* flags for xfs_bmapi */
- int cur_ext = 0;
- struct xfs_bmbt_irec inject_map;
-
- mp = ip->i_mount;
- iflags = bmv->bmv_iflags;
+ struct xfs_mount *mp = ip->i_mount;
+ int iflags = bmv->bmv_iflags;
+ int whichfork, lock, i, nr_entries = 0, error = 0;
+ int64_t bmv_end, max_len;
+ xfs_fileoff_t bno, first_bno;
+ struct xfs_ifork *ifp;
+ struct getbmapx *out;
+ struct xfs_bmbt_irec got, rec;
+ xfs_filblks_t len;
+ xfs_extnum_t idx;
#ifndef DEBUG
/* Only allow CoW fork queries if we're debugging. */
if (iflags & BMV_IF_COWFORK)
return -EINVAL;
#endif
+
if ((iflags & BMV_IF_ATTRFORK) && (iflags & BMV_IF_COWFORK))
return -EINVAL;
+ if (bmv->bmv_count <= 1)
+ return -EINVAL;
+ if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
+ return -ENOMEM;
+
+ if (bmv->bmv_length < -1)
+ return -EINVAL;
+
+ bmv->bmv_entries = 0;
+ if (bmv->bmv_length == 0)
+ return 0;
+
+ out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0);
+ if (!out)
+ return -ENOMEM;
+
if (iflags & BMV_IF_ATTRFORK)
whichfork = XFS_ATTR_FORK;
else if (iflags & BMV_IF_COWFORK)
whichfork = XFS_COW_FORK;
else
whichfork = XFS_DATA_FORK;
+ ifp = XFS_IFORK_PTR(ip, whichfork);
+ xfs_ilock(ip, XFS_IOLOCK_SHARED);
switch (whichfork) {
case XFS_ATTR_FORK:
- if (XFS_IFORK_Q(ip)) {
- if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS &&
- ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE &&
- ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)
- return -EINVAL;
- } else if (unlikely(
- ip->i_d.di_aformat != 0 &&
- ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) {
- XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW,
- ip->i_mount);
- return -EFSCORRUPTED;
- }
+ if (!XFS_IFORK_Q(ip))
+ goto out_unlock_iolock;
- prealloced = 0;
- fixlen = 1LL << 32;
+ max_len = 1LL << 32;
+ lock = xfs_ilock_attr_map_shared(ip);
break;
case XFS_COW_FORK:
- if (ip->i_cformat != XFS_DINODE_FMT_EXTENTS)
- return -EINVAL;
+ /* No CoW fork? Just return */
+ if (!ifp)
+ goto out_unlock_iolock;
- if (xfs_get_cowextsz_hint(ip)) {
- prealloced = 1;
- fixlen = mp->m_super->s_maxbytes;
- } else {
- prealloced = 0;
- fixlen = XFS_ISIZE(ip);
- }
- break;
- default:
- /* Local format data forks report no extents. */
- if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
- bmv->bmv_entries = 0;
- return 0;
- }
- if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
- ip->i_d.di_format != XFS_DINODE_FMT_BTREE)
- return -EINVAL;
+ if (xfs_get_cowextsz_hint(ip))
+ max_len = mp->m_super->s_maxbytes;
+ else
+ max_len = XFS_ISIZE(ip);
- if (xfs_get_extsz_hint(ip) ||
- ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
- prealloced = 1;
- fixlen = mp->m_super->s_maxbytes;
- } else {
- prealloced = 0;
- fixlen = XFS_ISIZE(ip);
- }
+ lock = XFS_ILOCK_SHARED;
+ xfs_ilock(ip, lock);
break;
- }
-
- if (bmv->bmv_length == -1) {
- fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen));
- bmv->bmv_length =
- max_t(int64_t, fixlen - bmv->bmv_offset, 0);
- } else if (bmv->bmv_length == 0) {
- bmv->bmv_entries = 0;
- return 0;
- } else if (bmv->bmv_length < 0) {
- return -EINVAL;
- }
-
- nex = bmv->bmv_count - 1;
- if (nex <= 0)
- return -EINVAL;
- bmvend = bmv->bmv_offset + bmv->bmv_length;
-
-
- if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
- return -ENOMEM;
- out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0);
- if (!out)
- return -ENOMEM;
-
- xfs_ilock(ip, XFS_IOLOCK_SHARED);
- switch (whichfork) {
case XFS_DATA_FORK:
if (!(iflags & BMV_IF_DELALLOC) &&
(ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) {
@@ -669,147 +566,121 @@ xfs_getbmap(
*/
}
+ if (xfs_get_extsz_hint(ip) ||
+ (ip->i_d.di_flags &
+ (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))
+ max_len = mp->m_super->s_maxbytes;
+ else
+ max_len = XFS_ISIZE(ip);
+
lock = xfs_ilock_data_map_shared(ip);
break;
- case XFS_COW_FORK:
- lock = XFS_ILOCK_SHARED;
- xfs_ilock(ip, lock);
- break;
- case XFS_ATTR_FORK:
- lock = xfs_ilock_attr_map_shared(ip);
+ }
+
+ switch (XFS_IFORK_FORMAT(ip, whichfork)) {
+ case XFS_DINODE_FMT_EXTENTS:
+ case XFS_DINODE_FMT_BTREE:
break;
+ case XFS_DINODE_FMT_LOCAL:
+ /* Local format inode forks report no extents. */
+ goto out_unlock_ilock;
+ default:
+ error = -EINVAL;
+ goto out_unlock_ilock;
}
- /*
- * Don't let nex be bigger than the number of extents
- * we can have assuming alternating holes and real extents.
- */
- if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1)
- nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1;
+ if (bmv->bmv_length == -1) {
+ max_len = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, max_len));
+ bmv->bmv_length = max(0LL, max_len - bmv->bmv_offset);
+ }
- bmapi_flags = xfs_bmapi_aflag(whichfork);
- if (!(iflags & BMV_IF_PREALLOC))
- bmapi_flags |= XFS_BMAPI_IGSTATE;
+ bmv_end = bmv->bmv_offset + bmv->bmv_length;
- /*
- * Allocate enough space to handle "subnex" maps at a time.
- */
- error = -ENOMEM;
- subnex = 16;
- map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS);
- if (!map)
+ first_bno = bno = XFS_BB_TO_FSBT(mp, bmv->bmv_offset);
+ len = XFS_BB_TO_FSB(mp, bmv->bmv_length);
+
+ if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+ error = xfs_iread_extents(NULL, ip, whichfork);
+ if (error)
+ goto out_unlock_ilock;
+ }
+
+ if (!xfs_iext_lookup_extent(ip, ifp, bno, &idx, &got))
goto out_unlock_ilock;
- bmv->bmv_entries = 0;
+ while (!xfs_getbmap_full(bmv, nr_entries)) {
+ struct getbmapx *p = &out[nr_entries];
- if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0 &&
- (whichfork == XFS_ATTR_FORK || !(iflags & BMV_IF_DELALLOC))) {
- error = 0;
- goto out_free_map;
- }
+ xfs_trim_extent(&got, first_bno, len);
- do {
- nmap = (nex> subnex) ? subnex : nex;
- error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset),
- XFS_BB_TO_FSB(mp, bmv->bmv_length),
- map, &nmap, bmapi_flags);
- if (error)
- goto out_free_map;
- ASSERT(nmap <= subnex);
-
- for (i = 0; i < nmap && bmv->bmv_length &&
- cur_ext < bmv->bmv_count - 1; i++) {
- out[cur_ext].bmv_oflags = 0;
- if (map[i].br_state == XFS_EXT_UNWRITTEN)
- out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC;
- else if (map[i].br_startblock == DELAYSTARTBLOCK)
- out[cur_ext].bmv_oflags |= BMV_OF_DELALLOC;
- out[cur_ext].bmv_offset =
- XFS_FSB_TO_BB(mp, map[i].br_startoff);
- out[cur_ext].bmv_length =
- XFS_FSB_TO_BB(mp, map[i].br_blockcount);
- out[cur_ext].bmv_unused1 = 0;
- out[cur_ext].bmv_unused2 = 0;
+ /*
+ * Report an entry for a hole if this extent doesn't directly
+ * follow the previous one.
+ */
+ if (got.br_startoff > bno) {
+ xfs_getbmap_report_hole(ip, bmv, bmv_end, bno,
+ got.br_startoff, p++);
+ if (xfs_getbmap_full(bmv, ++nr_entries))
+ break;
+ }
- /*
- * delayed allocation extents that start beyond EOF can
- * occur due to speculative EOF allocation when the
- * delalloc extent is larger than the largest freespace
- * extent at conversion time. These extents cannot be
- * converted by data writeback, so can exist here even
- * if we are not supposed to be finding delalloc
- * extents.
- */
- if (map[i].br_startblock == DELAYSTARTBLOCK &&
- map[i].br_startoff < XFS_B_TO_FSB(mp, XFS_ISIZE(ip)))
- ASSERT((iflags & BMV_IF_DELALLOC) != 0);
-
- if (map[i].br_startblock == HOLESTARTBLOCK &&
- whichfork == XFS_ATTR_FORK) {
- /* came to the end of attribute fork */
- out[cur_ext].bmv_oflags |= BMV_OF_LAST;
- goto out_free_map;
- }
+ /*
+ * In order to report shared extents accurately, we report each
+ * distinct shared / unshared part of a single bmbt record with
+ * an individual getbmapx record.
+ */
+ rec = got;
+ for (;;) {
+ bool shared = false, trimmed = false;
+ xfs_fileoff_t len;
- /* Is this a shared block? */
- error = xfs_getbmap_adjust_shared(ip, whichfork,
- &map[i], &out[cur_ext], &inject_map);
+ error = xfs_reflink_trim_around_shared(ip, &rec,
+ &shared, &trimmed);
if (error)
- goto out_free_map;
+ goto out_unlock_ilock;
- if (!xfs_getbmapx_fix_eof_hole(ip, whichfork,
- &out[cur_ext], prealloced, bmvend,
- map[i].br_startblock,
- inject_map.br_startblock != NULLFSBLOCK))
- goto out_free_map;
+ xfs_getbmap_report_one(ip, bmv, bmv_end, &rec, p);
+ if (shared)
+ p->bmv_oflags |= BMV_OF_SHARED;
+ if (!trimmed)
+ break;
- bmv->bmv_offset =
- out[cur_ext].bmv_offset +
- out[cur_ext].bmv_length;
- bmv->bmv_length =
- max_t(int64_t, 0, bmvend - bmv->bmv_offset);
+ len = got.br_startoff + got.br_blockcount -
+ (rec.br_startoff + rec.br_blockcount);
- /*
- * In case we don't want to return the hole,
- * don't increase cur_ext so that we can reuse
- * it in the next loop.
- */
- if ((iflags & BMV_IF_NO_HOLES) &&
- map[i].br_startblock == HOLESTARTBLOCK) {
- memset(&out[cur_ext], 0, sizeof(out[cur_ext]));
- continue;
- }
+ rec.br_startoff += rec.br_blockcount;
+ if (rec.br_startblock != DELAYSTARTBLOCK)
+ rec.br_startblock += rec.br_blockcount;
+ rec.br_blockcount = len;
+ }
- /*
- * In order to report shared extents accurately,
- * we report each distinct shared/unshared part
- * of a single bmbt record using multiple bmap
- * extents. To make that happen, we iterate the
- * same map array item multiple times, each
- * time trimming out the subextent that we just
- * reported.
- *
- * Because of this, we must check the out array
- * index (cur_ext) directly against bmv_count-1
- * to avoid overflows.
- */
- if (inject_map.br_startblock != NULLFSBLOCK) {
- map[i] = inject_map;
- i--;
+ bno = got.br_startoff + got.br_blockcount;
+ nr_entries++;
+
+ if (!xfs_iext_get_extent(ifp, ++idx, &got)) {
+ xfs_fileoff_t end = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
+
+ p->bmv_oflags |= BMV_OF_LAST;
+
+ if (whichfork != XFS_ATTR_FORK && bno < end &&
+ !xfs_getbmap_full(bmv, nr_entries)) {
+ xfs_getbmap_report_hole(ip, bmv, bmv_end, bno,
+ end, ++p);
+ nr_entries++;
}
- bmv->bmv_entries++;
- cur_ext++;
+ break;
}
- } while (nmap && bmv->bmv_length && cur_ext < bmv->bmv_count - 1);
- out_free_map:
- kmem_free(map);
- out_unlock_ilock:
+ if (bno >= first_bno + len)
+ break;
+ }
+
+out_unlock_ilock:
xfs_iunlock(ip, lock);
- out_unlock_iolock:
+out_unlock_iolock:
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
- for (i = 0; i < cur_ext; i++) {
+ for (i = 0; i < nr_entries; i++) {
/* format results & advance arg */
error = formatter(&arg, &out[i]);
if (error)
--
2.11.0
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH] xfs: rewrite getbmap using the xfs_iext_* helpers
2017-08-28 15:06 [PATCH] xfs: rewrite getbmap using the xfs_iext_* helpers Christoph Hellwig
@ 2017-08-28 18:31 ` Darrick J. Wong
2017-08-28 19:35 ` Christoph Hellwig
2017-08-28 21:20 ` Darrick J. Wong
1 sibling, 1 reply; 7+ messages in thread
From: Darrick J. Wong @ 2017-08-28 18:31 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-xfs
On Mon, Aug 28, 2017 at 05:06:12PM +0200, Christoph Hellwig wrote:
> Currently getbmap uses xfs_bmapi_read to query the extent map, and then
> fixes up various bits that are eventually reported to userspace.
>
> This patch instead rewrites it to use xfs_iext_lookup_extent and
> xfs_iext_get_extent to iteratively process the extent map. This not
> only avoids the need to allocate a map for the returned xfs_bmbt_irec
> structures but also greatly simplified the code.
>
> There are two intentional behavior changes compared to the old code:
>
> - the current code reports unwritten extents that don't directly border
> a written one as unwritten even when not passing the BMV_IF_PREALLOC
> option, contrary to the documentation. The new code requires the
> BMV_IF_PREALLOC flag to report the unwrittent extent bit.
> - The new code does never merges consecutive extents, unlike the old
> code that sometimes does it based on the boundaries of the
> xfs_bmapi_read calls. Note that the extent merging behavior was
> entirely undocumented.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
Hmm... this causes at least a couple of xfstests regressions:
xfs/310, because we no longer merge adjacent bmap records. I think the
solution here is to change the extent count check to allow 2 extents.
xfs/245 because zero-mapping forks are no longer reported as having one
big "hole" extent; instead zero extents are reported back. How do we
want to handle this case?
--D
> ---
> fs/xfs/xfs_bmap_util.c | 499 ++++++++++++++++++-------------------------------
> 1 file changed, 185 insertions(+), 314 deletions(-)
>
> diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
> index 93e955262d07..5962f119d4ff 100644
> --- a/fs/xfs/xfs_bmap_util.c
> +++ b/fs/xfs/xfs_bmap_util.c
> @@ -404,125 +404,69 @@ xfs_bmap_count_blocks(
> return 0;
> }
>
> -/*
> - * returns 1 for success, 0 if we failed to map the extent.
> - */
> -STATIC int
> -xfs_getbmapx_fix_eof_hole(
> - xfs_inode_t *ip, /* xfs incore inode pointer */
> - int whichfork,
> - struct getbmapx *out, /* output structure */
> - int prealloced, /* this is a file with
> - * preallocated data space */
> - int64_t end, /* last block requested */
> - xfs_fsblock_t startblock,
> - bool moretocome)
> +static void
> +xfs_getbmap_report_one(
> + struct xfs_inode *ip,
> + struct getbmapx *bmv,
> + int64_t bmv_end,
> + struct xfs_bmbt_irec *got,
> + struct getbmapx *p)
> {
> - int64_t fixlen;
> - xfs_mount_t *mp; /* file system mount point */
> - xfs_ifork_t *ifp; /* inode fork pointer */
> - xfs_extnum_t lastx; /* last extent pointer */
> - xfs_fileoff_t fileblock;
> -
> - if (startblock == HOLESTARTBLOCK) {
> - mp = ip->i_mount;
> - out->bmv_block = -1;
> - fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, XFS_ISIZE(ip)));
> - fixlen -= out->bmv_offset;
> - if (prealloced && out->bmv_offset + out->bmv_length == end) {
> - /* Came to hole at EOF. Trim it. */
> - if (fixlen <= 0)
> - return 0;
> - out->bmv_length = fixlen;
> - }
> + if (isnullstartblock(got->br_startblock) ||
> + got->br_startblock == DELAYSTARTBLOCK) {
> + /*
> + * Delalloc extents that start beyond EOF can occur due to
> + * speculative EOF allocation when the delalloc extent is larger
> + * than the largest freespace extent at conversion time. These
> + * extents cannot be converted by data writeback, so can exist
> + * here even if we are not supposed to be finding delalloc
> + * extents.
> + */
> + if (got->br_startoff < XFS_B_TO_FSB(ip->i_mount, XFS_ISIZE(ip)))
> + ASSERT((bmv->bmv_iflags & BMV_IF_DELALLOC) != 0);
> +
> + p->bmv_oflags |= BMV_OF_DELALLOC;
> + p->bmv_block = -2;
> } else {
> - if (startblock == DELAYSTARTBLOCK)
> - out->bmv_block = -2;
> - else
> - out->bmv_block = xfs_fsb_to_db(ip, startblock);
> - fileblock = XFS_BB_TO_FSB(ip->i_mount, out->bmv_offset);
> - ifp = XFS_IFORK_PTR(ip, whichfork);
> - if (!moretocome &&
> - xfs_iext_bno_to_ext(ifp, fileblock, &lastx) &&
> - (lastx == xfs_iext_count(ifp) - 1))
> - out->bmv_oflags |= BMV_OF_LAST;
> + p->bmv_block = xfs_fsb_to_db(ip, got->br_startblock);
> }
>
> - return 1;
> + if (got->br_state == XFS_EXT_UNWRITTEN &&
> + (bmv->bmv_iflags & BMV_IF_PREALLOC))
> + p->bmv_oflags |= BMV_OF_PREALLOC;
> +
> + p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, got->br_startoff);
> + p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, got->br_blockcount);
> +
> + bmv->bmv_offset = p->bmv_offset + p->bmv_length;
> + bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset);
> + bmv->bmv_entries++;
> }
>
> -/* Adjust the reported bmap around shared/unshared extent transitions. */
> -STATIC int
> -xfs_getbmap_adjust_shared(
> +static void
> +xfs_getbmap_report_hole(
> struct xfs_inode *ip,
> - int whichfork,
> - struct xfs_bmbt_irec *map,
> - struct getbmapx *out,
> - struct xfs_bmbt_irec *next_map)
> + struct getbmapx *bmv,
> + int64_t bmv_end,
> + xfs_fileoff_t bno,
> + xfs_fileoff_t end,
> + struct getbmapx *p)
> {
> - struct xfs_mount *mp = ip->i_mount;
> - xfs_agnumber_t agno;
> - xfs_agblock_t agbno;
> - xfs_agblock_t ebno;
> - xfs_extlen_t elen;
> - xfs_extlen_t nlen;
> - int error;
> + if (bmv->bmv_iflags & BMV_IF_NO_HOLES)
> + return;
>
> - next_map->br_startblock = NULLFSBLOCK;
> - next_map->br_startoff = NULLFILEOFF;
> - next_map->br_blockcount = 0;
> + p->bmv_block = -1;
> + p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, bno);
> + p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, end - bno);
>
> - /* Only written data blocks can be shared. */
> - if (!xfs_is_reflink_inode(ip) ||
> - whichfork != XFS_DATA_FORK ||
> - !xfs_bmap_is_real_extent(map))
> - return 0;
> -
> - agno = XFS_FSB_TO_AGNO(mp, map->br_startblock);
> - agbno = XFS_FSB_TO_AGBNO(mp, map->br_startblock);
> - error = xfs_reflink_find_shared(mp, NULL, agno, agbno,
> - map->br_blockcount, &ebno, &elen, true);
> - if (error)
> - return error;
> -
> - if (ebno == NULLAGBLOCK) {
> - /* No shared blocks at all. */
> - return 0;
> - } else if (agbno == ebno) {
> - /*
> - * Shared extent at (agbno, elen). Shrink the reported
> - * extent length and prepare to move the start of map[i]
> - * to agbno+elen, with the aim of (re)formatting the new
> - * map[i] the next time through the inner loop.
> - */
> - out->bmv_length = XFS_FSB_TO_BB(mp, elen);
> - out->bmv_oflags |= BMV_OF_SHARED;
> - if (elen != map->br_blockcount) {
> - *next_map = *map;
> - next_map->br_startblock += elen;
> - next_map->br_startoff += elen;
> - next_map->br_blockcount -= elen;
> - }
> - map->br_blockcount -= elen;
> - } else {
> - /*
> - * There's an unshared extent (agbno, ebno - agbno)
> - * followed by shared extent at (ebno, elen). Shrink
> - * the reported extent length to cover only the unshared
> - * extent and prepare to move up the start of map[i] to
> - * ebno, with the aim of (re)formatting the new map[i]
> - * the next time through the inner loop.
> - */
> - *next_map = *map;
> - nlen = ebno - agbno;
> - out->bmv_length = XFS_FSB_TO_BB(mp, nlen);
> - next_map->br_startblock += nlen;
> - next_map->br_startoff += nlen;
> - next_map->br_blockcount -= nlen;
> - map->br_blockcount -= nlen;
> - }
> + bmv->bmv_offset = p->bmv_offset + p->bmv_length;
> + bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset);
> + bmv->bmv_entries++;
> +}
>
> - return 0;
> +static inline bool xfs_getbmap_full(struct getbmapx *bmv, int nr_entries)
> +{
> + return bmv->bmv_length == 0 || nr_entries >= bmv->bmv_count - 1;
> }
>
> /*
> @@ -539,119 +483,72 @@ xfs_getbmap(
> xfs_bmap_format_t formatter, /* format to user */
> void *arg) /* formatter arg */
> {
> - int64_t bmvend; /* last block requested */
> - int error = 0; /* return value */
> - int64_t fixlen; /* length for -1 case */
> - int i; /* extent number */
> - int lock; /* lock state */
> - xfs_bmbt_irec_t *map; /* buffer for user's data */
> - xfs_mount_t *mp; /* file system mount point */
> - int nex; /* # of user extents can do */
> - int subnex; /* # of bmapi's can do */
> - int nmap; /* number of map entries */
> - struct getbmapx *out; /* output structure */
> - int whichfork; /* data or attr fork */
> - int prealloced; /* this is a file with
> - * preallocated data space */
> - int iflags; /* interface flags */
> - int bmapi_flags; /* flags for xfs_bmapi */
> - int cur_ext = 0;
> - struct xfs_bmbt_irec inject_map;
> -
> - mp = ip->i_mount;
> - iflags = bmv->bmv_iflags;
> + struct xfs_mount *mp = ip->i_mount;
> + int iflags = bmv->bmv_iflags;
> + int whichfork, lock, i, nr_entries = 0, error = 0;
> + int64_t bmv_end, max_len;
> + xfs_fileoff_t bno, first_bno;
> + struct xfs_ifork *ifp;
> + struct getbmapx *out;
> + struct xfs_bmbt_irec got, rec;
> + xfs_filblks_t len;
> + xfs_extnum_t idx;
>
> #ifndef DEBUG
> /* Only allow CoW fork queries if we're debugging. */
> if (iflags & BMV_IF_COWFORK)
> return -EINVAL;
> #endif
> +
> if ((iflags & BMV_IF_ATTRFORK) && (iflags & BMV_IF_COWFORK))
> return -EINVAL;
>
> + if (bmv->bmv_count <= 1)
> + return -EINVAL;
> + if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
> + return -ENOMEM;
> +
> + if (bmv->bmv_length < -1)
> + return -EINVAL;
> +
> + bmv->bmv_entries = 0;
> + if (bmv->bmv_length == 0)
> + return 0;
> +
> + out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0);
> + if (!out)
> + return -ENOMEM;
> +
> if (iflags & BMV_IF_ATTRFORK)
> whichfork = XFS_ATTR_FORK;
> else if (iflags & BMV_IF_COWFORK)
> whichfork = XFS_COW_FORK;
> else
> whichfork = XFS_DATA_FORK;
> + ifp = XFS_IFORK_PTR(ip, whichfork);
>
> + xfs_ilock(ip, XFS_IOLOCK_SHARED);
> switch (whichfork) {
> case XFS_ATTR_FORK:
> - if (XFS_IFORK_Q(ip)) {
> - if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS &&
> - ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE &&
> - ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)
> - return -EINVAL;
> - } else if (unlikely(
> - ip->i_d.di_aformat != 0 &&
> - ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) {
> - XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW,
> - ip->i_mount);
> - return -EFSCORRUPTED;
> - }
> + if (!XFS_IFORK_Q(ip))
> + goto out_unlock_iolock;
>
> - prealloced = 0;
> - fixlen = 1LL << 32;
> + max_len = 1LL << 32;
> + lock = xfs_ilock_attr_map_shared(ip);
> break;
> case XFS_COW_FORK:
> - if (ip->i_cformat != XFS_DINODE_FMT_EXTENTS)
> - return -EINVAL;
> + /* No CoW fork? Just return */
> + if (!ifp)
> + goto out_unlock_iolock;
>
> - if (xfs_get_cowextsz_hint(ip)) {
> - prealloced = 1;
> - fixlen = mp->m_super->s_maxbytes;
> - } else {
> - prealloced = 0;
> - fixlen = XFS_ISIZE(ip);
> - }
> - break;
> - default:
> - /* Local format data forks report no extents. */
> - if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
> - bmv->bmv_entries = 0;
> - return 0;
> - }
> - if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
> - ip->i_d.di_format != XFS_DINODE_FMT_BTREE)
> - return -EINVAL;
> + if (xfs_get_cowextsz_hint(ip))
> + max_len = mp->m_super->s_maxbytes;
> + else
> + max_len = XFS_ISIZE(ip);
>
> - if (xfs_get_extsz_hint(ip) ||
> - ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
> - prealloced = 1;
> - fixlen = mp->m_super->s_maxbytes;
> - } else {
> - prealloced = 0;
> - fixlen = XFS_ISIZE(ip);
> - }
> + lock = XFS_ILOCK_SHARED;
> + xfs_ilock(ip, lock);
> break;
> - }
> -
> - if (bmv->bmv_length == -1) {
> - fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen));
> - bmv->bmv_length =
> - max_t(int64_t, fixlen - bmv->bmv_offset, 0);
> - } else if (bmv->bmv_length == 0) {
> - bmv->bmv_entries = 0;
> - return 0;
> - } else if (bmv->bmv_length < 0) {
> - return -EINVAL;
> - }
> -
> - nex = bmv->bmv_count - 1;
> - if (nex <= 0)
> - return -EINVAL;
> - bmvend = bmv->bmv_offset + bmv->bmv_length;
> -
> -
> - if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
> - return -ENOMEM;
> - out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0);
> - if (!out)
> - return -ENOMEM;
> -
> - xfs_ilock(ip, XFS_IOLOCK_SHARED);
> - switch (whichfork) {
> case XFS_DATA_FORK:
> if (!(iflags & BMV_IF_DELALLOC) &&
> (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) {
> @@ -669,147 +566,121 @@ xfs_getbmap(
> */
> }
>
> + if (xfs_get_extsz_hint(ip) ||
> + (ip->i_d.di_flags &
> + (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))
> + max_len = mp->m_super->s_maxbytes;
> + else
> + max_len = XFS_ISIZE(ip);
> +
> lock = xfs_ilock_data_map_shared(ip);
> break;
> - case XFS_COW_FORK:
> - lock = XFS_ILOCK_SHARED;
> - xfs_ilock(ip, lock);
> - break;
> - case XFS_ATTR_FORK:
> - lock = xfs_ilock_attr_map_shared(ip);
> + }
> +
> + switch (XFS_IFORK_FORMAT(ip, whichfork)) {
> + case XFS_DINODE_FMT_EXTENTS:
> + case XFS_DINODE_FMT_BTREE:
> break;
> + case XFS_DINODE_FMT_LOCAL:
> + /* Local format inode forks report no extents. */
> + goto out_unlock_ilock;
> + default:
> + error = -EINVAL;
> + goto out_unlock_ilock;
> }
>
> - /*
> - * Don't let nex be bigger than the number of extents
> - * we can have assuming alternating holes and real extents.
> - */
> - if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1)
> - nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1;
> + if (bmv->bmv_length == -1) {
> + max_len = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, max_len));
> + bmv->bmv_length = max(0LL, max_len - bmv->bmv_offset);
> + }
>
> - bmapi_flags = xfs_bmapi_aflag(whichfork);
> - if (!(iflags & BMV_IF_PREALLOC))
> - bmapi_flags |= XFS_BMAPI_IGSTATE;
> + bmv_end = bmv->bmv_offset + bmv->bmv_length;
>
> - /*
> - * Allocate enough space to handle "subnex" maps at a time.
> - */
> - error = -ENOMEM;
> - subnex = 16;
> - map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS);
> - if (!map)
> + first_bno = bno = XFS_BB_TO_FSBT(mp, bmv->bmv_offset);
> + len = XFS_BB_TO_FSB(mp, bmv->bmv_length);
> +
> + if (!(ifp->if_flags & XFS_IFEXTENTS)) {
> + error = xfs_iread_extents(NULL, ip, whichfork);
> + if (error)
> + goto out_unlock_ilock;
> + }
> +
> + if (!xfs_iext_lookup_extent(ip, ifp, bno, &idx, &got))
> goto out_unlock_ilock;
>
> - bmv->bmv_entries = 0;
> + while (!xfs_getbmap_full(bmv, nr_entries)) {
> + struct getbmapx *p = &out[nr_entries];
>
> - if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0 &&
> - (whichfork == XFS_ATTR_FORK || !(iflags & BMV_IF_DELALLOC))) {
> - error = 0;
> - goto out_free_map;
> - }
> + xfs_trim_extent(&got, first_bno, len);
>
> - do {
> - nmap = (nex> subnex) ? subnex : nex;
> - error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset),
> - XFS_BB_TO_FSB(mp, bmv->bmv_length),
> - map, &nmap, bmapi_flags);
> - if (error)
> - goto out_free_map;
> - ASSERT(nmap <= subnex);
> -
> - for (i = 0; i < nmap && bmv->bmv_length &&
> - cur_ext < bmv->bmv_count - 1; i++) {
> - out[cur_ext].bmv_oflags = 0;
> - if (map[i].br_state == XFS_EXT_UNWRITTEN)
> - out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC;
> - else if (map[i].br_startblock == DELAYSTARTBLOCK)
> - out[cur_ext].bmv_oflags |= BMV_OF_DELALLOC;
> - out[cur_ext].bmv_offset =
> - XFS_FSB_TO_BB(mp, map[i].br_startoff);
> - out[cur_ext].bmv_length =
> - XFS_FSB_TO_BB(mp, map[i].br_blockcount);
> - out[cur_ext].bmv_unused1 = 0;
> - out[cur_ext].bmv_unused2 = 0;
> + /*
> + * Report an entry for a hole if this extent doesn't directly
> + * follow the previous one.
> + */
> + if (got.br_startoff > bno) {
> + xfs_getbmap_report_hole(ip, bmv, bmv_end, bno,
> + got.br_startoff, p++);
> + if (xfs_getbmap_full(bmv, ++nr_entries))
> + break;
> + }
>
> - /*
> - * delayed allocation extents that start beyond EOF can
> - * occur due to speculative EOF allocation when the
> - * delalloc extent is larger than the largest freespace
> - * extent at conversion time. These extents cannot be
> - * converted by data writeback, so can exist here even
> - * if we are not supposed to be finding delalloc
> - * extents.
> - */
> - if (map[i].br_startblock == DELAYSTARTBLOCK &&
> - map[i].br_startoff < XFS_B_TO_FSB(mp, XFS_ISIZE(ip)))
> - ASSERT((iflags & BMV_IF_DELALLOC) != 0);
> -
> - if (map[i].br_startblock == HOLESTARTBLOCK &&
> - whichfork == XFS_ATTR_FORK) {
> - /* came to the end of attribute fork */
> - out[cur_ext].bmv_oflags |= BMV_OF_LAST;
> - goto out_free_map;
> - }
> + /*
> + * In order to report shared extents accurately, we report each
> + * distinct shared / unshared part of a single bmbt record with
> + * an individual getbmapx record.
> + */
> + rec = got;
> + for (;;) {
> + bool shared = false, trimmed = false;
> + xfs_fileoff_t len;
>
> - /* Is this a shared block? */
> - error = xfs_getbmap_adjust_shared(ip, whichfork,
> - &map[i], &out[cur_ext], &inject_map);
> + error = xfs_reflink_trim_around_shared(ip, &rec,
> + &shared, &trimmed);
> if (error)
> - goto out_free_map;
> + goto out_unlock_ilock;
>
> - if (!xfs_getbmapx_fix_eof_hole(ip, whichfork,
> - &out[cur_ext], prealloced, bmvend,
> - map[i].br_startblock,
> - inject_map.br_startblock != NULLFSBLOCK))
> - goto out_free_map;
> + xfs_getbmap_report_one(ip, bmv, bmv_end, &rec, p);
> + if (shared)
> + p->bmv_oflags |= BMV_OF_SHARED;
> + if (!trimmed)
> + break;
>
> - bmv->bmv_offset =
> - out[cur_ext].bmv_offset +
> - out[cur_ext].bmv_length;
> - bmv->bmv_length =
> - max_t(int64_t, 0, bmvend - bmv->bmv_offset);
> + len = got.br_startoff + got.br_blockcount -
> + (rec.br_startoff + rec.br_blockcount);
>
> - /*
> - * In case we don't want to return the hole,
> - * don't increase cur_ext so that we can reuse
> - * it in the next loop.
> - */
> - if ((iflags & BMV_IF_NO_HOLES) &&
> - map[i].br_startblock == HOLESTARTBLOCK) {
> - memset(&out[cur_ext], 0, sizeof(out[cur_ext]));
> - continue;
> - }
> + rec.br_startoff += rec.br_blockcount;
> + if (rec.br_startblock != DELAYSTARTBLOCK)
> + rec.br_startblock += rec.br_blockcount;
> + rec.br_blockcount = len;
> + }
>
> - /*
> - * In order to report shared extents accurately,
> - * we report each distinct shared/unshared part
> - * of a single bmbt record using multiple bmap
> - * extents. To make that happen, we iterate the
> - * same map array item multiple times, each
> - * time trimming out the subextent that we just
> - * reported.
> - *
> - * Because of this, we must check the out array
> - * index (cur_ext) directly against bmv_count-1
> - * to avoid overflows.
> - */
> - if (inject_map.br_startblock != NULLFSBLOCK) {
> - map[i] = inject_map;
> - i--;
> + bno = got.br_startoff + got.br_blockcount;
> + nr_entries++;
> +
> + if (!xfs_iext_get_extent(ifp, ++idx, &got)) {
> + xfs_fileoff_t end = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
> +
> + p->bmv_oflags |= BMV_OF_LAST;
> +
> + if (whichfork != XFS_ATTR_FORK && bno < end &&
> + !xfs_getbmap_full(bmv, nr_entries)) {
> + xfs_getbmap_report_hole(ip, bmv, bmv_end, bno,
> + end, ++p);
> + nr_entries++;
> }
> - bmv->bmv_entries++;
> - cur_ext++;
> + break;
> }
> - } while (nmap && bmv->bmv_length && cur_ext < bmv->bmv_count - 1);
>
> - out_free_map:
> - kmem_free(map);
> - out_unlock_ilock:
> + if (bno >= first_bno + len)
> + break;
> + }
> +
> +out_unlock_ilock:
> xfs_iunlock(ip, lock);
> - out_unlock_iolock:
> +out_unlock_iolock:
> xfs_iunlock(ip, XFS_IOLOCK_SHARED);
>
> - for (i = 0; i < cur_ext; i++) {
> + for (i = 0; i < nr_entries; i++) {
> /* format results & advance arg */
> error = formatter(&arg, &out[i]);
> if (error)
> --
> 2.11.0
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] xfs: rewrite getbmap using the xfs_iext_* helpers
2017-08-28 18:31 ` Darrick J. Wong
@ 2017-08-28 19:35 ` Christoph Hellwig
2017-08-28 21:01 ` Darrick J. Wong
0 siblings, 1 reply; 7+ messages in thread
From: Christoph Hellwig @ 2017-08-28 19:35 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: Christoph Hellwig, linux-xfs
On Mon, Aug 28, 2017 at 11:31:42AM -0700, Darrick J. Wong wrote:
> Hmm... this causes at least a couple of xfstests regressions:
Oh. Looks like I didn't do a rmap run as none of my runs showed
a regression.
> xfs/310, because we no longer merge adjacent bmap records. I think the
> solution here is to change the extent count check to allow 2 extents.
Yeah.
> xfs/245 because zero-mapping forks are no longer reported as having one
> big "hole" extent; instead zero extents are reported back. How do we
> want to handle this case?
Hmm. Looks like that is the COW fork, as even the current code
never reports a hole at then end when there are no extents at all.
It does however when we have at least one extent:
root@brick:/home/hch/work/xfs# uname -a
Linux brick 4.9.0-3-amd64 #1 SMP Debian 4.9.30-2+deb9u2 (2017-06-26) x86_64 GNU/Linux
root@brick:/home/hch/work/xfs# truncate --size 10g foo
root@brick:/home/hch/work/xfs# xfs_bmap foo
foo: no extents
root@brick:/home/hch/work/xfs# fallocate -l 10m foo
root@brick:/home/hch/work/xfs# xfs_bmap foo
foo:
0: [0..20479]: 786860592..786881071
1: [20480..20971519]: hole
in this case I suspect we should try to treat the COW fork the
same. But let me take a more detailed look at xfs/245 on what's
going on there.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] xfs: rewrite getbmap using the xfs_iext_* helpers
2017-08-28 19:35 ` Christoph Hellwig
@ 2017-08-28 21:01 ` Darrick J. Wong
2017-08-29 14:41 ` Christoph Hellwig
0 siblings, 1 reply; 7+ messages in thread
From: Darrick J. Wong @ 2017-08-28 21:01 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-xfs
On Mon, Aug 28, 2017 at 09:35:07PM +0200, Christoph Hellwig wrote:
> On Mon, Aug 28, 2017 at 11:31:42AM -0700, Darrick J. Wong wrote:
> > Hmm... this causes at least a couple of xfstests regressions:
>
> Oh. Looks like I didn't do a rmap run as none of my runs showed
> a regression.
>
> > xfs/310, because we no longer merge adjacent bmap records. I think the
> > solution here is to change the extent count check to allow 2 extents.
>
> Yeah.
>
> > xfs/245 because zero-mapping forks are no longer reported as having one
> > big "hole" extent; instead zero extents are reported back. How do we
> > want to handle this case?
>
> Hmm. Looks like that is the COW fork, as even the current code
> never reports a hole at then end when there are no extents at all.
> It does however when we have at least one extent:
>
> root@brick:/home/hch/work/xfs# uname -a
> Linux brick 4.9.0-3-amd64 #1 SMP Debian 4.9.30-2+deb9u2 (2017-06-26) x86_64 GNU/Linux
> root@brick:/home/hch/work/xfs# truncate --size 10g foo
> root@brick:/home/hch/work/xfs# xfs_bmap foo
> foo: no extents
> root@brick:/home/hch/work/xfs# fallocate -l 10m foo
> root@brick:/home/hch/work/xfs# xfs_bmap foo
> foo:
> 0: [0..20479]: 786860592..786881071
> 1: [20480..20971519]: hole
>
> in this case I suspect we should try to treat the COW fork the
> same. But let me take a more detailed look at xfs/245 on what's
> going on there.
Aha, I see, it's only if you pass in -e to xfs_bmap then you get a hole record:
$ truncate -s 50m /storage/tmp/a
$ xfs_io -c 'bmap -elpv' /storage/tmp/a
/storage/tmp/a:
EXT: FILE-OFFSET BLOCK-RANGE AG AG-OFFSET TOTAL FLAGS
0: [0..102399]: hole 102400
$ xfs_bmap /storage/tmp/a
/storage/tmp/a: no extents
$ xfs_io -c 'bmap -lpv' /storage/tmp/a
/storage/tmp/a: no extents
(eesh.)
--D
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] xfs: rewrite getbmap using the xfs_iext_* helpers
2017-08-28 15:06 [PATCH] xfs: rewrite getbmap using the xfs_iext_* helpers Christoph Hellwig
2017-08-28 18:31 ` Darrick J. Wong
@ 2017-08-28 21:20 ` Darrick J. Wong
2017-08-29 14:38 ` Christoph Hellwig
1 sibling, 1 reply; 7+ messages in thread
From: Darrick J. Wong @ 2017-08-28 21:20 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-xfs
On Mon, Aug 28, 2017 at 05:06:12PM +0200, Christoph Hellwig wrote:
> Currently getbmap uses xfs_bmapi_read to query the extent map, and then
> fixes up various bits that are eventually reported to userspace.
>
> This patch instead rewrites it to use xfs_iext_lookup_extent and
> xfs_iext_get_extent to iteratively process the extent map. This not
> only avoids the need to allocate a map for the returned xfs_bmbt_irec
> structures but also greatly simplified the code.
>
> There are two intentional behavior changes compared to the old code:
>
> - the current code reports unwritten extents that don't directly border
> a written one as unwritten even when not passing the BMV_IF_PREALLOC
> option, contrary to the documentation. The new code requires the
> BMV_IF_PREALLOC flag to report the unwrittent extent bit.
> - The new code does never merges consecutive extents, unlike the old
> code that sometimes does it based on the boundaries of the
> xfs_bmapi_read calls. Note that the extent merging behavior was
> entirely undocumented.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
> fs/xfs/xfs_bmap_util.c | 499 ++++++++++++++++++-------------------------------
> 1 file changed, 185 insertions(+), 314 deletions(-)
>
> diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
> index 93e955262d07..5962f119d4ff 100644
> --- a/fs/xfs/xfs_bmap_util.c
> +++ b/fs/xfs/xfs_bmap_util.c
> @@ -404,125 +404,69 @@ xfs_bmap_count_blocks(
> return 0;
> }
>
> -/*
> - * returns 1 for success, 0 if we failed to map the extent.
> - */
> -STATIC int
> -xfs_getbmapx_fix_eof_hole(
> - xfs_inode_t *ip, /* xfs incore inode pointer */
> - int whichfork,
> - struct getbmapx *out, /* output structure */
> - int prealloced, /* this is a file with
> - * preallocated data space */
> - int64_t end, /* last block requested */
> - xfs_fsblock_t startblock,
> - bool moretocome)
> +static void
> +xfs_getbmap_report_one(
> + struct xfs_inode *ip,
> + struct getbmapx *bmv,
> + int64_t bmv_end,
> + struct xfs_bmbt_irec *got,
> + struct getbmapx *p)
> {
> - int64_t fixlen;
> - xfs_mount_t *mp; /* file system mount point */
> - xfs_ifork_t *ifp; /* inode fork pointer */
> - xfs_extnum_t lastx; /* last extent pointer */
> - xfs_fileoff_t fileblock;
> -
> - if (startblock == HOLESTARTBLOCK) {
> - mp = ip->i_mount;
> - out->bmv_block = -1;
> - fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, XFS_ISIZE(ip)));
> - fixlen -= out->bmv_offset;
> - if (prealloced && out->bmv_offset + out->bmv_length == end) {
> - /* Came to hole at EOF. Trim it. */
> - if (fixlen <= 0)
> - return 0;
> - out->bmv_length = fixlen;
> - }
> + if (isnullstartblock(got->br_startblock) ||
> + got->br_startblock == DELAYSTARTBLOCK) {
> + /*
> + * Delalloc extents that start beyond EOF can occur due to
> + * speculative EOF allocation when the delalloc extent is larger
> + * than the largest freespace extent at conversion time. These
> + * extents cannot be converted by data writeback, so can exist
> + * here even if we are not supposed to be finding delalloc
> + * extents.
> + */
> + if (got->br_startoff < XFS_B_TO_FSB(ip->i_mount, XFS_ISIZE(ip)))
> + ASSERT((bmv->bmv_iflags & BMV_IF_DELALLOC) != 0);
> +
> + p->bmv_oflags |= BMV_OF_DELALLOC;
> + p->bmv_block = -2;
> } else {
> - if (startblock == DELAYSTARTBLOCK)
> - out->bmv_block = -2;
> - else
> - out->bmv_block = xfs_fsb_to_db(ip, startblock);
> - fileblock = XFS_BB_TO_FSB(ip->i_mount, out->bmv_offset);
> - ifp = XFS_IFORK_PTR(ip, whichfork);
> - if (!moretocome &&
> - xfs_iext_bno_to_ext(ifp, fileblock, &lastx) &&
> - (lastx == xfs_iext_count(ifp) - 1))
> - out->bmv_oflags |= BMV_OF_LAST;
> + p->bmv_block = xfs_fsb_to_db(ip, got->br_startblock);
> }
>
> - return 1;
> + if (got->br_state == XFS_EXT_UNWRITTEN &&
> + (bmv->bmv_iflags & BMV_IF_PREALLOC))
> + p->bmv_oflags |= BMV_OF_PREALLOC;
> +
> + p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, got->br_startoff);
> + p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, got->br_blockcount);
> +
> + bmv->bmv_offset = p->bmv_offset + p->bmv_length;
> + bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset);
> + bmv->bmv_entries++;
> }
>
> -/* Adjust the reported bmap around shared/unshared extent transitions. */
> -STATIC int
> -xfs_getbmap_adjust_shared(
> +static void
> +xfs_getbmap_report_hole(
> struct xfs_inode *ip,
> - int whichfork,
> - struct xfs_bmbt_irec *map,
> - struct getbmapx *out,
> - struct xfs_bmbt_irec *next_map)
> + struct getbmapx *bmv,
> + int64_t bmv_end,
> + xfs_fileoff_t bno,
> + xfs_fileoff_t end,
> + struct getbmapx *p)
> {
> - struct xfs_mount *mp = ip->i_mount;
> - xfs_agnumber_t agno;
> - xfs_agblock_t agbno;
> - xfs_agblock_t ebno;
> - xfs_extlen_t elen;
> - xfs_extlen_t nlen;
> - int error;
> + if (bmv->bmv_iflags & BMV_IF_NO_HOLES)
> + return;
>
> - next_map->br_startblock = NULLFSBLOCK;
> - next_map->br_startoff = NULLFILEOFF;
> - next_map->br_blockcount = 0;
> + p->bmv_block = -1;
> + p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, bno);
> + p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, end - bno);
>
> - /* Only written data blocks can be shared. */
> - if (!xfs_is_reflink_inode(ip) ||
> - whichfork != XFS_DATA_FORK ||
> - !xfs_bmap_is_real_extent(map))
> - return 0;
> -
> - agno = XFS_FSB_TO_AGNO(mp, map->br_startblock);
> - agbno = XFS_FSB_TO_AGBNO(mp, map->br_startblock);
> - error = xfs_reflink_find_shared(mp, NULL, agno, agbno,
> - map->br_blockcount, &ebno, &elen, true);
> - if (error)
> - return error;
> -
> - if (ebno == NULLAGBLOCK) {
> - /* No shared blocks at all. */
> - return 0;
> - } else if (agbno == ebno) {
> - /*
> - * Shared extent at (agbno, elen). Shrink the reported
> - * extent length and prepare to move the start of map[i]
> - * to agbno+elen, with the aim of (re)formatting the new
> - * map[i] the next time through the inner loop.
> - */
> - out->bmv_length = XFS_FSB_TO_BB(mp, elen);
> - out->bmv_oflags |= BMV_OF_SHARED;
> - if (elen != map->br_blockcount) {
> - *next_map = *map;
> - next_map->br_startblock += elen;
> - next_map->br_startoff += elen;
> - next_map->br_blockcount -= elen;
> - }
> - map->br_blockcount -= elen;
> - } else {
> - /*
> - * There's an unshared extent (agbno, ebno - agbno)
> - * followed by shared extent at (ebno, elen). Shrink
> - * the reported extent length to cover only the unshared
> - * extent and prepare to move up the start of map[i] to
> - * ebno, with the aim of (re)formatting the new map[i]
> - * the next time through the inner loop.
> - */
> - *next_map = *map;
> - nlen = ebno - agbno;
> - out->bmv_length = XFS_FSB_TO_BB(mp, nlen);
> - next_map->br_startblock += nlen;
> - next_map->br_startoff += nlen;
> - next_map->br_blockcount -= nlen;
> - map->br_blockcount -= nlen;
> - }
> + bmv->bmv_offset = p->bmv_offset + p->bmv_length;
> + bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset);
> + bmv->bmv_entries++;
> +}
>
> - return 0;
> +static inline bool xfs_getbmap_full(struct getbmapx *bmv, int nr_entries)
> +{
> + return bmv->bmv_length == 0 || nr_entries >= bmv->bmv_count - 1;
> }
>
> /*
> @@ -539,119 +483,72 @@ xfs_getbmap(
> xfs_bmap_format_t formatter, /* format to user */
> void *arg) /* formatter arg */
> {
> - int64_t bmvend; /* last block requested */
> - int error = 0; /* return value */
> - int64_t fixlen; /* length for -1 case */
> - int i; /* extent number */
> - int lock; /* lock state */
> - xfs_bmbt_irec_t *map; /* buffer for user's data */
> - xfs_mount_t *mp; /* file system mount point */
> - int nex; /* # of user extents can do */
> - int subnex; /* # of bmapi's can do */
> - int nmap; /* number of map entries */
> - struct getbmapx *out; /* output structure */
> - int whichfork; /* data or attr fork */
> - int prealloced; /* this is a file with
> - * preallocated data space */
> - int iflags; /* interface flags */
> - int bmapi_flags; /* flags for xfs_bmapi */
> - int cur_ext = 0;
> - struct xfs_bmbt_irec inject_map;
> -
> - mp = ip->i_mount;
> - iflags = bmv->bmv_iflags;
> + struct xfs_mount *mp = ip->i_mount;
> + int iflags = bmv->bmv_iflags;
> + int whichfork, lock, i, nr_entries = 0, error = 0;
> + int64_t bmv_end, max_len;
> + xfs_fileoff_t bno, first_bno;
> + struct xfs_ifork *ifp;
> + struct getbmapx *out;
> + struct xfs_bmbt_irec got, rec;
> + xfs_filblks_t len;
> + xfs_extnum_t idx;
>
> #ifndef DEBUG
> /* Only allow CoW fork queries if we're debugging. */
> if (iflags & BMV_IF_COWFORK)
> return -EINVAL;
> #endif
> +
> if ((iflags & BMV_IF_ATTRFORK) && (iflags & BMV_IF_COWFORK))
> return -EINVAL;
>
> + if (bmv->bmv_count <= 1)
> + return -EINVAL;
> + if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
> + return -ENOMEM;
> +
> + if (bmv->bmv_length < -1)
> + return -EINVAL;
> +
> + bmv->bmv_entries = 0;
> + if (bmv->bmv_length == 0)
> + return 0;
> +
> + out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0);
> + if (!out)
> + return -ENOMEM;
> +
> if (iflags & BMV_IF_ATTRFORK)
> whichfork = XFS_ATTR_FORK;
> else if (iflags & BMV_IF_COWFORK)
> whichfork = XFS_COW_FORK;
> else
> whichfork = XFS_DATA_FORK;
> + ifp = XFS_IFORK_PTR(ip, whichfork);
>
> + xfs_ilock(ip, XFS_IOLOCK_SHARED);
> switch (whichfork) {
> case XFS_ATTR_FORK:
> - if (XFS_IFORK_Q(ip)) {
> - if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS &&
> - ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE &&
> - ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)
> - return -EINVAL;
> - } else if (unlikely(
> - ip->i_d.di_aformat != 0 &&
> - ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) {
> - XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW,
> - ip->i_mount);
> - return -EFSCORRUPTED;
> - }
> + if (!XFS_IFORK_Q(ip))
> + goto out_unlock_iolock;
>
> - prealloced = 0;
> - fixlen = 1LL << 32;
> + max_len = 1LL << 32;
> + lock = xfs_ilock_attr_map_shared(ip);
> break;
> case XFS_COW_FORK:
> - if (ip->i_cformat != XFS_DINODE_FMT_EXTENTS)
> - return -EINVAL;
> + /* No CoW fork? Just return */
> + if (!ifp)
> + goto out_unlock_iolock;
>
> - if (xfs_get_cowextsz_hint(ip)) {
> - prealloced = 1;
> - fixlen = mp->m_super->s_maxbytes;
> - } else {
> - prealloced = 0;
> - fixlen = XFS_ISIZE(ip);
> - }
> - break;
> - default:
> - /* Local format data forks report no extents. */
> - if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
> - bmv->bmv_entries = 0;
> - return 0;
> - }
> - if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
> - ip->i_d.di_format != XFS_DINODE_FMT_BTREE)
> - return -EINVAL;
> + if (xfs_get_cowextsz_hint(ip))
> + max_len = mp->m_super->s_maxbytes;
> + else
> + max_len = XFS_ISIZE(ip);
>
> - if (xfs_get_extsz_hint(ip) ||
> - ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
> - prealloced = 1;
> - fixlen = mp->m_super->s_maxbytes;
> - } else {
> - prealloced = 0;
> - fixlen = XFS_ISIZE(ip);
> - }
> + lock = XFS_ILOCK_SHARED;
> + xfs_ilock(ip, lock);
> break;
> - }
> -
> - if (bmv->bmv_length == -1) {
> - fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen));
> - bmv->bmv_length =
> - max_t(int64_t, fixlen - bmv->bmv_offset, 0);
> - } else if (bmv->bmv_length == 0) {
> - bmv->bmv_entries = 0;
> - return 0;
> - } else if (bmv->bmv_length < 0) {
> - return -EINVAL;
> - }
> -
> - nex = bmv->bmv_count - 1;
> - if (nex <= 0)
> - return -EINVAL;
> - bmvend = bmv->bmv_offset + bmv->bmv_length;
> -
> -
> - if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
> - return -ENOMEM;
> - out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0);
> - if (!out)
> - return -ENOMEM;
> -
> - xfs_ilock(ip, XFS_IOLOCK_SHARED);
> - switch (whichfork) {
> case XFS_DATA_FORK:
> if (!(iflags & BMV_IF_DELALLOC) &&
> (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) {
> @@ -669,147 +566,121 @@ xfs_getbmap(
> */
> }
>
> + if (xfs_get_extsz_hint(ip) ||
> + (ip->i_d.di_flags &
> + (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))
> + max_len = mp->m_super->s_maxbytes;
> + else
> + max_len = XFS_ISIZE(ip);
> +
> lock = xfs_ilock_data_map_shared(ip);
> break;
> - case XFS_COW_FORK:
> - lock = XFS_ILOCK_SHARED;
> - xfs_ilock(ip, lock);
> - break;
> - case XFS_ATTR_FORK:
> - lock = xfs_ilock_attr_map_shared(ip);
> + }
> +
> + switch (XFS_IFORK_FORMAT(ip, whichfork)) {
> + case XFS_DINODE_FMT_EXTENTS:
> + case XFS_DINODE_FMT_BTREE:
> break;
> + case XFS_DINODE_FMT_LOCAL:
> + /* Local format inode forks report no extents. */
> + goto out_unlock_ilock;
> + default:
> + error = -EINVAL;
> + goto out_unlock_ilock;
> }
>
> - /*
> - * Don't let nex be bigger than the number of extents
> - * we can have assuming alternating holes and real extents.
> - */
> - if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1)
> - nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1;
> + if (bmv->bmv_length == -1) {
> + max_len = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, max_len));
> + bmv->bmv_length = max(0LL, max_len - bmv->bmv_offset);
> + }
>
> - bmapi_flags = xfs_bmapi_aflag(whichfork);
> - if (!(iflags & BMV_IF_PREALLOC))
> - bmapi_flags |= XFS_BMAPI_IGSTATE;
> + bmv_end = bmv->bmv_offset + bmv->bmv_length;
>
> - /*
> - * Allocate enough space to handle "subnex" maps at a time.
> - */
> - error = -ENOMEM;
> - subnex = 16;
> - map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS);
> - if (!map)
> + first_bno = bno = XFS_BB_TO_FSBT(mp, bmv->bmv_offset);
> + len = XFS_BB_TO_FSB(mp, bmv->bmv_length);
> +
> + if (!(ifp->if_flags & XFS_IFEXTENTS)) {
> + error = xfs_iread_extents(NULL, ip, whichfork);
> + if (error)
> + goto out_unlock_ilock;
> + }
> +
> + if (!xfs_iext_lookup_extent(ip, ifp, bno, &idx, &got))
> goto out_unlock_ilock;
>
> - bmv->bmv_entries = 0;
> + while (!xfs_getbmap_full(bmv, nr_entries)) {
> + struct getbmapx *p = &out[nr_entries];
>
> - if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0 &&
> - (whichfork == XFS_ATTR_FORK || !(iflags & BMV_IF_DELALLOC))) {
> - error = 0;
> - goto out_free_map;
> - }
> + xfs_trim_extent(&got, first_bno, len);
>
> - do {
> - nmap = (nex> subnex) ? subnex : nex;
> - error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset),
> - XFS_BB_TO_FSB(mp, bmv->bmv_length),
> - map, &nmap, bmapi_flags);
> - if (error)
> - goto out_free_map;
> - ASSERT(nmap <= subnex);
> -
> - for (i = 0; i < nmap && bmv->bmv_length &&
> - cur_ext < bmv->bmv_count - 1; i++) {
> - out[cur_ext].bmv_oflags = 0;
> - if (map[i].br_state == XFS_EXT_UNWRITTEN)
> - out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC;
> - else if (map[i].br_startblock == DELAYSTARTBLOCK)
> - out[cur_ext].bmv_oflags |= BMV_OF_DELALLOC;
> - out[cur_ext].bmv_offset =
> - XFS_FSB_TO_BB(mp, map[i].br_startoff);
> - out[cur_ext].bmv_length =
> - XFS_FSB_TO_BB(mp, map[i].br_blockcount);
> - out[cur_ext].bmv_unused1 = 0;
> - out[cur_ext].bmv_unused2 = 0;
> + /*
> + * Report an entry for a hole if this extent doesn't directly
> + * follow the previous one.
> + */
> + if (got.br_startoff > bno) {
> + xfs_getbmap_report_hole(ip, bmv, bmv_end, bno,
> + got.br_startoff, p++);
> + if (xfs_getbmap_full(bmv, ++nr_entries))
> + break;
> + }
>
> - /*
> - * delayed allocation extents that start beyond EOF can
> - * occur due to speculative EOF allocation when the
> - * delalloc extent is larger than the largest freespace
> - * extent at conversion time. These extents cannot be
> - * converted by data writeback, so can exist here even
> - * if we are not supposed to be finding delalloc
> - * extents.
> - */
> - if (map[i].br_startblock == DELAYSTARTBLOCK &&
> - map[i].br_startoff < XFS_B_TO_FSB(mp, XFS_ISIZE(ip)))
> - ASSERT((iflags & BMV_IF_DELALLOC) != 0);
> -
> - if (map[i].br_startblock == HOLESTARTBLOCK &&
> - whichfork == XFS_ATTR_FORK) {
> - /* came to the end of attribute fork */
> - out[cur_ext].bmv_oflags |= BMV_OF_LAST;
> - goto out_free_map;
> - }
> + /*
> + * In order to report shared extents accurately, we report each
> + * distinct shared / unshared part of a single bmbt record with
> + * an individual getbmapx record.
> + */
> + rec = got;
> + for (;;) {
while (!xfs_getbmap_full()) ?
> + bool shared = false, trimmed = false;
> + xfs_fileoff_t len;
>
> - /* Is this a shared block? */
> - error = xfs_getbmap_adjust_shared(ip, whichfork,
> - &map[i], &out[cur_ext], &inject_map);
> + error = xfs_reflink_trim_around_shared(ip, &rec,
> + &shared, &trimmed);
> if (error)
> - goto out_free_map;
> + goto out_unlock_ilock;
>
> - if (!xfs_getbmapx_fix_eof_hole(ip, whichfork,
> - &out[cur_ext], prealloced, bmvend,
> - map[i].br_startblock,
> - inject_map.br_startblock != NULLFSBLOCK))
> - goto out_free_map;
> + xfs_getbmap_report_one(ip, bmv, bmv_end, &rec, p);
> + if (shared)
> + p->bmv_oflags |= BMV_OF_SHARED;
Shouldn't we advance p/nr_entries? What if we have a single partially
shared extent? Also, what's the difference between nr_entries and
bmv->bmv_entries? They both track the number of bmv entries we've
filled out, right?
I tried a simple test (which I guess we should turn into an xfstests, sigh):
$ xfs_io -c 'pwrite 0 1m' /opt/a -f
wrote 1048576/1048576 bytes at offset 0
1 MiB, 256 ops; 0.0000 sec (180.571 MiB/sec and 46226.0744 ops/sec)
$ cp --reflink=always /opt/a /opt/b
$ xfs_io -c 'bmap -elpv' /opt/a
/opt/a:
EXT: FILE-OFFSET BLOCK-RANGE AG AG-OFFSET TOTAL FLAGS
0: [0..2047]: 192..2239 0 (192..2239) 2048 100000
$ xfs_io -c 'bmap -elpv' /opt/b
/opt/b:
EXT: FILE-OFFSET BLOCK-RANGE AG AG-OFFSET TOTAL FLAGS
0: [0..2047]: 192..2239 0 (192..2239) 2048 100000
Then tried to CoW the middle of the extent:
$ xfs_io -c 'pwrite 200k 4k' -c 'pwrite 700k 4k' -c fsync /opt/b
wrote 4096/4096 bytes at offset 204800
4 KiB, 1 ops; 0.0000 sec (5.964 MiB/sec and 1526.7176 ops/sec)
wrote 4096/4096 bytes at offset 716800
4 KiB, 1 ops; 0.0000 sec (19.829 MiB/sec and 5076.1421 ops/sec)
$ xfs_io -c 'bmap -elpv' /opt/a
/opt/a:
EXT: FILE-OFFSET BLOCK-RANGE AG AG-OFFSET TOTAL FLAGS
0: [1408..2047]: 1600..2239 0 (1600..2239) 640 100000
1: [4289600..8495807]: delalloc 4206208
2: [4289432..8591138]: 4206160..8507866 2 (929360..5231066) 4301707 000000
3: [4611686027017322496..461168602 4301714..8602913 2 (1024914..5326113) 4301200 000000
4: [4226304..-4611686009833227009] 0..-4611686009837453313 0 (0..-4611686009837453313) -4611686009837453312 000000
$ xfs_io -c 'bmap -elpv' /opt/b
/opt/b:
EXT: FILE-OFFSET BLOCK-RANGE AG AG-OFFSET TOTAL FLAGS
0: [0..399]: 192..591 0 (192..591) 400 100000
1: [400..407]: 2384..2391 0 (2384..2391) 8 000000
2: [408..1399]: 600..1591 0 (600..1591) 992 100000
3: [1400..1407]: 3384..3391 0 (3384..3391) 8 000000
4: [1408..2047]: 1600..2239 0 (1600..2239) 640 100000
Ugh, something is seriously messed up here. By comparison, FIEMAP works fine:
$ filefrag -v /opt/a
Filesystem type is: 58465342
File size of /opt/a is 1048576 (256 blocks of 4096 bytes)
ext: logical_offset: physical_offset: length: expected: flags:
0: 0.. 49: 24.. 73: 50: shared
1: 50.. 50: 74.. 74: 1:
2: 51.. 174: 75.. 198: 124: shared
3: 175.. 175: 199.. 199: 1:
4: 176.. 255: 200.. 279: 80: last,shared,eof
/opt/a: 1 extent found
$ filefrag -v /opt/b
Filesystem type is: 58465342
File size of /opt/b is 1048576 (256 blocks of 4096 bytes)
ext: logical_offset: physical_offset: length: expected: flags:
0: 0.. 49: 24.. 73: 50: shared
1: 50.. 50: 298.. 298: 1: 74:
2: 51.. 174: 75.. 198: 124: 299: shared
3: 175.. 175: 423.. 423: 1: 199:
4: 176.. 255: 200.. 279: 80: 424: last,shared,eof
/opt/b: 5 extents found
> + if (!trimmed)
> + break;
>
> - bmv->bmv_offset =
> - out[cur_ext].bmv_offset +
> - out[cur_ext].bmv_length;
> - bmv->bmv_length =
> - max_t(int64_t, 0, bmvend - bmv->bmv_offset);
> + len = got.br_startoff + got.br_blockcount -
> + (rec.br_startoff + rec.br_blockcount);
>
> - /*
> - * In case we don't want to return the hole,
> - * don't increase cur_ext so that we can reuse
> - * it in the next loop.
> - */
> - if ((iflags & BMV_IF_NO_HOLES) &&
> - map[i].br_startblock == HOLESTARTBLOCK) {
> - memset(&out[cur_ext], 0, sizeof(out[cur_ext]));
> - continue;
> - }
> + rec.br_startoff += rec.br_blockcount;
> + if (rec.br_startblock != DELAYSTARTBLOCK)
> + rec.br_startblock += rec.br_blockcount;
> + rec.br_blockcount = len;
> + }
>
> - /*
> - * In order to report shared extents accurately,
> - * we report each distinct shared/unshared part
> - * of a single bmbt record using multiple bmap
> - * extents. To make that happen, we iterate the
> - * same map array item multiple times, each
> - * time trimming out the subextent that we just
> - * reported.
> - *
> - * Because of this, we must check the out array
> - * index (cur_ext) directly against bmv_count-1
> - * to avoid overflows.
> - */
> - if (inject_map.br_startblock != NULLFSBLOCK) {
> - map[i] = inject_map;
> - i--;
> + bno = got.br_startoff + got.br_blockcount;
> + nr_entries++;
> +
> + if (!xfs_iext_get_extent(ifp, ++idx, &got)) {
> + xfs_fileoff_t end = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
> +
> + p->bmv_oflags |= BMV_OF_LAST;
Isn't BMV_OF_LAST supposed to be set only on the last extent of the
dataset returned? If I ask for the mappings for blocks 100-200 and
there's a hole from 190-200, shouldn't OF_LAST be set on the 190-200
extent?
--D
> +
> + if (whichfork != XFS_ATTR_FORK && bno < end &&
> + !xfs_getbmap_full(bmv, nr_entries)) {
> + xfs_getbmap_report_hole(ip, bmv, bmv_end, bno,
> + end, ++p);
> + nr_entries++;
> }
> - bmv->bmv_entries++;
> - cur_ext++;
> + break;
> }
> - } while (nmap && bmv->bmv_length && cur_ext < bmv->bmv_count - 1);
>
> - out_free_map:
> - kmem_free(map);
> - out_unlock_ilock:
> + if (bno >= first_bno + len)
> + break;
> + }
> +
> +out_unlock_ilock:
> xfs_iunlock(ip, lock);
> - out_unlock_iolock:
> +out_unlock_iolock:
> xfs_iunlock(ip, XFS_IOLOCK_SHARED);
>
> - for (i = 0; i < cur_ext; i++) {
> + for (i = 0; i < nr_entries; i++) {
> /* format results & advance arg */
> error = formatter(&arg, &out[i]);
> if (error)
> --
> 2.11.0
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] xfs: rewrite getbmap using the xfs_iext_* helpers
2017-08-28 21:20 ` Darrick J. Wong
@ 2017-08-29 14:38 ` Christoph Hellwig
0 siblings, 0 replies; 7+ messages in thread
From: Christoph Hellwig @ 2017-08-29 14:38 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: Christoph Hellwig, linux-xfs
On Mon, Aug 28, 2017 at 02:20:24PM -0700, Darrick J. Wong wrote:
> On Mon, Aug 28, 2017 at 05:06:12PM +0200, Christoph Hellwig wrote:
> > Currently getbmap uses xfs_bmapi_read to query the extent map, and then
> > fixes up various bits that are eventually reported to userspace.
> >
> > This patch instead rewrites it to use xfs_iext_lookup_extent and
> > xfs_iext_get_extent to iteratively process the extent map. This not
> > only avoids the need to allocate a map for the returned xfs_bmbt_irec
> > structures but also greatly simplified the code.
> >
> > There are two intentional behavior changes compared to the old code:
> >
> > - the current code reports unwritten extents that don't directly border
> > a written one as unwritten even when not passing the BMV_IF_PREALLOC
> > option, contrary to the documentation. The new code requires the
> > BMV_IF_PREALLOC flag to report the unwrittent extent bit.
> > - The new code does never merges consecutive extents, unlike the old
> > code that sometimes does it based on the boundaries of the
> > xfs_bmapi_read calls. Note that the extent merging behavior was
> > entirely undocumented.
> >
> > Signed-off-by: Christoph Hellwig <hch@lst.de>
> > ---
> > fs/xfs/xfs_bmap_util.c | 499 ++++++++++++++++++-------------------------------
> > 1 file changed, 185 insertions(+), 314 deletions(-)
> >
> > diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
> > index 93e955262d07..5962f119d4ff 100644
> > --- a/fs/xfs/xfs_bmap_util.c
> > +++ b/fs/xfs/xfs_bmap_util.c
> > @@ -404,125 +404,69 @@ xfs_bmap_count_blocks(
> > return 0;
> > }
> >
> > -/*
> > - * returns 1 for success, 0 if we failed to map the extent.
> > - */
> > -STATIC int
> > -xfs_getbmapx_fix_eof_hole(
> > - xfs_inode_t *ip, /* xfs incore inode pointer */
> > - int whichfork,
> > - struct getbmapx *out, /* output structure */
> > - int prealloced, /* this is a file with
> > - * preallocated data space */
> > - int64_t end, /* last block requested */
> > - xfs_fsblock_t startblock,
> > - bool moretocome)
> > +static void
> > +xfs_getbmap_report_one(
> > + struct xfs_inode *ip,
> > + struct getbmapx *bmv,
> > + int64_t bmv_end,
> > + struct xfs_bmbt_irec *got,
> > + struct getbmapx *p)
> > {
> > - int64_t fixlen;
> > - xfs_mount_t *mp; /* file system mount point */
> > - xfs_ifork_t *ifp; /* inode fork pointer */
> > - xfs_extnum_t lastx; /* last extent pointer */
> > - xfs_fileoff_t fileblock;
> > -
> > - if (startblock == HOLESTARTBLOCK) {
> > - mp = ip->i_mount;
> > - out->bmv_block = -1;
> > - fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, XFS_ISIZE(ip)));
> > - fixlen -= out->bmv_offset;
> > - if (prealloced && out->bmv_offset + out->bmv_length == end) {
> > - /* Came to hole at EOF. Trim it. */
> > - if (fixlen <= 0)
> > - return 0;
> > - out->bmv_length = fixlen;
> > - }
> > + if (isnullstartblock(got->br_startblock) ||
> > + got->br_startblock == DELAYSTARTBLOCK) {
> > + /*
> > + * Delalloc extents that start beyond EOF can occur due to
> > + * speculative EOF allocation when the delalloc extent is larger
> > + * than the largest freespace extent at conversion time. These
> > + * extents cannot be converted by data writeback, so can exist
> > + * here even if we are not supposed to be finding delalloc
> > + * extents.
> > + */
> > + if (got->br_startoff < XFS_B_TO_FSB(ip->i_mount, XFS_ISIZE(ip)))
> > + ASSERT((bmv->bmv_iflags & BMV_IF_DELALLOC) != 0);
> > +
> > + p->bmv_oflags |= BMV_OF_DELALLOC;
> > + p->bmv_block = -2;
> > } else {
> > - if (startblock == DELAYSTARTBLOCK)
> > - out->bmv_block = -2;
> > - else
> > - out->bmv_block = xfs_fsb_to_db(ip, startblock);
> > - fileblock = XFS_BB_TO_FSB(ip->i_mount, out->bmv_offset);
> > - ifp = XFS_IFORK_PTR(ip, whichfork);
> > - if (!moretocome &&
> > - xfs_iext_bno_to_ext(ifp, fileblock, &lastx) &&
> > - (lastx == xfs_iext_count(ifp) - 1))
> > - out->bmv_oflags |= BMV_OF_LAST;
> > + p->bmv_block = xfs_fsb_to_db(ip, got->br_startblock);
> > }
> >
> > - return 1;
> > + if (got->br_state == XFS_EXT_UNWRITTEN &&
> > + (bmv->bmv_iflags & BMV_IF_PREALLOC))
> > + p->bmv_oflags |= BMV_OF_PREALLOC;
> > +
> > + p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, got->br_startoff);
> > + p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, got->br_blockcount);
> > +
> > + bmv->bmv_offset = p->bmv_offset + p->bmv_length;
> > + bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset);
> > + bmv->bmv_entries++;
> > }
> >
> > -/* Adjust the reported bmap around shared/unshared extent transitions. */
> > -STATIC int
> > -xfs_getbmap_adjust_shared(
> > +static void
> > +xfs_getbmap_report_hole(
> > struct xfs_inode *ip,
> > - int whichfork,
> > - struct xfs_bmbt_irec *map,
> > - struct getbmapx *out,
> > - struct xfs_bmbt_irec *next_map)
> > + struct getbmapx *bmv,
> > + int64_t bmv_end,
> > + xfs_fileoff_t bno,
> > + xfs_fileoff_t end,
> > + struct getbmapx *p)
> > {
> > - struct xfs_mount *mp = ip->i_mount;
> > - xfs_agnumber_t agno;
> > - xfs_agblock_t agbno;
> > - xfs_agblock_t ebno;
> > - xfs_extlen_t elen;
> > - xfs_extlen_t nlen;
> > - int error;
> > + if (bmv->bmv_iflags & BMV_IF_NO_HOLES)
> > + return;
> >
> > - next_map->br_startblock = NULLFSBLOCK;
> > - next_map->br_startoff = NULLFILEOFF;
> > - next_map->br_blockcount = 0;
> > + p->bmv_block = -1;
> > + p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, bno);
> > + p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, end - bno);
> >
> > - /* Only written data blocks can be shared. */
> > - if (!xfs_is_reflink_inode(ip) ||
> > - whichfork != XFS_DATA_FORK ||
> > - !xfs_bmap_is_real_extent(map))
> > - return 0;
> > -
> > - agno = XFS_FSB_TO_AGNO(mp, map->br_startblock);
> > - agbno = XFS_FSB_TO_AGBNO(mp, map->br_startblock);
> > - error = xfs_reflink_find_shared(mp, NULL, agno, agbno,
> > - map->br_blockcount, &ebno, &elen, true);
> > - if (error)
> > - return error;
> > -
> > - if (ebno == NULLAGBLOCK) {
> > - /* No shared blocks at all. */
> > - return 0;
> > - } else if (agbno == ebno) {
> > - /*
> > - * Shared extent at (agbno, elen). Shrink the reported
> > - * extent length and prepare to move the start of map[i]
> > - * to agbno+elen, with the aim of (re)formatting the new
> > - * map[i] the next time through the inner loop.
> > - */
> > - out->bmv_length = XFS_FSB_TO_BB(mp, elen);
> > - out->bmv_oflags |= BMV_OF_SHARED;
> > - if (elen != map->br_blockcount) {
> > - *next_map = *map;
> > - next_map->br_startblock += elen;
> > - next_map->br_startoff += elen;
> > - next_map->br_blockcount -= elen;
> > - }
> > - map->br_blockcount -= elen;
> > - } else {
> > - /*
> > - * There's an unshared extent (agbno, ebno - agbno)
> > - * followed by shared extent at (ebno, elen). Shrink
> > - * the reported extent length to cover only the unshared
> > - * extent and prepare to move up the start of map[i] to
> > - * ebno, with the aim of (re)formatting the new map[i]
> > - * the next time through the inner loop.
> > - */
> > - *next_map = *map;
> > - nlen = ebno - agbno;
> > - out->bmv_length = XFS_FSB_TO_BB(mp, nlen);
> > - next_map->br_startblock += nlen;
> > - next_map->br_startoff += nlen;
> > - next_map->br_blockcount -= nlen;
> > - map->br_blockcount -= nlen;
> > - }
> > + bmv->bmv_offset = p->bmv_offset + p->bmv_length;
> > + bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset);
> > + bmv->bmv_entries++;
> > +}
> >
> > - return 0;
> > +static inline bool xfs_getbmap_full(struct getbmapx *bmv, int nr_entries)
> > +{
> > + return bmv->bmv_length == 0 || nr_entries >= bmv->bmv_count - 1;
> > }
> >
> > /*
> > @@ -539,119 +483,72 @@ xfs_getbmap(
> > xfs_bmap_format_t formatter, /* format to user */
> > void *arg) /* formatter arg */
> > {
> > - int64_t bmvend; /* last block requested */
> > - int error = 0; /* return value */
> > - int64_t fixlen; /* length for -1 case */
> > - int i; /* extent number */
> > - int lock; /* lock state */
> > - xfs_bmbt_irec_t *map; /* buffer for user's data */
> > - xfs_mount_t *mp; /* file system mount point */
> > - int nex; /* # of user extents can do */
> > - int subnex; /* # of bmapi's can do */
> > - int nmap; /* number of map entries */
> > - struct getbmapx *out; /* output structure */
> > - int whichfork; /* data or attr fork */
> > - int prealloced; /* this is a file with
> > - * preallocated data space */
> > - int iflags; /* interface flags */
> > - int bmapi_flags; /* flags for xfs_bmapi */
> > - int cur_ext = 0;
> > - struct xfs_bmbt_irec inject_map;
> > -
> > - mp = ip->i_mount;
> > - iflags = bmv->bmv_iflags;
> > + struct xfs_mount *mp = ip->i_mount;
> > + int iflags = bmv->bmv_iflags;
> > + int whichfork, lock, i, nr_entries = 0, error = 0;
> > + int64_t bmv_end, max_len;
> > + xfs_fileoff_t bno, first_bno;
> > + struct xfs_ifork *ifp;
> > + struct getbmapx *out;
> > + struct xfs_bmbt_irec got, rec;
> > + xfs_filblks_t len;
> > + xfs_extnum_t idx;
> >
> > #ifndef DEBUG
> > /* Only allow CoW fork queries if we're debugging. */
> > if (iflags & BMV_IF_COWFORK)
> > return -EINVAL;
> > #endif
> > +
> > if ((iflags & BMV_IF_ATTRFORK) && (iflags & BMV_IF_COWFORK))
> > return -EINVAL;
> >
> > + if (bmv->bmv_count <= 1)
> > + return -EINVAL;
> > + if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
> > + return -ENOMEM;
> > +
> > + if (bmv->bmv_length < -1)
> > + return -EINVAL;
> > +
> > + bmv->bmv_entries = 0;
> > + if (bmv->bmv_length == 0)
> > + return 0;
> > +
> > + out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0);
> > + if (!out)
> > + return -ENOMEM;
> > +
> > if (iflags & BMV_IF_ATTRFORK)
> > whichfork = XFS_ATTR_FORK;
> > else if (iflags & BMV_IF_COWFORK)
> > whichfork = XFS_COW_FORK;
> > else
> > whichfork = XFS_DATA_FORK;
> > + ifp = XFS_IFORK_PTR(ip, whichfork);
> >
> > + xfs_ilock(ip, XFS_IOLOCK_SHARED);
> > switch (whichfork) {
> > case XFS_ATTR_FORK:
> > - if (XFS_IFORK_Q(ip)) {
> > - if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS &&
> > - ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE &&
> > - ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)
> > - return -EINVAL;
> > - } else if (unlikely(
> > - ip->i_d.di_aformat != 0 &&
> > - ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) {
> > - XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW,
> > - ip->i_mount);
> > - return -EFSCORRUPTED;
> > - }
> > + if (!XFS_IFORK_Q(ip))
> > + goto out_unlock_iolock;
> >
> > - prealloced = 0;
> > - fixlen = 1LL << 32;
> > + max_len = 1LL << 32;
> > + lock = xfs_ilock_attr_map_shared(ip);
> > break;
> > case XFS_COW_FORK:
> > - if (ip->i_cformat != XFS_DINODE_FMT_EXTENTS)
> > - return -EINVAL;
> > + /* No CoW fork? Just return */
> > + if (!ifp)
> > + goto out_unlock_iolock;
> >
> > - if (xfs_get_cowextsz_hint(ip)) {
> > - prealloced = 1;
> > - fixlen = mp->m_super->s_maxbytes;
> > - } else {
> > - prealloced = 0;
> > - fixlen = XFS_ISIZE(ip);
> > - }
> > - break;
> > - default:
> > - /* Local format data forks report no extents. */
> > - if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
> > - bmv->bmv_entries = 0;
> > - return 0;
> > - }
> > - if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
> > - ip->i_d.di_format != XFS_DINODE_FMT_BTREE)
> > - return -EINVAL;
> > + if (xfs_get_cowextsz_hint(ip))
> > + max_len = mp->m_super->s_maxbytes;
> > + else
> > + max_len = XFS_ISIZE(ip);
> >
> > - if (xfs_get_extsz_hint(ip) ||
> > - ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
> > - prealloced = 1;
> > - fixlen = mp->m_super->s_maxbytes;
> > - } else {
> > - prealloced = 0;
> > - fixlen = XFS_ISIZE(ip);
> > - }
> > + lock = XFS_ILOCK_SHARED;
> > + xfs_ilock(ip, lock);
> > break;
> > - }
> > -
> > - if (bmv->bmv_length == -1) {
> > - fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen));
> > - bmv->bmv_length =
> > - max_t(int64_t, fixlen - bmv->bmv_offset, 0);
> > - } else if (bmv->bmv_length == 0) {
> > - bmv->bmv_entries = 0;
> > - return 0;
> > - } else if (bmv->bmv_length < 0) {
> > - return -EINVAL;
> > - }
> > -
> > - nex = bmv->bmv_count - 1;
> > - if (nex <= 0)
> > - return -EINVAL;
> > - bmvend = bmv->bmv_offset + bmv->bmv_length;
> > -
> > -
> > - if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
> > - return -ENOMEM;
> > - out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0);
> > - if (!out)
> > - return -ENOMEM;
> > -
> > - xfs_ilock(ip, XFS_IOLOCK_SHARED);
> > - switch (whichfork) {
> > case XFS_DATA_FORK:
> > if (!(iflags & BMV_IF_DELALLOC) &&
> > (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) {
> > @@ -669,147 +566,121 @@ xfs_getbmap(
> > */
> > }
> >
> > + if (xfs_get_extsz_hint(ip) ||
> > + (ip->i_d.di_flags &
> > + (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))
> > + max_len = mp->m_super->s_maxbytes;
> > + else
> > + max_len = XFS_ISIZE(ip);
> > +
> > lock = xfs_ilock_data_map_shared(ip);
> > break;
> > - case XFS_COW_FORK:
> > - lock = XFS_ILOCK_SHARED;
> > - xfs_ilock(ip, lock);
> > - break;
> > - case XFS_ATTR_FORK:
> > - lock = xfs_ilock_attr_map_shared(ip);
> > + }
> > +
> > + switch (XFS_IFORK_FORMAT(ip, whichfork)) {
> > + case XFS_DINODE_FMT_EXTENTS:
> > + case XFS_DINODE_FMT_BTREE:
> > break;
> > + case XFS_DINODE_FMT_LOCAL:
> > + /* Local format inode forks report no extents. */
> > + goto out_unlock_ilock;
> > + default:
> > + error = -EINVAL;
> > + goto out_unlock_ilock;
> > }
> >
> > - /*
> > - * Don't let nex be bigger than the number of extents
> > - * we can have assuming alternating holes and real extents.
> > - */
> > - if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1)
> > - nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1;
> > + if (bmv->bmv_length == -1) {
> > + max_len = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, max_len));
> > + bmv->bmv_length = max(0LL, max_len - bmv->bmv_offset);
> > + }
> >
> > - bmapi_flags = xfs_bmapi_aflag(whichfork);
> > - if (!(iflags & BMV_IF_PREALLOC))
> > - bmapi_flags |= XFS_BMAPI_IGSTATE;
> > + bmv_end = bmv->bmv_offset + bmv->bmv_length;
> >
> > - /*
> > - * Allocate enough space to handle "subnex" maps at a time.
> > - */
> > - error = -ENOMEM;
> > - subnex = 16;
> > - map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS);
> > - if (!map)
> > + first_bno = bno = XFS_BB_TO_FSBT(mp, bmv->bmv_offset);
> > + len = XFS_BB_TO_FSB(mp, bmv->bmv_length);
> > +
> > + if (!(ifp->if_flags & XFS_IFEXTENTS)) {
> > + error = xfs_iread_extents(NULL, ip, whichfork);
> > + if (error)
> > + goto out_unlock_ilock;
> > + }
> > +
> > + if (!xfs_iext_lookup_extent(ip, ifp, bno, &idx, &got))
> > goto out_unlock_ilock;
> >
> > - bmv->bmv_entries = 0;
> > + while (!xfs_getbmap_full(bmv, nr_entries)) {
> > + struct getbmapx *p = &out[nr_entries];
> >
> > - if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0 &&
> > - (whichfork == XFS_ATTR_FORK || !(iflags & BMV_IF_DELALLOC))) {
> > - error = 0;
> > - goto out_free_map;
> > - }
> > + xfs_trim_extent(&got, first_bno, len);
> >
> > - do {
> > - nmap = (nex> subnex) ? subnex : nex;
> > - error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset),
> > - XFS_BB_TO_FSB(mp, bmv->bmv_length),
> > - map, &nmap, bmapi_flags);
> > - if (error)
> > - goto out_free_map;
> > - ASSERT(nmap <= subnex);
> > -
> > - for (i = 0; i < nmap && bmv->bmv_length &&
> > - cur_ext < bmv->bmv_count - 1; i++) {
> > - out[cur_ext].bmv_oflags = 0;
> > - if (map[i].br_state == XFS_EXT_UNWRITTEN)
> > - out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC;
> > - else if (map[i].br_startblock == DELAYSTARTBLOCK)
> > - out[cur_ext].bmv_oflags |= BMV_OF_DELALLOC;
> > - out[cur_ext].bmv_offset =
> > - XFS_FSB_TO_BB(mp, map[i].br_startoff);
> > - out[cur_ext].bmv_length =
> > - XFS_FSB_TO_BB(mp, map[i].br_blockcount);
> > - out[cur_ext].bmv_unused1 = 0;
> > - out[cur_ext].bmv_unused2 = 0;
> > + /*
> > + * Report an entry for a hole if this extent doesn't directly
> > + * follow the previous one.
> > + */
> > + if (got.br_startoff > bno) {
> > + xfs_getbmap_report_hole(ip, bmv, bmv_end, bno,
> > + got.br_startoff, p++);
> > + if (xfs_getbmap_full(bmv, ++nr_entries))
> > + break;
> > + }
> >
> > - /*
> > - * delayed allocation extents that start beyond EOF can
> > - * occur due to speculative EOF allocation when the
> > - * delalloc extent is larger than the largest freespace
> > - * extent at conversion time. These extents cannot be
> > - * converted by data writeback, so can exist here even
> > - * if we are not supposed to be finding delalloc
> > - * extents.
> > - */
> > - if (map[i].br_startblock == DELAYSTARTBLOCK &&
> > - map[i].br_startoff < XFS_B_TO_FSB(mp, XFS_ISIZE(ip)))
> > - ASSERT((iflags & BMV_IF_DELALLOC) != 0);
> > -
> > - if (map[i].br_startblock == HOLESTARTBLOCK &&
> > - whichfork == XFS_ATTR_FORK) {
> > - /* came to the end of attribute fork */
> > - out[cur_ext].bmv_oflags |= BMV_OF_LAST;
> > - goto out_free_map;
> > - }
> > + /*
> > + * In order to report shared extents accurately, we report each
> > + * distinct shared / unshared part of a single bmbt record with
> > + * an individual getbmapx record.
> > + */
> > + rec = got;
> > + for (;;) {
>
> while (!xfs_getbmap_full()) ?
Yeah.
> > - if (!xfs_getbmapx_fix_eof_hole(ip, whichfork,
> > - &out[cur_ext], prealloced, bmvend,
> > - map[i].br_startblock,
> > - inject_map.br_startblock != NULLFSBLOCK))
> > - goto out_free_map;
> > + xfs_getbmap_report_one(ip, bmv, bmv_end, &rec, p);
> > + if (shared)
> > + p->bmv_oflags |= BMV_OF_SHARED;
>
> Shouldn't we advance p/nr_entries? What if we have a single partially
> shared extent? Also, what's the difference between nr_entries and
> bmv->bmv_entries? They both track the number of bmv entries we've
> filled out, right?
We should, but I messed it up. And no test caught it..
> > + if (!xfs_iext_get_extent(ifp, ++idx, &got)) {
> > + xfs_fileoff_t end = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
> > +
> > + p->bmv_oflags |= BMV_OF_LAST;
>
> Isn't BMV_OF_LAST supposed to be set only on the last extent of the
> dataset returned? If I ask for the mappings for blocks 100-200 and
> there's a hole from 190-200, shouldn't OF_LAST be set on the 190-200
> extent?
BMV_OF_LAST doesn't seem to be document, and isn't checked by any
any userspace I could find. But my interpretation was that it's
supposed to be set on the last extent of a file, based on:
#define BMV_OF_LAST 0x4 /* segment is the last in the file */
now of course we could argue of a hole is an extent in a file;
by any normal defintion it isn't, but in terms of getbmap it
could be considered one.
The old code did this to set it:
if (startblock == HOLESTARTBLOCK) {
...
} else {
....
if (!moretocome &&
xfs_iext_bno_to_ext(ifp, fileblock, &lastx) &&
(lastx == xfs_iext_count(ifp) - 1))
out->bmv_oflags |= BMV_OF_LAST;
}
which means it sets it on the last actually allocated extent in a file
for the general ase.
But it also does a weird thing for holes in attr forks where it sets
BMV_OF_LAST on an entry that is beyond bmv_entries and this should
not even be considered valid by the caller.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] xfs: rewrite getbmap using the xfs_iext_* helpers
2017-08-28 21:01 ` Darrick J. Wong
@ 2017-08-29 14:41 ` Christoph Hellwig
0 siblings, 0 replies; 7+ messages in thread
From: Christoph Hellwig @ 2017-08-29 14:41 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: Christoph Hellwig, linux-xfs
On Mon, Aug 28, 2017 at 02:01:25PM -0700, Darrick J. Wong wrote:
> Aha, I see, it's only if you pass in -e to xfs_bmap then you get a hole record:
Good luck passing -e to xfs_bmap:
root@brick:/home/hch/work/xfsprogs# xfs_bmap -e foo
Illegal option -e
Usage: xfs_bmap [-adlpvV] [-n nx] file...
:)
>
> $ truncate -s 50m /storage/tmp/a
> $ xfs_io -c 'bmap -elpv' /storage/tmp/a
> /storage/tmp/a:
> EXT: FILE-OFFSET BLOCK-RANGE AG AG-OFFSET TOTAL FLAGS
> 0: [0..102399]: hole 102400
> $ xfs_bmap /storage/tmp/a
> /storage/tmp/a: no extents
> $ xfs_io -c 'bmap -lpv' /storage/tmp/a
> /storage/tmp/a: no extents
>
> (eesh.)
But only if there are no other extents:
root@brick:/home/hch/work/xfsprogs# truncate -s 50m foo
root@brick:/home/hch/work/xfsprogs# xfs_io -c 'bmap -elpv' foo
foo:
EXT: FILE-OFFSET BLOCK-RANGE AG AG-OFFSET TOTAL FLAGS
0: [0..102399]: hole 102400
root@brick:/home/hch/work/xfsprogs# echo bar > foo
root@brick:/home/hch/work/xfsprogs# xfs_io -c 'bmap -elpv' foo
foo:
EXT: FILE-OFFSET BLOCK-RANGE AG AG-OFFSET TOTAL FLAGS
0: [0..7]: 730691408..730691415 3 (390992..390999) 8 000000
I think we need to actually define getbmap semantics, as it seems
to be a collection of accidental corner cases..
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2017-08-29 14:41 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-08-28 15:06 [PATCH] xfs: rewrite getbmap using the xfs_iext_* helpers Christoph Hellwig
2017-08-28 18:31 ` Darrick J. Wong
2017-08-28 19:35 ` Christoph Hellwig
2017-08-28 21:01 ` Darrick J. Wong
2017-08-29 14:41 ` Christoph Hellwig
2017-08-28 21:20 ` Darrick J. Wong
2017-08-29 14:38 ` Christoph Hellwig
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).