* Re: [PATCH v3 2/10] xfs: Add support FALLOC_FL_INSERT_RANGE for fallocate
2014-05-27 8:18 [PATCH v3 2/10] xfs: Add support FALLOC_FL_INSERT_RANGE for fallocate Namjae Jeon
@ 2014-05-27 18:40 ` Brian Foster
2014-05-27 20:59 ` Brian Foster
1 sibling, 0 replies; 4+ messages in thread
From: Brian Foster @ 2014-05-27 18:40 UTC (permalink / raw)
To: Namjae Jeon; +Cc: linux-fsdevel, Ashish Sangwan, xfs
On Tue, May 27, 2014 at 05:18:57PM +0900, Namjae Jeon wrote:
> This patch implements fallocate's FALLOC_FL_INSERT_RANGE for XFS.
>
> 1) Make sure that both offset and len are block size aligned.
> 2) Update the i_size of inode by len bytes.
> 3) Compute the file's logical block number against offset. If the computed
> block number is not the starting block of the extent, split the extent
> such that the block number is the starting block of the extent.
> 4) Shift all the extents which are lying between [offset, last allocated extent]
> towards right by len bytes. This step will make a hole of len bytes
> at offset.
> 5) Allocate unwritten extents for the hole created in step 4.
>
> Cc: Brian Foster <bfoster@redhat.com>
> Signed-off-by: Namjae Jeon <namjae.jeon@samsung.com>
> Signed-off-by: Ashish Sangwan <a.sangwan@samsung.com>
> ---
This version looks pretty good to me. Thanks Namjae.
Reviewed-by: Brian Foster <bfoster@redhat.com>
>
> Changelog
> v3:
> - remove XFS_TRANS_RESERVE and assert.
> - update the comment of blockcount calculation.
> - use 'if(blockcount)' instead of 'if (got.br_blockcount < blockcount)'.
> - move insert_file_space() calling under xfs_setattr_size to avoid code duplicate.
>
> v2:
> - remove reserved enable.
> - add xfs_qm_dqattach.
> - reset blockcount in xfs_bmap_shift_extents_right.
> - update i_size to avoid data loss before insert_file_space() is called.
> - use in-memory extent array size that delayed allocation extents.
>
> fs/xfs/xfs_bmap.c | 377 ++++++++++++++++++++++++++++++++++++++++++++++++-
> fs/xfs/xfs_bmap.h | 9 +-
> fs/xfs/xfs_bmap_util.c | 128 ++++++++++++++++-
> fs/xfs/xfs_bmap_util.h | 2 +
> fs/xfs/xfs_file.c | 38 ++++-
> fs/xfs/xfs_trace.h | 1 +
> 6 files changed, 550 insertions(+), 5 deletions(-)
>
> diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
> index 1ff0da6..8023b7e 100644
> --- a/fs/xfs/xfs_bmap.c
> +++ b/fs/xfs/xfs_bmap.c
> @@ -5419,7 +5419,7 @@ error0:
> * into, this will be considered invalid operation and we abort immediately.
> */
> int
> -xfs_bmap_shift_extents(
> +xfs_bmap_shift_extents_left(
> struct xfs_trans *tp,
> struct xfs_inode *ip,
> int *done,
> @@ -5449,7 +5449,7 @@ xfs_bmap_shift_extents(
> (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
> XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
> mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
> - XFS_ERROR_REPORT("xfs_bmap_shift_extents",
> + XFS_ERROR_REPORT("xfs_bmap_shift_extents_left",
> XFS_ERRLEVEL_LOW, mp);
> return XFS_ERROR(EFSCORRUPTED);
> }
> @@ -5606,3 +5606,376 @@ del_cursor:
> xfs_trans_log_inode(tp, ip, logflags);
> return error;
> }
> +
> +/*
> + * Splits an extent into two extents at the split_fsb block so that it is
> + * the first block of the current_ext. @current_ext is the target extent
> + * to be split. @split_fsb is the block where the extent is split.
> + * If split_fsb lies in a hole or is the first block of an extent, just return 0.
> + */
> +STATIC int
> +xfs_bmap_split_extent_at(
> + struct xfs_trans *tp,
> + struct xfs_inode *ip,
> + xfs_fileoff_t split_fsb,
> + xfs_extnum_t *current_ext,
> + xfs_fsblock_t *firstfsb,
> + struct xfs_bmap_free *free_list)
> +{
> + int whichfork = XFS_DATA_FORK;
> + struct xfs_btree_cur *cur;
> + struct xfs_bmbt_rec_host *gotp;
> + struct xfs_bmbt_irec got;
> + struct xfs_bmbt_irec new; /* splitted extent */
> + struct xfs_mount *mp = ip->i_mount;
> + struct xfs_ifork *ifp;
> + xfs_fsblock_t gotblkcnt; /* new block count for got */
> + int error = 0;
> + int logflags;
> + int i = 0;
> +
> + if (unlikely(XFS_TEST_ERROR(
> + (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
> + XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
> + mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
> + XFS_ERROR_REPORT("xfs_bmap_split_extent_at",
> + XFS_ERRLEVEL_LOW, mp);
> + return XFS_ERROR(EFSCORRUPTED);
> + }
> +
> + if (XFS_FORCED_SHUTDOWN(mp))
> + return XFS_ERROR(EIO);
> +
> + ASSERT(current_ext != NULL);
> +
> + ifp = XFS_IFORK_PTR(ip, whichfork);
> + if (!(ifp->if_flags & XFS_IFEXTENTS)) {
> + /* Read in all the extents */
> + error = xfs_iread_extents(tp, ip, whichfork);
> + if (error)
> + return error;
> + }
> +
> + gotp = xfs_iext_bno_to_ext(ifp, split_fsb, current_ext);
> + /*
> + * gotp can be null in 2 cases: 1) if there are no extents
> + * or 2) split_fsb lies in a hole beyond which there are
> + * no extents. Either way, we are done.
> + */
> + if (!gotp)
> + return 0;
> +
> + xfs_bmbt_get_all(gotp, &got);
> +
> + /*
> + * Check split_fsb lies in a hole or the start boundary offset
> + * of the extent.
> + */
> + if (got.br_startoff >= split_fsb)
> + return 0;
> +
> + gotblkcnt = split_fsb - got.br_startoff;
> + new.br_startoff = split_fsb;
> + new.br_startblock = got.br_startblock + gotblkcnt;
> + new.br_blockcount = got.br_blockcount - gotblkcnt;
> + new.br_state = got.br_state;
> +
> + /* We are going to change core inode */
> + logflags = XFS_ILOG_CORE;
> +
> + if (ifp->if_flags & XFS_IFBROOT) {
> + cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
> + cur->bc_private.b.firstblock = *firstfsb;
> + cur->bc_private.b.flist = free_list;
> + cur->bc_private.b.flags = 0;
> + } else {
> + cur = NULL;
> + logflags |= XFS_ILOG_DEXT;
> + }
> +
> + if (cur) {
> + error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
> + got.br_startblock,
> + got.br_blockcount,
> + &i);
> + if (error)
> + goto del_cursor;
> + XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
> + }
> +
> + xfs_bmbt_set_blockcount(gotp, gotblkcnt);
> + got.br_blockcount = gotblkcnt;
> + if (cur) {
> + error = xfs_bmbt_update(cur, got.br_startoff,
> + got.br_startblock,
> + got.br_blockcount,
> + got.br_state);
> + if (error)
> + goto del_cursor;
> + }
> +
> + /* Add new extent */
> + (*current_ext)++;
> + xfs_iext_insert(ip, *current_ext, 1, &new, 0);
> + XFS_IFORK_NEXT_SET(ip, whichfork,
> + XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
> +
> + if (cur) {
> + error = xfs_bmbt_lookup_eq(cur, new.br_startoff,
> + new.br_startblock, new.br_blockcount,
> + &i);
> + if (error)
> + goto del_cursor;
> + XFS_WANT_CORRUPTED_GOTO(i == 0, del_cursor);
> + cur->bc_rec.b.br_state = new.br_state;
> +
> + error = xfs_btree_insert(cur, &i);
> + if (error)
> + goto del_cursor;
> + XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
> + }
> +
> + /*
> + * Convert to a btree if necessary.
> + */
> + if (xfs_bmap_needs_btree(ip, whichfork)) {
> + int tmp_logflags; /* partial log flag return val */
> +
> + ASSERT(cur == NULL);
> + error = xfs_bmap_extents_to_btree(tp, ip, firstfsb, free_list,
> + &cur, 0, &tmp_logflags, whichfork);
> + logflags |= tmp_logflags;
> + }
> +
> +del_cursor:
> + if (cur)
> + xfs_btree_del_cursor(cur,
> + error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
> + xfs_trans_log_inode(tp, ip, logflags);
> + return error;
> +}
> +
> +int
> +xfs_bmap_split_extent(
> + struct xfs_inode *ip,
> + xfs_fileoff_t split_fsb,
> + xfs_extnum_t *split_ext)
> +{
> + struct xfs_mount *mp = ip->i_mount;
> + struct xfs_trans *tp;
> + struct xfs_bmap_free free_list;
> + xfs_fsblock_t firstfsb;
> + int committed;
> + int error;
> +
> + tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
> + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
> + XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
> +
> + if (error) {
> + /*
> + * Free the transaction structure.
> + */
> + xfs_trans_cancel(tp, 0);
> + return error;
> + }
> +
> + xfs_ilock(ip, XFS_ILOCK_EXCL);
> + error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot,
> + ip->i_gdquot, ip->i_pdquot,
> + XFS_DIOSTRAT_SPACE_RES(mp, 0), 0,
> + XFS_QMOPT_RES_REGBLKS);
> + if (error)
> + goto error1;
> +
> + xfs_trans_ijoin(tp, ip, 0);
> + xfs_bmap_init(&free_list, &firstfsb);
> +
> + error = xfs_bmap_split_extent_at(tp, ip, split_fsb, split_ext,
> + &firstfsb, &free_list);
> + if (error)
> + goto error0;
> +
> + error = xfs_bmap_finish(&tp, &free_list, &committed);
> + if (error)
> + goto error0;
> +
> + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
> + xfs_iunlock(ip, XFS_ILOCK_EXCL);
> +
> + return error;
> +error0:
> + xfs_bmap_cancel(&free_list);
> +error1:
> + xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
> + xfs_iunlock(ip, XFS_ILOCK_EXCL);
> + return error;
> +}
> +
> +/*
> + * Shift extent records to the right to make a hole.
> + * The maximum number of extents to be shifted in a single operation
> + * is @num_exts, and @current_ext keeps track of the current extent
> + * index we have shifted. @offset_shift_fsb is the length by which each
> + * extent is shifted. @end_ext is the last extent to be shifted.
> + */
> +int
> +xfs_bmap_shift_extents_right(
> + struct xfs_trans *tp,
> + struct xfs_inode *ip,
> + int *done,
> + xfs_fileoff_t offset_shift_fsb,
> + xfs_extnum_t *current_ext,
> + xfs_extnum_t end_ext,
> + xfs_fsblock_t *firstblock,
> + struct xfs_bmap_free *flist,
> + int num_exts)
> +{
> + struct xfs_mount *mp = ip->i_mount;
> + struct xfs_btree_cur *cur;
> + struct xfs_bmbt_rec_host *gotp;
> + struct xfs_bmbt_irec got;
> + struct xfs_bmbt_irec right;
> + xfs_ifork_t *ifp;
> + xfs_fileoff_t startoff;
> + xfs_filblks_t blockcount = 0;
> + xfs_extnum_t last_extent;
> + int error = 0;
> + int i;
> + int whichfork = XFS_DATA_FORK;
> + int logflags;
> +
> + if (unlikely(XFS_TEST_ERROR(
> + (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
> + XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
> + mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
> + XFS_ERROR_REPORT("xfs_bmap_shift_extents_right",
> + XFS_ERRLEVEL_LOW, mp);
> + return XFS_ERROR(EFSCORRUPTED);
> + }
> +
> + if (XFS_FORCED_SHUTDOWN(mp))
> + return XFS_ERROR(EIO);
> +
> + ASSERT(current_ext != NULL);
> +
> + /* We are going to change core inode */
> + logflags = XFS_ILOG_CORE;
> + ifp = XFS_IFORK_PTR(ip, whichfork);
> +
> + if (ifp->if_flags & XFS_IFBROOT) {
> + cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
> + cur->bc_private.b.firstblock = *firstblock;
> + cur->bc_private.b.flist = flist;
> + cur->bc_private.b.flags = 0;
> + } else {
> + cur = NULL;
> + logflags |= XFS_ILOG_DEXT;
> + }
> +
> + /* start shifting extents to right */
> + while (num_exts-- > 0) {
> + blockcount = 0;
> +
> + if (*current_ext < end_ext) {
> + *done = 1;
> + break;
> + }
> +
> + gotp = xfs_iext_get_ext(ifp, *current_ext);
> + xfs_bmbt_get_all(gotp, &got);
> + startoff = got.br_startoff + offset_shift_fsb;
> +
> + /*
> + * Before shifting extent into hole, make sure that the hole
> + * is large enough to accommodate the shift. This checking has
> + * to be performed for all except the last extent.
> + */
> + last_extent = (ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) - 1;
> + if (last_extent != *current_ext) {
> + xfs_bmbt_get_all(xfs_iext_get_ext(ifp,
> + *current_ext + 1), &right);
> + if (startoff + got.br_blockcount > right.br_startoff) {
> + error = XFS_ERROR(EINVAL);
> + if (error)
> + goto del_cursor;
> + }
> + }
> +
> + /* Check if we can merge 2 adjacent extents */
> + if (last_extent != *current_ext &&
> + right.br_startoff == startoff + got.br_blockcount &&
> + right.br_startblock ==
> + got.br_startblock + got.br_blockcount &&
> + right.br_state == got.br_state &&
> + right.br_blockcount + got.br_blockcount <= MAXEXTLEN) {
> +
> + /*
> + * Merge the current extent with the extent to
> + * the right. Remove the right extent, calculate
> + * a new block count for the current extent to cover
> + * the range of both and decrement the number of extents
> + * in the fork.
> + */
> + blockcount = right.br_blockcount + got.br_blockcount;
> +
> + if (cur) {
> + error = xfs_bmbt_lookup_eq(cur,
> + right.br_startoff,
> + right.br_startblock,
> + right.br_blockcount,
> + &i);
> + if (error)
> + goto del_cursor;
> + XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
> + }
> +
> + xfs_iext_remove(ip, *current_ext + 1, 1, 0);
> + if (cur) {
> + error = xfs_btree_delete(cur, &i);
> + if (error)
> + goto del_cursor;
> + XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
> + }
> + XFS_IFORK_NEXT_SET(ip, whichfork,
> + XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
> +
> + }
> +
> + if (cur) {
> + error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
> + got.br_startblock,
> + got.br_blockcount,
> + &i);
> + if (error)
> + goto del_cursor;
> + XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
> + }
> +
> + if (blockcount) {
> + xfs_bmbt_set_blockcount(gotp, blockcount);
> + got.br_blockcount = blockcount;
> + }
> +
> + xfs_bmbt_set_startoff(gotp, startoff);
> + got.br_startoff = startoff;
> +
> + if (cur) {
> + error = xfs_bmbt_update(cur, got.br_startoff,
> + got.br_startblock,
> + got.br_blockcount,
> + got.br_state);
> + if (error)
> + goto del_cursor;
> + }
> +
> + (*current_ext)--;
> + }
> +
> +del_cursor:
> + if (cur)
> + xfs_btree_del_cursor(cur,
> + error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
> + xfs_trans_log_inode(tp, ip, logflags);
> + return error;
> +}
> diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
> index 38ba36e..af05899 100644
> --- a/fs/xfs/xfs_bmap.h
> +++ b/fs/xfs/xfs_bmap.h
> @@ -179,10 +179,17 @@ int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
> int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
> xfs_extnum_t num);
> uint xfs_default_attroffset(struct xfs_inode *ip);
> -int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip,
> +int xfs_bmap_shift_extents_left(struct xfs_trans *tp, struct xfs_inode *ip,
> int *done, xfs_fileoff_t start_fsb,
> xfs_fileoff_t offset_shift_fsb, xfs_extnum_t *current_ext,
> xfs_fsblock_t *firstblock, struct xfs_bmap_free *flist,
> int num_exts);
> +int xfs_bmap_split_extent(struct xfs_inode *ip, xfs_fileoff_t split_offset,
> + xfs_extnum_t *split_ext);
> +int xfs_bmap_shift_extents_right(struct xfs_trans *tp, struct xfs_inode *ip,
> + int *done, xfs_fsblock_t offset_shift_fsb,
> + xfs_extnum_t *current_ext, xfs_extnum_t end_ext,
> + xfs_fsblock_t *firstblock, struct xfs_bmap_free *flist,
> + int num_exts);
>
> #endif /* __XFS_BMAP_H__ */
> diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
> index 057f671..b9511dd 100644
> --- a/fs/xfs/xfs_bmap_util.c
> +++ b/fs/xfs/xfs_bmap_util.c
> @@ -1548,7 +1548,7 @@ xfs_collapse_file_space(
> * We are using the write transaction in which max 2 bmbt
> * updates are allowed
> */
> - error = xfs_bmap_shift_extents(tp, ip, &done, start_fsb,
> + error = xfs_bmap_shift_extents_left(tp, ip, &done, start_fsb,
> shift_fsb, &current_ext,
> &first_block, &free_list,
> XFS_BMAP_MAX_SHIFT_EXTENTS);
> @@ -1572,6 +1572,132 @@ out:
> }
>
> /*
> + * xfs_insert_file_space()
> + * This routine allocate disk space and shift extent for the given file.
> + * The first thing we do is to sync dirty data and invalidate page cache
> + * over the region on which insert range is working. And split an extent
> + * to two extents at given offset by calling xfs_bmap_split_extent.
> + * And shift all extent records which are lying between [offset,
> + * last allocated extent] to the right to reserve hole range. Lastly
> + * allocate an unwritten extent in hole range created by shifting extents.
> + *
> + * RETURNS:
> + * 0 on success
> + * errno on error
> + *
> + */
> +int
> +xfs_insert_file_space(
> + struct xfs_inode *ip,
> + loff_t offset,
> + loff_t len)
> +{
> + struct xfs_mount *mp = ip->i_mount;
> + struct xfs_trans *tp;
> + struct xfs_bmap_free free_list;
> + xfs_fsblock_t first_block;
> + xfs_ifork_t *ifp;
> + int done = 0;
> + int committed;
> + int error;
> + uint rounding;
> + xfs_fileoff_t start_fsb;
> + xfs_fileoff_t shift_fsb;
> + xfs_extnum_t split_ext;
> + xfs_extnum_t current_ext = 0;
> + xfs_off_t ioffset;
> +
> + ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
> + trace_xfs_insert_file_space(ip);
> +
> + error = xfs_qm_dqattach(ip, 0);
> + if (error)
> + return error;
> +
> + /* wait for the completion of any pending DIOs */
> + inode_dio_wait(VFS_I(ip));
> +
> + rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
> + ioffset = offset & ~(rounding - 1);
> + error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
> + ioffset, -1);
> + if (error)
> + return error;
> +
> + truncate_pagecache_range(VFS_I(ip), ioffset, -1);
> +
> + start_fsb = XFS_B_TO_FSB(mp, offset);
> + shift_fsb = XFS_B_TO_FSB(mp, len);
> +
> + error = xfs_bmap_split_extent(ip, start_fsb, &split_ext);
> + if (error)
> + return error;
> +
> + ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
> + current_ext = (ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) - 1;
> + while (!error && !done) {
> + tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
> + /*
> + * We would need to reserve permanent block for transaction.
> + * This will come into picture when after shifting extent into
> + * hole we found that adjacent extents can be merged which
> + * may lead to freeing of a block during record update.
> + */
> + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
> + XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
> + if (error) {
> + xfs_trans_cancel(tp, 0);
> + break;
> + }
> +
> + xfs_ilock(ip, XFS_ILOCK_EXCL);
> + error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot,
> + ip->i_gdquot, ip->i_pdquot,
> + XFS_DIOSTRAT_SPACE_RES(mp, 0), 0,
> + XFS_QMOPT_RES_REGBLKS);
> + if (error)
> + goto error1;
> +
> + xfs_trans_ijoin(tp, ip, 0);
> +
> + xfs_bmap_init(&free_list, &first_block);
> +
> + /*
> + * We are using the write transaction in which max 2 bmbt
> + * updates are allowed
> + */
> + error = xfs_bmap_shift_extents_right(tp, ip, &done, shift_fsb,
> + &current_ext, split_ext,
> + &first_block, &free_list,
> + XFS_BMAP_MAX_SHIFT_EXTENTS);
> + if (error)
> + goto error0;
> +
> + error = xfs_bmap_finish(&tp, &free_list, &committed);
> + if (error)
> + goto error0;
> +
> + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
> + xfs_iunlock(ip, XFS_ILOCK_EXCL);
> + if (error)
> + goto out;
> + }
> +
> + /* Add unwritten extent in a hole range. */
> + error = xfs_alloc_file_space(ip, offset, len, XFS_BMAPI_PREALLOC);
> +
> +out:
> + return error;
> +
> +error0:
> + xfs_bmap_cancel(&free_list);
> +error1:
> + xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
> + xfs_iunlock(ip, XFS_ILOCK_EXCL);
> + return error;
> +}
> +
> +/*
> * We need to check that the format of the data fork in the temporary inode is
> * valid for the target inode before doing the swap. This is not a problem with
> * attr1 because of the fixed fork offset, but attr2 has a dynamically sized
> diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
> index 935ed2b..d62ab4b 100644
> --- a/fs/xfs/xfs_bmap_util.h
> +++ b/fs/xfs/xfs_bmap_util.h
> @@ -101,6 +101,8 @@ int xfs_zero_file_space(struct xfs_inode *ip, xfs_off_t offset,
> xfs_off_t len);
> int xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset,
> xfs_off_t len);
> +int xfs_insert_file_space(struct xfs_inode *, xfs_off_t offset,
> + xfs_off_t len);
>
> /* EOF block manipulation functions */
> bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force);
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 1f66779..f160cd5 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -756,11 +756,13 @@ xfs_file_fallocate(
> struct xfs_trans *tp;
> long error;
> loff_t new_size = 0;
> + int do_file_insert = 0;
>
> if (!S_ISREG(inode->i_mode))
> return -EINVAL;
> if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
> - FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
> + FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
> + FALLOC_FL_INSERT_RANGE))
> return -EOPNOTSUPP;
>
> xfs_ilock(ip, XFS_IOLOCK_EXCL);
> @@ -790,6 +792,28 @@ xfs_file_fallocate(
> error = xfs_collapse_file_space(ip, offset, len);
> if (error)
> goto out_unlock;
> + } else if (mode & FALLOC_FL_INSERT_RANGE) {
> + unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
> +
> + if (offset & blksize_mask || len & blksize_mask) {
> + error = -EINVAL;
> + goto out_unlock;
> + }
> +
> + /* Check for wrap through zero */
> + if (inode->i_size + len > inode->i_sb->s_maxbytes) {
> + error = -EFBIG;
> + goto out_unlock;
> + }
> +
> + /* Offset should be less than i_size */
> + if (offset >= i_size_read(inode)) {
> + error = -EINVAL;
> + goto out_unlock;
> + }
> +
> + new_size = i_size_read(inode) + len;
> + do_file_insert = 1;
> } else {
> if (!(mode & FALLOC_FL_KEEP_SIZE) &&
> offset + len > i_size_read(inode)) {
> @@ -840,8 +864,20 @@ xfs_file_fallocate(
> iattr.ia_valid = ATTR_SIZE;
> iattr.ia_size = new_size;
> error = xfs_setattr_size(ip, &iattr);
> + if (error)
> + goto out_unlock;
> }
>
> + /*
> + * Some operations are performed after the inode size is updated. For
> + * example, insert range expands the address space of the file, shifts
> + * all subsequent extents over and allocates space into the hole.
> + * Updating the size first ensures that shifted extents aren't left
> + * hanging past EOF in the event of a crash or failure.
> + */
> + if (do_file_insert)
> + error = xfs_insert_file_space(ip, offset, len);
> +
> out_unlock:
> xfs_iunlock(ip, XFS_IOLOCK_EXCL);
> return -error;
> diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
> index 152f827..8943c9f 100644
> --- a/fs/xfs/xfs_trace.h
> +++ b/fs/xfs/xfs_trace.h
> @@ -663,6 +663,7 @@ DEFINE_INODE_EVENT(xfs_alloc_file_space);
> DEFINE_INODE_EVENT(xfs_free_file_space);
> DEFINE_INODE_EVENT(xfs_zero_file_space);
> DEFINE_INODE_EVENT(xfs_collapse_file_space);
> +DEFINE_INODE_EVENT(xfs_insert_file_space);
> DEFINE_INODE_EVENT(xfs_readdir);
> #ifdef CONFIG_XFS_POSIX_ACL
> DEFINE_INODE_EVENT(xfs_get_acl);
> --
> 1.7.11-rc0
>
> _______________________________________________
> xfs mailing list
> xfs@oss.sgi.com
> http://oss.sgi.com/mailman/listinfo/xfs
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH v3 2/10] xfs: Add support FALLOC_FL_INSERT_RANGE for fallocate
2014-05-27 8:18 [PATCH v3 2/10] xfs: Add support FALLOC_FL_INSERT_RANGE for fallocate Namjae Jeon
2014-05-27 18:40 ` Brian Foster
@ 2014-05-27 20:59 ` Brian Foster
2014-05-28 0:10 ` Namjae Jeon
1 sibling, 1 reply; 4+ messages in thread
From: Brian Foster @ 2014-05-27 20:59 UTC (permalink / raw)
To: Namjae Jeon; +Cc: linux-fsdevel, Ashish Sangwan, xfs
On Tue, May 27, 2014 at 05:18:57PM +0900, Namjae Jeon wrote:
> This patch implements fallocate's FALLOC_FL_INSERT_RANGE for XFS.
>
> 1) Make sure that both offset and len are block size aligned.
> 2) Update the i_size of inode by len bytes.
> 3) Compute the file's logical block number against offset. If the computed
> block number is not the starting block of the extent, split the extent
> such that the block number is the starting block of the extent.
> 4) Shift all the extents which are lying between [offset, last allocated extent]
> towards right by len bytes. This step will make a hole of len bytes
> at offset.
> 5) Allocate unwritten extents for the hole created in step 4.
>
> Cc: Brian Foster <bfoster@redhat.com>
> Signed-off-by: Namjae Jeon <namjae.jeon@samsung.com>
> Signed-off-by: Ashish Sangwan <a.sangwan@samsung.com>
> ---
FYI, while I didn't notice any test failures so far, a run through some
of the generic xfstests does produce some assert failures and whatnot. I
noticed output from generic/013,091,127,263. I haven't looked at these
much at all so far, I just wanted to get them on the list...
Namjae, I reproduced these issues running ./check -g auto with
CONFIG_XFS_WARN enabled for XFS (to warn on assert failures). Below is a
quick dump of output from generic/091, as an example. That one
reproduces consistently for me. We'll probably want to run through and
squash this and any others before this gets merged.
Brian
root: run xfstest generic/091
kernel: [ 8507.476387] XFS: Assertion failed: cur->bc_btnum != XFS_BTNUM_BMAP || cur->bc_private.b.allocated == 0, file: fs/xfs/xfs_btree.c, line: 314
kernel: [ 8507.476392] ------------[ cut here ]------------
kernel: [ 8507.476430] WARNING: CPU: 2 PID: 4648 at fs/xfs/xfs_message.c:99 asswarn+0x33/0x40 [xfs]()
kernel: [ 8507.476433] Modules linked in: xfs loop ip6t_rpfilter ip6t_REJECT xt_conntrack cfg80211 rfkill ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw snd_hda_codec_generic snd_hda_intel snd_hda_controller snd_hda_codec joydev snd_hwdep snd_seq ppdev snd_seq_device snd_pcm virtio_console serio_raw virtio_balloon parport_pc snd_timer parport pvpanic snd soundcore i2c_piix4 sunrpc virtio_net virtio_blk qxl drm_kms_helper virtio_pci ttm virtio_ring virtio drm ata_generic i2c_core pata_acpi [last unloaded: xfs]
kernel: [ 8507.476520] CPU: 2 PID: 4648 Comm: fsx Tainted: G W O 3.15.0-rc5+ #87
kernel: [ 8507.476522] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
kernel: [ 8507.476525] 0000000000000000 00000000cca654ac ffff88009c415c00 ffffffff817bc80e
kernel: [ 8507.476529] 0000000000000000 ffff88009c415c38 ffffffff81095c4d ffff880035ee41a0
kernel: [ 8507.476533] 0000000000000000 ffff880035ee41a0 0000000000000000 ffff8800af5d0000
kernel: [ 8507.476537] Call Trace:
kernel: [ 8507.476545] [<ffffffff817bc80e>] dump_stack+0x4d/0x66
kernel: [ 8507.476549] [<ffffffff81095c4d>] warn_slowpath_common+0x7d/0xa0
kernel: [ 8507.476553] [<ffffffff81095d7a>] warn_slowpath_null+0x1a/0x20
kernel: [ 8507.476575] [<ffffffffa03a9173>] asswarn+0x33/0x40 [xfs]
kernel: [ 8507.476600] [<ffffffffa03d5739>] xfs_btree_del_cursor+0x89/0xb0 [xfs]
kernel: [ 8507.476624] [<ffffffffa03c485e>] xfs_bmap_split_extent_at.constprop.19+0x30e/0x500 [xfs]
kernel: [ 8507.476648] [<ffffffffa03d1c4e>] xfs_bmap_split_extent+0x13e/0x1c0 [xfs]
kernel: [ 8507.476668] [<ffffffffa0390578>] xfs_insert_file_space+0x188/0x410 [xfs]
kernel: [ 8507.476695] [<ffffffffa03f5f97>] ? xfs_iunlock+0x157/0x230 [xfs]
kernel: [ 8507.476717] [<ffffffffa03a71e1>] ? xfs_setattr_size+0x381/0x4b0 [xfs]
kernel: [ 8507.476737] [<ffffffffa0398fe3>] xfs_file_fallocate+0x3c3/0x470 [xfs]
kernel: [ 8507.476742] [<ffffffff8122ca28>] ? do_fallocate+0x118/0x1d0
kernel: [ 8507.476745] [<ffffffff8122ca3e>] do_fallocate+0x12e/0x1d0
kernel: [ 8507.476748] [<ffffffff8122cb24>] SyS_fallocate+0x44/0x70
kernel: [ 8507.476752] [<ffffffff817d0329>] system_call_fastpath+0x16/0x1b
kernel: [ 8507.476755] ---[ end trace cf941380ff6e4058 ]---
>
> Changelog
> v3:
> - remove XFS_TRANS_RESERVE and assert.
> - update the comment of blockcount calculation.
> - use 'if(blockcount)' instead of 'if (got.br_blockcount < blockcount)'.
> - move insert_file_space() calling under xfs_setattr_size to avoid code duplicate.
>
> v2:
> - remove reserved enable.
> - add xfs_qm_dqattach.
> - reset blockcount in xfs_bmap_shift_extents_right.
> - update i_size to avoid data loss before insert_file_space() is called.
> - use in-memory extent array size that delayed allocation extents.
>
> fs/xfs/xfs_bmap.c | 377 ++++++++++++++++++++++++++++++++++++++++++++++++-
> fs/xfs/xfs_bmap.h | 9 +-
> fs/xfs/xfs_bmap_util.c | 128 ++++++++++++++++-
> fs/xfs/xfs_bmap_util.h | 2 +
> fs/xfs/xfs_file.c | 38 ++++-
> fs/xfs/xfs_trace.h | 1 +
> 6 files changed, 550 insertions(+), 5 deletions(-)
>
> diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
> index 1ff0da6..8023b7e 100644
> --- a/fs/xfs/xfs_bmap.c
> +++ b/fs/xfs/xfs_bmap.c
> @@ -5419,7 +5419,7 @@ error0:
> * into, this will be considered invalid operation and we abort immediately.
> */
> int
> -xfs_bmap_shift_extents(
> +xfs_bmap_shift_extents_left(
> struct xfs_trans *tp,
> struct xfs_inode *ip,
> int *done,
> @@ -5449,7 +5449,7 @@ xfs_bmap_shift_extents(
> (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
> XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
> mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
> - XFS_ERROR_REPORT("xfs_bmap_shift_extents",
> + XFS_ERROR_REPORT("xfs_bmap_shift_extents_left",
> XFS_ERRLEVEL_LOW, mp);
> return XFS_ERROR(EFSCORRUPTED);
> }
> @@ -5606,3 +5606,376 @@ del_cursor:
> xfs_trans_log_inode(tp, ip, logflags);
> return error;
> }
> +
> +/*
> + * Splits an extent into two extents at the split_fsb block so that it is
> + * the first block of the current_ext. @current_ext is the target extent
> + * to be split. @split_fsb is the block where the extent is split.
> + * If split_fsb lies in a hole or is the first block of an extent, just return 0.
> + */
> +STATIC int
> +xfs_bmap_split_extent_at(
> + struct xfs_trans *tp,
> + struct xfs_inode *ip,
> + xfs_fileoff_t split_fsb,
> + xfs_extnum_t *current_ext,
> + xfs_fsblock_t *firstfsb,
> + struct xfs_bmap_free *free_list)
> +{
> + int whichfork = XFS_DATA_FORK;
> + struct xfs_btree_cur *cur;
> + struct xfs_bmbt_rec_host *gotp;
> + struct xfs_bmbt_irec got;
> + struct xfs_bmbt_irec new; /* splitted extent */
> + struct xfs_mount *mp = ip->i_mount;
> + struct xfs_ifork *ifp;
> + xfs_fsblock_t gotblkcnt; /* new block count for got */
> + int error = 0;
> + int logflags;
> + int i = 0;
> +
> + if (unlikely(XFS_TEST_ERROR(
> + (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
> + XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
> + mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
> + XFS_ERROR_REPORT("xfs_bmap_split_extent_at",
> + XFS_ERRLEVEL_LOW, mp);
> + return XFS_ERROR(EFSCORRUPTED);
> + }
> +
> + if (XFS_FORCED_SHUTDOWN(mp))
> + return XFS_ERROR(EIO);
> +
> + ASSERT(current_ext != NULL);
> +
> + ifp = XFS_IFORK_PTR(ip, whichfork);
> + if (!(ifp->if_flags & XFS_IFEXTENTS)) {
> + /* Read in all the extents */
> + error = xfs_iread_extents(tp, ip, whichfork);
> + if (error)
> + return error;
> + }
> +
> + gotp = xfs_iext_bno_to_ext(ifp, split_fsb, current_ext);
> + /*
> + * gotp can be null in 2 cases: 1) if there are no extents
> + * or 2) split_fsb lies in a hole beyond which there are
> + * no extents. Either way, we are done.
> + */
> + if (!gotp)
> + return 0;
> +
> + xfs_bmbt_get_all(gotp, &got);
> +
> + /*
> + * Check split_fsb lies in a hole or the start boundary offset
> + * of the extent.
> + */
> + if (got.br_startoff >= split_fsb)
> + return 0;
> +
> + gotblkcnt = split_fsb - got.br_startoff;
> + new.br_startoff = split_fsb;
> + new.br_startblock = got.br_startblock + gotblkcnt;
> + new.br_blockcount = got.br_blockcount - gotblkcnt;
> + new.br_state = got.br_state;
> +
> + /* We are going to change core inode */
> + logflags = XFS_ILOG_CORE;
> +
> + if (ifp->if_flags & XFS_IFBROOT) {
> + cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
> + cur->bc_private.b.firstblock = *firstfsb;
> + cur->bc_private.b.flist = free_list;
> + cur->bc_private.b.flags = 0;
> + } else {
> + cur = NULL;
> + logflags |= XFS_ILOG_DEXT;
> + }
> +
> + if (cur) {
> + error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
> + got.br_startblock,
> + got.br_blockcount,
> + &i);
> + if (error)
> + goto del_cursor;
> + XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
> + }
> +
> + xfs_bmbt_set_blockcount(gotp, gotblkcnt);
> + got.br_blockcount = gotblkcnt;
> + if (cur) {
> + error = xfs_bmbt_update(cur, got.br_startoff,
> + got.br_startblock,
> + got.br_blockcount,
> + got.br_state);
> + if (error)
> + goto del_cursor;
> + }
> +
> + /* Add new extent */
> + (*current_ext)++;
> + xfs_iext_insert(ip, *current_ext, 1, &new, 0);
> + XFS_IFORK_NEXT_SET(ip, whichfork,
> + XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
> +
> + if (cur) {
> + error = xfs_bmbt_lookup_eq(cur, new.br_startoff,
> + new.br_startblock, new.br_blockcount,
> + &i);
> + if (error)
> + goto del_cursor;
> + XFS_WANT_CORRUPTED_GOTO(i == 0, del_cursor);
> + cur->bc_rec.b.br_state = new.br_state;
> +
> + error = xfs_btree_insert(cur, &i);
> + if (error)
> + goto del_cursor;
> + XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
> + }
> +
> + /*
> + * Convert to a btree if necessary.
> + */
> + if (xfs_bmap_needs_btree(ip, whichfork)) {
> + int tmp_logflags; /* partial log flag return val */
> +
> + ASSERT(cur == NULL);
> + error = xfs_bmap_extents_to_btree(tp, ip, firstfsb, free_list,
> + &cur, 0, &tmp_logflags, whichfork);
> + logflags |= tmp_logflags;
> + }
> +
> +del_cursor:
> + if (cur)
> + xfs_btree_del_cursor(cur,
> + error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
> + xfs_trans_log_inode(tp, ip, logflags);
> + return error;
> +}
> +
> +int
> +xfs_bmap_split_extent(
> + struct xfs_inode *ip,
> + xfs_fileoff_t split_fsb,
> + xfs_extnum_t *split_ext)
> +{
> + struct xfs_mount *mp = ip->i_mount;
> + struct xfs_trans *tp;
> + struct xfs_bmap_free free_list;
> + xfs_fsblock_t firstfsb;
> + int committed;
> + int error;
> +
> + tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
> + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
> + XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
> +
> + if (error) {
> + /*
> + * Free the transaction structure.
> + */
> + xfs_trans_cancel(tp, 0);
> + return error;
> + }
> +
> + xfs_ilock(ip, XFS_ILOCK_EXCL);
> + error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot,
> + ip->i_gdquot, ip->i_pdquot,
> + XFS_DIOSTRAT_SPACE_RES(mp, 0), 0,
> + XFS_QMOPT_RES_REGBLKS);
> + if (error)
> + goto error1;
> +
> + xfs_trans_ijoin(tp, ip, 0);
> + xfs_bmap_init(&free_list, &firstfsb);
> +
> + error = xfs_bmap_split_extent_at(tp, ip, split_fsb, split_ext,
> + &firstfsb, &free_list);
> + if (error)
> + goto error0;
> +
> + error = xfs_bmap_finish(&tp, &free_list, &committed);
> + if (error)
> + goto error0;
> +
> + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
> + xfs_iunlock(ip, XFS_ILOCK_EXCL);
> +
> + return error;
> +error0:
> + xfs_bmap_cancel(&free_list);
> +error1:
> + xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
> + xfs_iunlock(ip, XFS_ILOCK_EXCL);
> + return error;
> +}
> +
> +/*
> + * Shift extent records to the right to make a hole.
> + * The maximum number of extents to be shifted in a single operation
> + * is @num_exts, and @current_ext keeps track of the current extent
> + * index we have shifted. @offset_shift_fsb is the length by which each
> + * extent is shifted. @end_ext is the last extent to be shifted.
> + */
> +int
> +xfs_bmap_shift_extents_right(
> + struct xfs_trans *tp,
> + struct xfs_inode *ip,
> + int *done,
> + xfs_fileoff_t offset_shift_fsb,
> + xfs_extnum_t *current_ext,
> + xfs_extnum_t end_ext,
> + xfs_fsblock_t *firstblock,
> + struct xfs_bmap_free *flist,
> + int num_exts)
> +{
> + struct xfs_mount *mp = ip->i_mount;
> + struct xfs_btree_cur *cur;
> + struct xfs_bmbt_rec_host *gotp;
> + struct xfs_bmbt_irec got;
> + struct xfs_bmbt_irec right;
> + xfs_ifork_t *ifp;
> + xfs_fileoff_t startoff;
> + xfs_filblks_t blockcount = 0;
> + xfs_extnum_t last_extent;
> + int error = 0;
> + int i;
> + int whichfork = XFS_DATA_FORK;
> + int logflags;
> +
> + if (unlikely(XFS_TEST_ERROR(
> + (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
> + XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
> + mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
> + XFS_ERROR_REPORT("xfs_bmap_shift_extents_right",
> + XFS_ERRLEVEL_LOW, mp);
> + return XFS_ERROR(EFSCORRUPTED);
> + }
> +
> + if (XFS_FORCED_SHUTDOWN(mp))
> + return XFS_ERROR(EIO);
> +
> + ASSERT(current_ext != NULL);
> +
> + /* We are going to change core inode */
> + logflags = XFS_ILOG_CORE;
> + ifp = XFS_IFORK_PTR(ip, whichfork);
> +
> + if (ifp->if_flags & XFS_IFBROOT) {
> + cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
> + cur->bc_private.b.firstblock = *firstblock;
> + cur->bc_private.b.flist = flist;
> + cur->bc_private.b.flags = 0;
> + } else {
> + cur = NULL;
> + logflags |= XFS_ILOG_DEXT;
> + }
> +
> + /* start shifting extents to right */
> + while (num_exts-- > 0) {
> + blockcount = 0;
> +
> + if (*current_ext < end_ext) {
> + *done = 1;
> + break;
> + }
> +
> + gotp = xfs_iext_get_ext(ifp, *current_ext);
> + xfs_bmbt_get_all(gotp, &got);
> + startoff = got.br_startoff + offset_shift_fsb;
> +
> + /*
> + * Before shifting extent into hole, make sure that the hole
> + * is large enough to accommodate the shift. This checking has
> + * to be performed for all except the last extent.
> + */
> + last_extent = (ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) - 1;
> + if (last_extent != *current_ext) {
> + xfs_bmbt_get_all(xfs_iext_get_ext(ifp,
> + *current_ext + 1), &right);
> + if (startoff + got.br_blockcount > right.br_startoff) {
> + error = XFS_ERROR(EINVAL);
> + if (error)
> + goto del_cursor;
> + }
> + }
> +
> + /* Check if we can merge 2 adjacent extents */
> + if (last_extent != *current_ext &&
> + right.br_startoff == startoff + got.br_blockcount &&
> + right.br_startblock ==
> + got.br_startblock + got.br_blockcount &&
> + right.br_state == got.br_state &&
> + right.br_blockcount + got.br_blockcount <= MAXEXTLEN) {
> +
> + /*
> + * Merge the current extent with the extent to
> + * the right. Remove the right extent, calculate
> + * a new block count for the current extent to cover
> + * the range of both and decrement the number of extents
> + * in the fork.
> + */
> + blockcount = right.br_blockcount + got.br_blockcount;
> +
> + if (cur) {
> + error = xfs_bmbt_lookup_eq(cur,
> + right.br_startoff,
> + right.br_startblock,
> + right.br_blockcount,
> + &i);
> + if (error)
> + goto del_cursor;
> + XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
> + }
> +
> + xfs_iext_remove(ip, *current_ext + 1, 1, 0);
> + if (cur) {
> + error = xfs_btree_delete(cur, &i);
> + if (error)
> + goto del_cursor;
> + XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
> + }
> + XFS_IFORK_NEXT_SET(ip, whichfork,
> + XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
> +
> + }
> +
> + if (cur) {
> + error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
> + got.br_startblock,
> + got.br_blockcount,
> + &i);
> + if (error)
> + goto del_cursor;
> + XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
> + }
> +
> + if (blockcount) {
> + xfs_bmbt_set_blockcount(gotp, blockcount);
> + got.br_blockcount = blockcount;
> + }
> +
> + xfs_bmbt_set_startoff(gotp, startoff);
> + got.br_startoff = startoff;
> +
> + if (cur) {
> + error = xfs_bmbt_update(cur, got.br_startoff,
> + got.br_startblock,
> + got.br_blockcount,
> + got.br_state);
> + if (error)
> + goto del_cursor;
> + }
> +
> + (*current_ext)--;
> + }
> +
> +del_cursor:
> + if (cur)
> + xfs_btree_del_cursor(cur,
> + error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
> + xfs_trans_log_inode(tp, ip, logflags);
> + return error;
> +}
> diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
> index 38ba36e..af05899 100644
> --- a/fs/xfs/xfs_bmap.h
> +++ b/fs/xfs/xfs_bmap.h
> @@ -179,10 +179,17 @@ int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
> int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
> xfs_extnum_t num);
> uint xfs_default_attroffset(struct xfs_inode *ip);
> -int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip,
> +int xfs_bmap_shift_extents_left(struct xfs_trans *tp, struct xfs_inode *ip,
> int *done, xfs_fileoff_t start_fsb,
> xfs_fileoff_t offset_shift_fsb, xfs_extnum_t *current_ext,
> xfs_fsblock_t *firstblock, struct xfs_bmap_free *flist,
> int num_exts);
> +int xfs_bmap_split_extent(struct xfs_inode *ip, xfs_fileoff_t split_offset,
> + xfs_extnum_t *split_ext);
> +int xfs_bmap_shift_extents_right(struct xfs_trans *tp, struct xfs_inode *ip,
> + int *done, xfs_fsblock_t offset_shift_fsb,
> + xfs_extnum_t *current_ext, xfs_extnum_t end_ext,
> + xfs_fsblock_t *firstblock, struct xfs_bmap_free *flist,
> + int num_exts);
>
> #endif /* __XFS_BMAP_H__ */
> diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
> index 057f671..b9511dd 100644
> --- a/fs/xfs/xfs_bmap_util.c
> +++ b/fs/xfs/xfs_bmap_util.c
> @@ -1548,7 +1548,7 @@ xfs_collapse_file_space(
> * We are using the write transaction in which max 2 bmbt
> * updates are allowed
> */
> - error = xfs_bmap_shift_extents(tp, ip, &done, start_fsb,
> + error = xfs_bmap_shift_extents_left(tp, ip, &done, start_fsb,
> shift_fsb, &current_ext,
> &first_block, &free_list,
> XFS_BMAP_MAX_SHIFT_EXTENTS);
> @@ -1572,6 +1572,132 @@ out:
> }
>
> /*
> + * xfs_insert_file_space()
> + * This routine allocates disk space and shifts extents for the given file.
> + * The first thing we do is to sync dirty data and invalidate page cache
> + * over the region on which insert range is working. And split an extent
> + * to two extents at given offset by calling xfs_bmap_split_extent.
> + * And shift all extent records which are lying between [offset,
> + * last allocated extent] to the right to reserve hole range. Lastly
> + * allocate an unwritten extent in hole range created by shifting extents.
> + *
> + * RETURNS:
> + * 0 on success
> + * errno on error
> + *
> + */
> +int
> +xfs_insert_file_space(
> + struct xfs_inode *ip,
> + loff_t offset,
> + loff_t len)
> +{
> + struct xfs_mount *mp = ip->i_mount;
> + struct xfs_trans *tp;
> + struct xfs_bmap_free free_list;
> + xfs_fsblock_t first_block;
> + xfs_ifork_t *ifp;
> + int done = 0;
> + int committed;
> + int error;
> + uint rounding;
> + xfs_fileoff_t start_fsb;
> + xfs_fileoff_t shift_fsb;
> + xfs_extnum_t split_ext;
> + xfs_extnum_t current_ext = 0;
> + xfs_off_t ioffset;
> +
> + ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
> + trace_xfs_insert_file_space(ip);
> +
> + error = xfs_qm_dqattach(ip, 0);
> + if (error)
> + return error;
> +
> + /* wait for the completion of any pending DIOs */
> + inode_dio_wait(VFS_I(ip));
> +
> + rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
> + ioffset = offset & ~(rounding - 1);
> + error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
> + ioffset, -1);
> + if (error)
> + return error;
> +
> + truncate_pagecache_range(VFS_I(ip), ioffset, -1);
> +
> + start_fsb = XFS_B_TO_FSB(mp, offset);
> + shift_fsb = XFS_B_TO_FSB(mp, len);
> +
> + error = xfs_bmap_split_extent(ip, start_fsb, &split_ext);
> + if (error)
> + return error;
> +
> + ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
> + current_ext = (ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) - 1;
> + while (!error && !done) {
> + tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
> + /*
> + * We would need to reserve permanent block for transaction.
> + * This will come into picture when after shifting extent into
> + * hole we found that adjacent extents can be merged which
> + * may lead to freeing of a block during record update.
> + */
> + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
> + XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
> + if (error) {
> + xfs_trans_cancel(tp, 0);
> + break;
> + }
> +
> + xfs_ilock(ip, XFS_ILOCK_EXCL);
> + error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot,
> + ip->i_gdquot, ip->i_pdquot,
> + XFS_DIOSTRAT_SPACE_RES(mp, 0), 0,
> + XFS_QMOPT_RES_REGBLKS);
> + if (error)
> + goto error1;
> +
> + xfs_trans_ijoin(tp, ip, 0);
> +
> + xfs_bmap_init(&free_list, &first_block);
> +
> + /*
> + * We are using the write transaction in which max 2 bmbt
> + * updates are allowed
> + */
> + error = xfs_bmap_shift_extents_right(tp, ip, &done, shift_fsb,
> + &current_ext, split_ext,
> + &first_block, &free_list,
> + XFS_BMAP_MAX_SHIFT_EXTENTS);
> + if (error)
> + goto error0;
> +
> + error = xfs_bmap_finish(&tp, &free_list, &committed);
> + if (error)
> + goto error0;
> +
> + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
> + xfs_iunlock(ip, XFS_ILOCK_EXCL);
> + if (error)
> + goto out;
> + }
> +
> + /* Add unwritten extent in a hole range. */
> + error = xfs_alloc_file_space(ip, offset, len, XFS_BMAPI_PREALLOC);
> +
> +out:
> + return error;
> +
> +error0:
> + xfs_bmap_cancel(&free_list);
> +error1:
> + xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
> + xfs_iunlock(ip, XFS_ILOCK_EXCL);
> + return error;
> +}
> +
> +/*
> * We need to check that the format of the data fork in the temporary inode is
> * valid for the target inode before doing the swap. This is not a problem with
> * attr1 because of the fixed fork offset, but attr2 has a dynamically sized
> diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
> index 935ed2b..d62ab4b 100644
> --- a/fs/xfs/xfs_bmap_util.h
> +++ b/fs/xfs/xfs_bmap_util.h
> @@ -101,6 +101,8 @@ int xfs_zero_file_space(struct xfs_inode *ip, xfs_off_t offset,
> xfs_off_t len);
> int xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset,
> xfs_off_t len);
> +int xfs_insert_file_space(struct xfs_inode *, xfs_off_t offset,
> + xfs_off_t len);
>
> /* EOF block manipulation functions */
> bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force);
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 1f66779..f160cd5 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -756,11 +756,13 @@ xfs_file_fallocate(
> struct xfs_trans *tp;
> long error;
> loff_t new_size = 0;
> + int do_file_insert = 0;
>
> if (!S_ISREG(inode->i_mode))
> return -EINVAL;
> if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
> - FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
> + FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
> + FALLOC_FL_INSERT_RANGE))
> return -EOPNOTSUPP;
>
> xfs_ilock(ip, XFS_IOLOCK_EXCL);
> @@ -790,6 +792,28 @@ xfs_file_fallocate(
> error = xfs_collapse_file_space(ip, offset, len);
> if (error)
> goto out_unlock;
> + } else if (mode & FALLOC_FL_INSERT_RANGE) {
> + unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
> +
> + if (offset & blksize_mask || len & blksize_mask) {
> + error = -EINVAL;
> + goto out_unlock;
> + }
> +
> + /* Check for wrap through zero */
> + if (inode->i_size + len > inode->i_sb->s_maxbytes) {
> + error = -EFBIG;
> + goto out_unlock;
> + }
> +
> + /* Offset should be less than i_size */
> + if (offset >= i_size_read(inode)) {
> + error = -EINVAL;
> + goto out_unlock;
> + }
> +
> + new_size = i_size_read(inode) + len;
> + do_file_insert = 1;
> } else {
> if (!(mode & FALLOC_FL_KEEP_SIZE) &&
> offset + len > i_size_read(inode)) {
> @@ -840,8 +864,20 @@ xfs_file_fallocate(
> iattr.ia_valid = ATTR_SIZE;
> iattr.ia_size = new_size;
> error = xfs_setattr_size(ip, &iattr);
> + if (error)
> + goto out_unlock;
> }
>
> + /*
> + * Some operations are performed after the inode size is updated. For
> + * example, insert range expands the address space of the file, shifts
> + * all subsequent extents over and allocates space into the hole.
> + * Updating the size first ensures that shifted extents aren't left
> + * hanging past EOF in the event of a crash or failure.
> + */
> + if (do_file_insert)
> + error = xfs_insert_file_space(ip, offset, len);
> +
> out_unlock:
> xfs_iunlock(ip, XFS_IOLOCK_EXCL);
> return -error;
> diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
> index 152f827..8943c9f 100644
> --- a/fs/xfs/xfs_trace.h
> +++ b/fs/xfs/xfs_trace.h
> @@ -663,6 +663,7 @@ DEFINE_INODE_EVENT(xfs_alloc_file_space);
> DEFINE_INODE_EVENT(xfs_free_file_space);
> DEFINE_INODE_EVENT(xfs_zero_file_space);
> DEFINE_INODE_EVENT(xfs_collapse_file_space);
> +DEFINE_INODE_EVENT(xfs_insert_file_space);
> DEFINE_INODE_EVENT(xfs_readdir);
> #ifdef CONFIG_XFS_POSIX_ACL
> DEFINE_INODE_EVENT(xfs_get_acl);
> --
> 1.7.11-rc0
>
> _______________________________________________
> xfs mailing list
> xfs@oss.sgi.com
> http://oss.sgi.com/mailman/listinfo/xfs
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 4+ messages in thread