* [PATCH v2 2/7] xfs: add support FALLOC_FL_COLLAPSE_RANGE for fallocate
@ 2013-09-02 14:29 Namjae Jeon
2014-05-27 22:20 ` Mark Tinguely
0 siblings, 1 reply; 11+ messages in thread
From: Namjae Jeon @ 2013-09-02 14:29 UTC (permalink / raw)
To: viro, mtk.manpages, tytso, adilger.kernel, bpm, elder, hch, david
Cc: Namjae Jeon, Namjae Jeon, linux-kernel, xfs, a.sangwan,
linux-fsdevel, linux-ext4
From: Namjae Jeon <namjae.jeon@samsung.com>
Add support FALLOC_FL_COLLAPSE_RANGE for fallocate.
Signed-off-by: Namjae Jeon <namjae.jeon@samsung.com>
Signed-off-by: Ashish Sangwan <a.sangwan@samsung.com>
---
fs/xfs/xfs_bmap.c | 174 ++++++++++++++++++++++++++++++++++++++++++++++++
fs/xfs/xfs_bmap.h | 3 +
fs/xfs/xfs_bmap_util.c | 96 ++++++++++++++++++++++++++
fs/xfs/xfs_bmap_util.h | 2 +
fs/xfs/xfs_file.c | 20 ++++--
fs/xfs/xfs_fs.h | 6 ++
6 files changed, 296 insertions(+), 5 deletions(-)
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 92b8309..c12358e 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5356,3 +5356,177 @@ error0:
}
return error;
}
+
+/*
+ * Update extents by shifting them downwards into a hole.
+ * At max count number of extents will be shifted and *current_ext
+ * is the extent number which is currently being shifted.
+ * This function will return error if the hole is not present
+ * while shifting extents. On success, 0 is returned.
+ *
+ * Only the data fork is handled (whichfork is fixed to XFS_DATA_FORK).
+ *
+ * Parameters:
+ *   tp          - transaction used for reading extents, for the bmbt
+ *                 cursor and for logging the inode.
+ *   ip          - inode whose data fork extents are shifted.
+ *   done        - set to 1 when there is no extent at or after start_fsb,
+ *                 or when *current_ext has reached the fork's extent
+ *                 count; it is never cleared here, so the caller must
+ *                 initialise it.
+ *   start_fsb   - file offset (in fs blocks) used to find the first extent
+ *                 to shift; only consulted when *current_ext is 0.
+ *   shift       - number of fs blocks subtracted from each start offset.
+ *   current_ext - in/out index of the next extent to process, so that a
+ *                 caller can resume across several calls.
+ *   firstblock, flist - stored into the bmbt cursor when the fork has a
+ *                 btree root.
+ *   count       - upper bound on the number of extents handled per call.
+ *
+ * Returns EFSCORRUPTED when the fork is in neither extents nor btree
+ * format, or when a shifted extent would not fit into the hole; EIO when
+ * the filesystem has been shut down; otherwise whatever error the extent
+ * read or bmbt lookup/update helpers return.
+ *
+ * The inode is logged with the accumulated logflags on every path that
+ * reaches del_cursor, including the error paths from inside the loop.
+ */
+int
+xfs_bmap_shift_extents(
+ struct xfs_trans *tp,
+ struct xfs_inode *ip,
+ int *done,
+ xfs_fileoff_t start_fsb,
+ xfs_fileoff_t shift,
+ xfs_extnum_t *current_ext,
+ xfs_fsblock_t *firstblock,
+ struct xfs_bmap_free *flist,
+ int count)
+{
+ struct xfs_btree_cur *cur;
+ struct xfs_bmbt_rec_host *gotp;
+ struct xfs_bmbt_irec left;
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_ifork *ifp;
+ xfs_extnum_t nexts = 0;
+ xfs_fileoff_t startoff;
+ int error = 0;
+ int i;
+ int whichfork = XFS_DATA_FORK;
+ int state;
+ int logflags;
+ xfs_filblks_t blockcount = 0;
+
+ if (unlikely(XFS_TEST_ERROR(
+ (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
+ XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
+ mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+ XFS_ERROR_REPORT("xfs_bmap_shift_extents",
+ XFS_ERRLEVEL_LOW, mp);
+ return XFS_ERROR(EFSCORRUPTED);
+ }
+
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return XFS_ERROR(EIO);
+
+ ifp = XFS_IFORK_PTR(ip, whichfork);
+
+ if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+ /* Read in all the extents */
+ error = xfs_iread_extents(tp, ip, whichfork);
+ if (error)
+ return error;
+ }
+
+ if (!*current_ext) {
+ gotp = xfs_iext_bno_to_ext(ifp, start_fsb, current_ext);
+ /*
+ * gotp can be null in 2 cases: 1) if there are no extents
+ * or 2) start_fsb lies in a hole beyond which there are
+ * no extents. Either way, we are done.
+ */
+ if (!gotp) {
+ *done = 1;
+ return 0;
+ }
+ }
+
+ /* We are going to change core inode */
+ logflags = XFS_ILOG_CORE;
+
+ if (ifp->if_flags & XFS_IFBROOT) {
+ cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
+ cur->bc_private.b.firstblock = *firstblock;
+ cur->bc_private.b.flist = flist;
+ cur->bc_private.b.flags = 0;
+ }
+ else {
+ cur = NULL;
+ logflags |= XFS_ILOG_DEXT;
+ }
+
+ while (nexts++ < count &&
+ *current_ext < XFS_IFORK_NEXTENTS(ip, whichfork)) {
+ state = 0;
+
+ gotp = xfs_iext_get_ext(ifp, *current_ext);
+ startoff = xfs_bmbt_get_startoff(gotp);
+ startoff -= shift;
+
+ /*
+ * Before shifting extent into hole, make sure that the hole
+ * is large enough to accommodate the shift: the shifted start
+ * offset must not land inside the left neighbour, and for the
+ * first extent it must not end up larger than the original
+ * offset (i.e. the subtraction must not have wrapped).
+ */
+ if (*current_ext) {
+ state |= BMAP_LEFT_VALID;
+ xfs_bmbt_get_all(xfs_iext_get_ext(ifp,
+ *current_ext - 1), &left);
+
+ if (isnullstartblock(left.br_startblock))
+ state |= BMAP_LEFT_DELAY;
+
+ if (startoff < left.br_startoff + left.br_blockcount)
+ error = XFS_ERROR(EFSCORRUPTED);
+
+ } else if (startoff > xfs_bmbt_get_startoff(gotp))
+ /* Hole is at the start but not large enough */
+ error = XFS_ERROR(EFSCORRUPTED);
+
+ if (error)
+ goto del_cursor;
+
+ /*
+ * Check if we can merge 2 adjacent extents: the left neighbour
+ * must be a real (non-delayed) extent that ends exactly where
+ * the shifted extent starts, both in file offset and in disk
+ * block, has the same state, and the combined length must not
+ * exceed MAXEXTLEN. On merge the current in-core record is
+ * removed, the fork's extent count is decremented and gotp is
+ * repointed at the left record, which then gets the combined
+ * block count below.
+ *
+ * NOTE(review): in the btree case only the left record is
+ * looked up and updated afterwards; no delete of the btree
+ * record for the removed extent is visible in this function.
+ * That would leave the in-core list and the bmbt out of sync
+ * and could make a later xfs_bmbt_lookup_eq() miss - confirm.
+ */
+ if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
+ left.br_startoff + left.br_blockcount == startoff &&
+ left.br_startblock + left.br_blockcount ==
+ xfs_bmbt_get_startblock(gotp) &&
+ xfs_bmbt_get_state(gotp) == left.br_state &&
+ left.br_blockcount + xfs_bmbt_get_blockcount(gotp) <=
+ MAXEXTLEN) {
+ blockcount =
+ left.br_blockcount + xfs_bmbt_get_blockcount(gotp);
+ state |= BMAP_LEFT_CONTIG;
+ xfs_iext_remove(ip, *current_ext, 1, 0);
+ XFS_IFORK_NEXT_SET(ip, whichfork,
+ XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
+ gotp = xfs_iext_get_ext(ifp, --*current_ext);
+ }
+
+ if (cur) {
+ error = xfs_bmbt_lookup_eq(cur,
+ xfs_bmbt_get_startoff(gotp),
+ xfs_bmbt_get_startblock(gotp),
+ xfs_bmbt_get_blockcount(gotp),
+ &i);
+ if (error)
+ goto del_cursor;
+ XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
+ }
+
+ if (state & BMAP_LEFT_CONTIG) {
+ /* We have to update extent block count */
+ xfs_bmbt_set_blockcount(gotp, blockcount);
+ } else {
+ /* We have to update the startoff */
+ xfs_bmbt_set_startoff(gotp, startoff);
+ }
+
+ if (cur) {
+ error = xfs_bmbt_update(cur,
+ xfs_bmbt_get_startoff(gotp),
+ xfs_bmbt_get_startblock(gotp),
+ xfs_bmbt_get_blockcount(gotp),
+ xfs_bmbt_get_state(gotp));
+ if (error)
+ goto del_cursor;
+ }
+
+ (*current_ext)++;
+ }
+
+ /* Check if we are done */
+ if (*current_ext == XFS_IFORK_NEXTENTS(ip, whichfork))
+ *done = 1;
+
+del_cursor:
+ if (cur)
+ xfs_btree_del_cursor(cur,
+ error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+
+ xfs_trans_log_inode(tp, ip, logflags);
+
+ return error;
+}
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 33b41f3..b16ebfa 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -169,5 +169,8 @@ int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
xfs_extnum_t num);
uint xfs_default_attroffset(struct xfs_inode *ip);
+int xfs_bmap_shift_extents(struct xfs_trans *, struct xfs_inode *,
+ int *, xfs_fileoff_t, xfs_fileoff_t, xfs_extnum_t *,
+ xfs_fsblock_t *, struct xfs_bmap_free *, int);
#endif /* __XFS_BMAP_H__ */
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 541d59f..57f045e 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1556,6 +1556,7 @@ xfs_change_file_space(
case XFS_IOC_RESVSP64:
case XFS_IOC_UNRESVSP:
case XFS_IOC_UNRESVSP64:
+ case XFS_COLLAPSE_RANGE:
if (bf->l_len <= 0)
return XFS_ERROR(EINVAL);
break;
@@ -1638,6 +1639,12 @@ xfs_change_file_space(
clrprealloc = 1;
break;
+ case XFS_COLLAPSE_RANGE:
+ error = xfs_collapse_file_space(ip, startoffset, bf->l_len,
+ attr_flags);
+ if (error)
+ return error;
+ break;
default:
ASSERT(0);
@@ -1683,6 +1690,95 @@ xfs_change_file_space(
return xfs_trans_commit(tp, 0);
}
+
+/*
+ * xfs_collapse_file_space: Implements the FALLOC_FL_COLLAPSE_RANGE flag.
+ *
+ * First frees the blocks backing the byte range [offset, offset + len)
+ * with xfs_free_file_space(), then moves every extent that starts at or
+ * after the end of that range down by len, so that the hole is closed.
+ * The shifting is done in a loop of separate transactions, each of which
+ * shifts at most two extents, until xfs_bmap_shift_extents() reports that
+ * it is done. This function does not change the file size itself.
+ *
+ * ip         - inode to operate on.
+ * offset     - byte offset of the start of the range to collapse.
+ * len        - length in bytes of the range to collapse.
+ * attr_flags - passed through unchanged to xfs_free_file_space().
+ *
+ * Returns 0 on success or the first error encountered. The inode's
+ * XFS_ILOCK_EXCL is taken for each transaction and is always released
+ * again before returning, on both the success and the error paths.
+ */
+int
+xfs_collapse_file_space(
+ struct xfs_inode *ip,
+ loff_t offset,
+ loff_t len,
+ int attr_flags)
+{
+ int done = 0;
+ struct xfs_mount *mp = ip->i_mount;
+ uint resblks;
+ struct xfs_trans *tp;
+ int error;
+ xfs_extnum_t current_ext = 0;
+ struct xfs_bmap_free free_list;
+ xfs_fsblock_t first_block;
+ int committed;
+ xfs_fileoff_t start_fsb = XFS_B_TO_FSB(mp, offset + len);
+ xfs_fileoff_t shift_fsb = XFS_B_TO_FSB(mp, len);
+
+ resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+
+ /*
+ * The first thing we do is to free data blocks in the specified range
+ * by calling xfs_free_file_space(). It would also sync dirty data
+ * and invalidate page cache over the region on which collapse range
+ * is working.
+ */
+
+ error = xfs_free_file_space(ip, offset, len, attr_flags);
+ if (error)
+ return error;
+
+ while (!error && !done) {
+ tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
+ tp->t_flags |= XFS_TRANS_RESERVE;
+ /*
+ * We would need to reserve permanent block for transaction.
+ * This will come into picture when after shifting extent into
+ * hole we found that adjacent extents can be merged which
+ * may lead to freeing of a block during record update.
+ */
+ error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0);
+ if (error) {
+ /* ILOCK is not held yet, so only the transaction is undone */
+ ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
+ xfs_trans_cancel(tp, 0);
+ break;
+ }
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ error = xfs_trans_reserve_quota(tp, mp,
+ ip->i_udquot, ip->i_gdquot, ip->i_pdquot,
+ resblks, 0, XFS_QMOPT_RES_REGBLKS);
+ if (error)
+ goto out;
+
+ xfs_trans_ijoin(tp, ip, 0);
+
+ xfs_bmap_init(&free_list, &first_block);
+
+ /*
+ * We are using the write transaction in which max 2 bmbt
+ * updates are allowed
+ */
+ error = xfs_bmap_shift_extents(tp, ip, &done, start_fsb,
+ shift_fsb, &current_ext,
+ &first_block, &free_list, 2);
+ if (error)
+ goto out;
+
+ error = xfs_bmap_finish(&tp, &free_list, &committed);
+ if (error)
+ goto out;
+
+ error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ }
+
+ return error;
+
+out:
+ /*
+ * Error with the transaction reserved and the ILOCK held: cancel the
+ * transaction and drop the same lock that was taken above (the ILOCK,
+ * not the IOLOCK, which this function never acquires).
+ */
+ xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ return error;
+}
+
/*
* We need to check that the format of the data fork in the temporary inode is
* valid for the target inode before doing the swap. This is not a problem with
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 0612609..588d29d 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -97,6 +97,8 @@ int xfs_change_file_space(struct xfs_inode *ip, int cmd,
xfs_flock64_t *bf, xfs_off_t offset,
int attr_flags);
+int xfs_collapse_file_space(struct xfs_inode *, loff_t, loff_t, int);
+
/* EOF block manipulation functions */
bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force);
int xfs_free_eofblocks(struct xfs_mount *mp, struct xfs_inode *ip,
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 818c623..9c9c1ff 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -807,7 +807,8 @@ xfs_file_fallocate(
int cmd = XFS_IOC_RESVSP;
int attr_flags = XFS_ATTR_NOLOCK;
- if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+ if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
+ FALLOC_FL_COLLAPSE_RANGE))
return -EOPNOTSUPP;
bf.l_whence = 0;
@@ -819,10 +820,19 @@ xfs_file_fallocate(
if (mode & FALLOC_FL_PUNCH_HOLE)
cmd = XFS_IOC_UNRESVSP;
- /* check the new inode size is valid before allocating */
- if (!(mode & FALLOC_FL_KEEP_SIZE) &&
- offset + len > i_size_read(inode)) {
+ /* Shrink size in case of FALLOC_FL_COLLAPSE_RANGE */
+ if (mode & FALLOC_FL_COLLAPSE_RANGE) {
+ cmd = XFS_COLLAPSE_RANGE;
+ if ((offset + len) > i_size_read(inode))
+ new_size = offset;
+ else
+ new_size = i_size_read(inode) - len;
+ } else if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+ offset + len > i_size_read(inode))
new_size = offset + len;
+
+ /* check the new inode size is valid before allocating */
+ if (new_size || mode & FALLOC_FL_COLLAPSE_RANGE) {
error = inode_newsize_ok(inode, new_size);
if (error)
goto out_unlock;
@@ -836,7 +846,7 @@ xfs_file_fallocate(
goto out_unlock;
/* Change file size if needed */
- if (new_size) {
+ if (new_size || mode & FALLOC_FL_COLLAPSE_RANGE) {
struct iattr iattr;
iattr.ia_valid = ATTR_SIZE;
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 1edb5cc..99f5244 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -516,6 +516,12 @@ typedef struct xfs_swapext
#define XFS_IOC_GETBMAPX _IOWR('X', 56, struct getbmap)
#define XFS_IOC_ZERO_RANGE _IOW ('X', 57, struct xfs_flock64)
#define XFS_IOC_FREE_EOFBLOCKS _IOR ('X', 58, struct xfs_eofblocks)
+/*
+ * Although there is no ioctl implemented yet, we reserve an ioctl number for
+ * representing collapse range operation to avoid any possible collision in
+ * switch case of xfs_change_file_space.
+ */
+#define XFS_COLLAPSE_RANGE _IOW('X', 59, struct xfs_flock64)
/*
* ioctl commands that replace IRIX syssgi()'s
--
1.7.9.5
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH v2 2/7] xfs: add support FALLOC_FL_COLLAPSE_RANGE for fallocate
2013-09-02 14:29 [PATCH v2 2/7] xfs: add support FALLOC_FL_COLLAPSE_RANGE for fallocate Namjae Jeon
@ 2014-05-27 22:20 ` Mark Tinguely
2014-05-27 22:51 ` Dave Chinner
0 siblings, 1 reply; 11+ messages in thread
From: Mark Tinguely @ 2014-05-27 22:20 UTC (permalink / raw)
To: xfs
On 09/02/13 09:29, Namjae Jeon wrote:
> From: Namjae Jeon<namjae.jeon@samsung.com>
>
> Add support FALLOC_FL_COLLAPSE_RANGE for fallocate.
>
> Signed-off-by: Namjae Jeon<namjae.jeon@samsung.com>
> Signed-off-by: Ashish Sangwan<a.sangwan@samsung.com>
> ---
> + /* Check if we can merge 2 adjacent extents */
> + if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
> + left.br_startoff + left.br_blockcount == startoff &&
> + left.br_startblock + left.br_blockcount ==
> + xfs_bmbt_get_startblock(gotp) &&
> + xfs_bmbt_get_state(gotp) == left.br_state &&
> + left.br_blockcount + xfs_bmbt_get_blockcount(gotp) <=
> + MAXEXTLEN) {
> + blockcount =
> + left.br_blockcount + xfs_bmbt_get_blockcount(gotp);
> + state |= BMAP_LEFT_CONTIG;
> + xfs_iext_remove(ip, *current_ext, 1, 0);
> + XFS_IFORK_NEXT_SET(ip, whichfork,
> + XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
> + gotp = xfs_iext_get_ext(ifp, --*current_ext);
> + }
> +
> + if (cur) {
> + error = xfs_bmbt_lookup_eq(cur,
> + xfs_bmbt_get_startoff(gotp),
> + xfs_bmbt_get_startblock(gotp),
> + xfs_bmbt_get_blockcount(gotp),
> + &i);
> + if (error)
> + goto del_cursor;
> + XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
I can reliably trigger this XFS_WANT_CORRUPTED_GOTO() with a fsstress
that fills the filesystem:
xfstests > ltp/fsstress -d /mnt/scratch -s 1370236858 -p 512 -n 8192 &
xfstests > xfs_info /mnt/scratch
meta-data=/dev/sda7 isize=256 agcount=4, agsize=2228515 blks
= sectsz=512 attr=2, projid32bit=1
= crc=0
data = bsize=4096 blocks=8914059, imaxpct=25
= sunit=0 swidth=0 blks
naming =version 2 bsize=4096 ascii-ci=0 ftype=0
log =internal bsize=4096 blocks=4352, version=2
= sectsz=512 sunit=0 blks, lazy-count=1
realtime =none extsz=4096 blocks=0, rtextents=0
XFS: Assertion failed: fs_is_ok, file: fs/xfs/xfs_bmap.c, line: 5511
------------[ cut here ]------------
kernel BUG at fs/xfs/xfs_message.c:107!
invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC
Modules linked in: e1000e xfs exportfs libcrc32c ext3 jbd
CPU: 3 PID: 6341 Comm: fsstress Not tainted 3.15.0-rc5+ #1
Hardware name: SGI.COM AltixXE210/S5000PAL0,
BIOS S5000.86B.11.00.0096.011320091422 01/13/2009
task: ffff88081ec991a0 ti: ffff88081ecca000 task.ti: ffff88081ecca000
RIP: 0010:[<ffffffffa005a7cd>] [<ffffffffa005a7cd>] assfail+0x1d/0x30
[xfs]
RSP: 0018:ffff88081eccbcd8 EFLAGS: 00010296
RAX: 0000000000000044 RBX: 0000000000000000 RCX: 0000000000000000
RDX: ffff88085bccda50 RSI: ffff88085bccd1c8 RDI: ffff88085bccd1c8
RBP: ffff88081eccbcd8 R08: 0000000000000316 R09: 0000000000000003
R10: 0000000000000316 R11: 0000000000000006 R12: 00000000000002d6
R13: ffff88009f778d00 R14: ffff88081eccbe10 R15: ffff8807fa0d4000
FS: 00007fd496c57700(0000) GS:ffff88085bcc0000(0000)
knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 00007fd4902e0e88 CR3: 000000081ecbc000 CR4: 00000000000007e0
Stack:
ffff88081eccbda8 ffffffffa00769f4 ffff8805df5bbba0 0000000000000079
ffff88081eccbe14 ffff8805ddff5b00 0000000000000000 ffff8807fa0d4040
0000000100000001 ffff88066a5ddef0 00000000000002c5 0000000000c11a01
Call Trace:
[<ffffffffa00769f4>] xfs_bmap_shift_extents+0x184/0x5a0 [xfs]
[<ffffffffa0047c7c>] xfs_collapse_file_space+0x15c/0x250 [xfs]
[<ffffffffa004e094>] xfs_file_fallocate+0x354/0x380 [xfs]
[<ffffffff81142564>] ? __sb_start_write+0x64/0xf0
[<ffffffff81274e0c>] ? selinux_file_permission+0xec/0x130
[<ffffffff8113f270>] do_fallocate+0x150/0x170
[<ffffffff8113f2de>] SyS_fallocate+0x4e/0x80
[<ffffffff817b42e2>] system_call_fastpath+0x16/0x1b
Code: 00 00 00 48 89 45 c8 e8 42 fc ff ff c9 c3 55 41 89 d0 48 89 f1
48 89 fa 48 c7 c6 d0 3b 0d a0 31 ff 48 89 e5 31 c0 e8 93 ff ff ff <0f>
0b eb fe 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 55 0f
RIP [<ffffffffa005a7cd>] assfail+0x1d/0x30 [xfs]
RSP <ffff88081eccbcd8>
---[ end trace 40b8d37513d3de71 ]---
Kernel panic - not syncing: Fatal exception
Kernel Offset: 0x0 from 0xffffffff81000000 (relocation range:
0xffffffff80000000-0xffffffff9fffffff)
---[ end Kernel panic - not syncing: Fatal exception
--Mark.
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v2 2/7] xfs: add support FALLOC_FL_COLLAPSE_RANGE for fallocate
2014-05-27 22:20 ` Mark Tinguely
@ 2014-05-27 22:51 ` Dave Chinner
2014-05-27 22:56 ` Mark Tinguely
0 siblings, 1 reply; 11+ messages in thread
From: Dave Chinner @ 2014-05-27 22:51 UTC (permalink / raw)
To: Mark Tinguely; +Cc: xfs
On Tue, May 27, 2014 at 05:20:02PM -0500, Mark Tinguely wrote:
> On 09/02/13 09:29, Namjae Jeon wrote:
> >From: Namjae Jeon<namjae.jeon@samsung.com>
> >
> >Add support FALLOC_FL_COLLAPSE_RANGE for fallocate.
> >
> >Signed-off-by: Namjae Jeon<namjae.jeon@samsung.com>
> >Signed-off-by: Ashish Sangwan<a.sangwan@samsung.com>
> >---
>
> >+ /* Check if we can merge 2 adjacent extents */
> >+ if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
> >+ left.br_startoff + left.br_blockcount == startoff &&
> >+ left.br_startblock + left.br_blockcount ==
> >+ xfs_bmbt_get_startblock(gotp) &&
> >+ xfs_bmbt_get_state(gotp) == left.br_state &&
> >+ left.br_blockcount + xfs_bmbt_get_blockcount(gotp) <=
> >+ MAXEXTLEN) {
> >+ blockcount =
> >+ left.br_blockcount + xfs_bmbt_get_blockcount(gotp);
> >+ state |= BMAP_LEFT_CONTIG;
> >+ xfs_iext_remove(ip, *current_ext, 1, 0);
> >+ XFS_IFORK_NEXT_SET(ip, whichfork,
> >+ XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
> >+ gotp = xfs_iext_get_ext(ifp, --*current_ext);
> >+ }
> >+
> >+ if (cur) {
> >+ error = xfs_bmbt_lookup_eq(cur,
> >+ xfs_bmbt_get_startoff(gotp),
> >+ xfs_bmbt_get_startblock(gotp),
> >+ xfs_bmbt_get_blockcount(gotp),
> >+ &i);
> >+ if (error)
> >+ goto del_cursor;
> >+ XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
>
> I can reliably trigger this XFS_WANT_CORRUPTED_GOTO() with a
> fsstress that fills the filesystem:
>
> xfstests > ltp/fsstress -d /mnt/scratch -s 1370236858 -p 512 -n 8192 &
Hasn't reproduced after 10 minutes of running at ENOSPC here - how
long does it take to reproduce? What storage hardware are you
testing on? How many CPUs? RAM? ....
http://xfs.org/index.php/XFS_FAQ#Q:_What_information_should_I_include_when_reporting_a_problem.3F
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v2 2/7] xfs: add support FALLOC_FL_COLLAPSE_RANGE for fallocate
2014-05-27 22:51 ` Dave Chinner
@ 2014-05-27 22:56 ` Mark Tinguely
2014-05-28 0:29 ` Dave Chinner
0 siblings, 1 reply; 11+ messages in thread
From: Mark Tinguely @ 2014-05-27 22:56 UTC (permalink / raw)
To: Dave Chinner; +Cc: xfs
On 05/27/14 17:51, Dave Chinner wrote:
> On Tue, May 27, 2014 at 05:20:02PM -0500, Mark Tinguely wrote:
>> On 09/02/13 09:29, Namjae Jeon wrote:
>>> From: Namjae Jeon<namjae.jeon@samsung.com>
>>>
>>> Add support FALLOC_FL_COLLAPSE_RANGE for fallocate.
>>>
>>> Signed-off-by: Namjae Jeon<namjae.jeon@samsung.com>
>>> Signed-off-by: Ashish Sangwan<a.sangwan@samsung.com>
>>> ---
>>
>>> + /* Check if we can merge 2 adjacent extents */
>>> + if ((state& BMAP_LEFT_VALID)&& !(state& BMAP_LEFT_DELAY)&&
>>> + left.br_startoff + left.br_blockcount == startoff&&
>>> + left.br_startblock + left.br_blockcount ==
>>> + xfs_bmbt_get_startblock(gotp)&&
>>> + xfs_bmbt_get_state(gotp) == left.br_state&&
>>> + left.br_blockcount + xfs_bmbt_get_blockcount(gotp)<=
>>> + MAXEXTLEN) {
>>> + blockcount =
>>> + left.br_blockcount + xfs_bmbt_get_blockcount(gotp);
>>> + state |= BMAP_LEFT_CONTIG;
>>> + xfs_iext_remove(ip, *current_ext, 1, 0);
>>> + XFS_IFORK_NEXT_SET(ip, whichfork,
>>> + XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
>>> + gotp = xfs_iext_get_ext(ifp, --*current_ext);
>>> + }
>>> +
>>> + if (cur) {
>>> + error = xfs_bmbt_lookup_eq(cur,
>>> + xfs_bmbt_get_startoff(gotp),
>>> + xfs_bmbt_get_startblock(gotp),
>>> + xfs_bmbt_get_blockcount(gotp),
>>> + &i);
>>> + if (error)
>>> + goto del_cursor;
>>> + XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
>>
>> I can reliably trigger this XFS_WANT_CORRUPTED_GOTO() with a
>> fsstress that fills the filesystem:
>>
>> xfstests> ltp/fsstress -d /mnt/scratch -s 1370236858 -p 512 -n 8192&
>
> Hasn't reproduced after 10 minutes of running at ENOSPC here - how
> long does it take to reproduce? What storage hardware are you
> testing on? How many CPUs? RAM? ....
>
> http://xfs.org/index.php/XFS_FAQ#Q:_What_information_should_I_include_when_reporting_a_problem.3F
>
> Cheers,
>
> Dave.
About 7-8 hours on spinning rust. This is my burn in test.
--Mark.
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v2 2/7] xfs: add support FALLOC_FL_COLLAPSE_RANGE for fallocate
2014-05-27 22:56 ` Mark Tinguely
@ 2014-05-28 0:29 ` Dave Chinner
2014-05-28 22:48 ` Dave Chinner
2014-05-29 14:27 ` Mark Tinguely
0 siblings, 2 replies; 11+ messages in thread
From: Dave Chinner @ 2014-05-28 0:29 UTC (permalink / raw)
To: Mark Tinguely; +Cc: xfs
On Tue, May 27, 2014 at 05:56:54PM -0500, Mark Tinguely wrote:
> On 05/27/14 17:51, Dave Chinner wrote:
> >On Tue, May 27, 2014 at 05:20:02PM -0500, Mark Tinguely wrote:
> >>On 09/02/13 09:29, Namjae Jeon wrote:
> >>>From: Namjae Jeon<namjae.jeon@samsung.com>
> >>>
> >>>Add support FALLOC_FL_COLLAPSE_RANGE for fallocate.
> >>>
> >>>Signed-off-by: Namjae Jeon<namjae.jeon@samsung.com>
> >>>Signed-off-by: Ashish Sangwan<a.sangwan@samsung.com>
> >>>---
> >>
> >>>+ /* Check if we can merge 2 adjacent extents */
> >>>+ if ((state& BMAP_LEFT_VALID)&& !(state& BMAP_LEFT_DELAY)&&
> >>>+ left.br_startoff + left.br_blockcount == startoff&&
> >>>+ left.br_startblock + left.br_blockcount ==
> >>>+ xfs_bmbt_get_startblock(gotp)&&
> >>>+ xfs_bmbt_get_state(gotp) == left.br_state&&
> >>>+ left.br_blockcount + xfs_bmbt_get_blockcount(gotp)<=
> >>>+ MAXEXTLEN) {
> >>>+ blockcount =
> >>>+ left.br_blockcount + xfs_bmbt_get_blockcount(gotp);
> >>>+ state |= BMAP_LEFT_CONTIG;
> >>>+ xfs_iext_remove(ip, *current_ext, 1, 0);
> >>>+ XFS_IFORK_NEXT_SET(ip, whichfork,
> >>>+ XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
> >>>+ gotp = xfs_iext_get_ext(ifp, --*current_ext);
> >>>+ }
> >>>+
> >>>+ if (cur) {
> >>>+ error = xfs_bmbt_lookup_eq(cur,
> >>>+ xfs_bmbt_get_startoff(gotp),
> >>>+ xfs_bmbt_get_startblock(gotp),
> >>>+ xfs_bmbt_get_blockcount(gotp),
> >>>+ &i);
> >>>+ if (error)
> >>>+ goto del_cursor;
> >>>+ XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
> >>
> >>I can reliably trigger this XFS_WANT_CORRUPTED_GOTO() with a
> >>fsstress that fills the filesystem:
> >>
> >>xfstests> ltp/fsstress -d /mnt/scratch -s 1370236858 -p 512 -n 8192&
> >
> >Hasn't reproduced after 10 minutes of running at ENOSPC here - how
> >long does it take to reproduce? What storage hardware are you
> >testing on? How many CPUs? RAM? ....
> >
> >http://xfs.org/index.php/XFS_FAQ#Q:_What_information_should_I_include_when_reporting_a_problem.3F
> >
> >Cheers,
> >
> >Dave.
>
> A 7-8 hours on spinning rust. This is my burn in test.
Can you try to narrow the problem down? Otherwise it's going to be a
case of looking for a needle in a haystack....
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v2 2/7] xfs: add support FALLOC_FL_COLLAPSE_RANGE for fallocate
2014-05-28 0:29 ` Dave Chinner
@ 2014-05-28 22:48 ` Dave Chinner
2014-05-29 14:27 ` Mark Tinguely
1 sibling, 0 replies; 11+ messages in thread
From: Dave Chinner @ 2014-05-28 22:48 UTC (permalink / raw)
To: Mark Tinguely; +Cc: xfs
On Wed, May 28, 2014 at 10:29:06AM +1000, Dave Chinner wrote:
> On Tue, May 27, 2014 at 05:56:54PM -0500, Mark Tinguely wrote:
> > On 05/27/14 17:51, Dave Chinner wrote:
> > >On Tue, May 27, 2014 at 05:20:02PM -0500, Mark Tinguely wrote:
> > >>On 09/02/13 09:29, Namjae Jeon wrote:
> > >>>From: Namjae Jeon<namjae.jeon@samsung.com>
> > >>>
> > >>>Add support FALLOC_FL_COLLAPSE_RANGE for fallocate.
> > >>>
> > >>>Signed-off-by: Namjae Jeon<namjae.jeon@samsung.com>
> > >>>Signed-off-by: Ashish Sangwan<a.sangwan@samsung.com>
> > >>>---
> > >>
> > >>>+ /* Check if we can merge 2 adjacent extents */
> > >>>+ if ((state& BMAP_LEFT_VALID)&& !(state& BMAP_LEFT_DELAY)&&
> > >>>+ left.br_startoff + left.br_blockcount == startoff&&
> > >>>+ left.br_startblock + left.br_blockcount ==
> > >>>+ xfs_bmbt_get_startblock(gotp)&&
> > >>>+ xfs_bmbt_get_state(gotp) == left.br_state&&
> > >>>+ left.br_blockcount + xfs_bmbt_get_blockcount(gotp)<=
> > >>>+ MAXEXTLEN) {
> > >>>+ blockcount =
> > >>>+ left.br_blockcount + xfs_bmbt_get_blockcount(gotp);
> > >>>+ state |= BMAP_LEFT_CONTIG;
> > >>>+ xfs_iext_remove(ip, *current_ext, 1, 0);
> > >>>+ XFS_IFORK_NEXT_SET(ip, whichfork,
> > >>>+ XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
> > >>>+ gotp = xfs_iext_get_ext(ifp, --*current_ext);
> > >>>+ }
> > >>>+
> > >>>+ if (cur) {
> > >>>+ error = xfs_bmbt_lookup_eq(cur,
> > >>>+ xfs_bmbt_get_startoff(gotp),
> > >>>+ xfs_bmbt_get_startblock(gotp),
> > >>>+ xfs_bmbt_get_blockcount(gotp),
> > >>>+ &i);
> > >>>+ if (error)
> > >>>+ goto del_cursor;
> > >>>+ XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
> > >>
> > >>I can reliably trigger this XFS_WANT_CORRUPTED_GOTO() with a
> > >>fsstress that fills the filesystem:
> > >>
> > >>xfstests> ltp/fsstress -d /mnt/scratch -s 1370236858 -p 512 -n 8192&
> > >
> > >Hasn't reproduced after 10 minutes of running at ENOSPC here - how
> > >long does it take to reproduce? What storage hardware are you
> > >testing on? How many CPUs? RAM? ....
> > >
> > >http://xfs.org/index.php/XFS_FAQ#Q:_What_information_should_I_include_when_reporting_a_problem.3F
> > >
> > >Cheers,
> > >
> > >Dave.
> >
> > A 7-8 hours on spinning rust. This is my burn in test.
>
> Can you try to narrow the problem down? Otherwise it's going to be a
> case of looking for a needle in a haystack....
The VMs I've left this running on have now been doing so for
almost 24 hours without triggering any issues. I'm really going to
need a more refined reproducer for this to confirm it is a real
problem and not a new machine burn-in issue...
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v2 2/7] xfs: add support FALLOC_FL_COLLAPSE_RANGE for fallocate
2014-05-28 0:29 ` Dave Chinner
2014-05-28 22:48 ` Dave Chinner
@ 2014-05-29 14:27 ` Mark Tinguely
2014-05-31 0:39 ` Dave Chinner
1 sibling, 1 reply; 11+ messages in thread
From: Mark Tinguely @ 2014-05-29 14:27 UTC (permalink / raw)
To: Dave Chinner; +Cc: xfs
On 05/27/14 19:29, Dave Chinner wrote:
> On Tue, May 27, 2014 at 05:56:54PM -0500, Mark Tinguely wrote:
>> On 05/27/14 17:51, Dave Chinner wrote:
>>> On Tue, May 27, 2014 at 05:20:02PM -0500, Mark Tinguely wrote:
>>>> On 09/02/13 09:29, Namjae Jeon wrote:
>>>>> From: Namjae Jeon<namjae.jeon@samsung.com>
>>>>>
>>>>> Add support FALLOC_FL_COLLAPSE_RANGE for fallocate.
>>>>>
>>>>> Signed-off-by: Namjae Jeon<namjae.jeon@samsung.com>
>>>>> Signed-off-by: Ashish Sangwan<a.sangwan@samsung.com>
>>>>> ---
>>>>
>>>>> + /* Check if we can merge 2 adjacent extents */
>>>>> + if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY)&&
>>>>> + left.br_startoff + left.br_blockcount == startoff &&
>>>>> + left.br_startblock + left.br_blockcount ==
>>>>> + xfs_bmbt_get_startblock(gotp) &&
>>>>> + xfs_bmbt_get_state(gotp) == left.br_state &&
>>>>> + left.br_blockcount + xfs_bmbt_get_blockcount(gotp)<=
>>>>> + MAXEXTLEN) {
>>>>> + blockcount =
>>>>> + left.br_blockcount + xfs_bmbt_get_blockcount(gotp);
>>>>> + state |= BMAP_LEFT_CONTIG;
>>>>> + xfs_iext_remove(ip, *current_ext, 1, 0);
>>>>> + XFS_IFORK_NEXT_SET(ip, whichfork,
>>>>> + XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
>>>>> + gotp = xfs_iext_get_ext(ifp, --*current_ext);
>>>>> + }
>>>>> +
>>>>> + if (cur) {
>>>>> + error = xfs_bmbt_lookup_eq(cur,
>>>>> + xfs_bmbt_get_startoff(gotp),
>>>>> + xfs_bmbt_get_startblock(gotp),
>>>>> + xfs_bmbt_get_blockcount(gotp),
>>>>> + &i);
>>>>> + if (error)
>>>>> + goto del_cursor;
>>>>> + XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
>>>>
>>>> I can reliably trigger this XFS_WANT_CORRUPTED_GOTO() with a
>>>> fsstress that fills the filesystem:
>>>>
>>>> xfstests> ltp/fsstress -d /mnt/scratch -s 1370236858 -p 512 -n 8192&
>>>
>>> Hasn't reproduced after 10 minutes of running at ENOSPC here - how
>>> long does it take to reproduce? What storage hardware are you
>>> testing on? How many CPUs? RAM? ....
>>>
>>> http://xfs.org/index.php/XFS_FAQ#Q:_What_information_should_I_include_when_reporting_a_problem.3F
>>>
>>> Cheers,
>>>
>>> Dave.
>>
>> A 7-8 hours on spinning rust. This is my burn in test.
>
> Can you try to narrow the problem down? Otherwise it's going to be a
> case of looking for a needle in a haystack....
>
> Cheers,
>
> Dave.
Nod on the needle in a haystack if the bmbt is really corrupt.
I am running fsstress from xfstests with the top commit 9b7f704, and I
don't see any newer fsstress patches since then.
I moved the test to another box with a kdump that works on top of tree
Linux and grabbed a vmcore. I grabbed a metadata dump of the filesystem
after the ASSERT. That should give some idea of what inode/block it was
looking up.
I sent email to Namjae when I first tripped over this problem in late
April. No longer on the face of the earth and I can't look at this until
the weekend.
--Mark.
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v2 2/7] xfs: add support FALLOC_FL_COLLAPSE_RANGE for fallocate
2014-05-29 14:27 ` Mark Tinguely
@ 2014-05-31 0:39 ` Dave Chinner
2014-06-01 1:22 ` Mark Tinguely
0 siblings, 1 reply; 11+ messages in thread
From: Dave Chinner @ 2014-05-31 0:39 UTC (permalink / raw)
To: Mark Tinguely; +Cc: xfs
On Thu, May 29, 2014 at 09:27:44AM -0500, Mark Tinguely wrote:
> On 05/27/14 19:29, Dave Chinner wrote:
> >On Tue, May 27, 2014 at 05:56:54PM -0500, Mark Tinguely wrote:
> >>A 7-8 hours on spinning rust. This is my burn in test.
> >
> >Can you try to narrow the problem down? Otherwise it's going to be a
> >case of looking for a needle in a haystack....
>
> Nod on the needle in a hay stack if it bmbt is really corrupt.
>
> I am running fsstress from xfstests with the top commit 9b7f704, and
> I don't see any newer fsstress patches since then.
>
> I moved the test to another box with a kdump that works on top of
> tree Linux and grabbed a vmcore. I grabbed a metadata dump of the
> filesystem after the ASSERT. That should give some idea of what
> inode/block it was looking up.
>
> I sent email to Namjae when I first tripped over this problem in
> late April. No longer on the face of the earth and I can't look at
> this until the weekend.
No worries - it looks pretty hard to hit, so it's not something we
urgently need to track down. Any time you can spare to try to narrow
it down would be great!
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v2 2/7] xfs: add support FALLOC_FL_COLLAPSE_RANGE for fallocate
2014-05-31 0:39 ` Dave Chinner
@ 2014-06-01 1:22 ` Mark Tinguely
2014-06-01 1:25 ` Mark Tinguely
2014-06-01 22:43 ` Dave Chinner
0 siblings, 2 replies; 11+ messages in thread
From: Mark Tinguely @ 2014-06-01 1:22 UTC (permalink / raw)
To: Dave Chinner; +Cc: xfs
On 05/30/14 19:39, Dave Chinner wrote:
> On Thu, May 29, 2014 at 09:27:44AM -0500, Mark Tinguely wrote:
>> On 05/27/14 19:29, Dave Chinner wrote:
>>> On Tue, May 27, 2014 at 05:56:54PM -0500, Mark Tinguely wrote:
>>>> A 7-8 hours on spinning rust. This is my burn in test.
>>>
>>> Can you try to narrow the problem down? Otherwise it's going to be a
>>> case of looking for a needle in a haystack....
>>
>> Nod on the needle in a hay stack if it bmbt is really corrupt.
>>
>> I am running fsstress from xfstests with the top commit 9b7f704, and
>> I don't see any newer fsstress patches since then.
>>
>> I moved the test to another box with a kdump that works on top of
>> tree Linux and grabbed a vmcore. I grabbed a metadata dump of the
>> filesystem after the ASSERT. That should give some idea of what
>> inode/block it was looking up.
>>
>> I sent email to Namjae when I first tripped over this problem in
>> late April. No longer on the face of the earth and I can't look at
>> this until the weekend.
>
> No worries - it looks pretty hard to hit, so it's not something we
> urgently need to track down. Any time you can spare to try to narrow
> it down would be great!
>
> Cheers,
>
> Dave.
The xfs_inode thinks there are 11 bmbt entries, which matches the 11 there should be, but one entry is missing and one is garbage:
i_df = {
if_bytes = 0xb0, <- here 11 entries 0x10 bytes long
if_real_bytes = 0x100,
if_broot = 0xffff88009f74c680,
if_broot_bytes = 0x28,
if_flags = 0x6,
if_u1 = {
if_extents = 0xffff88033c44a000, <-
if_ext_irec = 0xffff88033c44a000,
if_data = 0xffff88033c44a000 ""
},
Looking at the if_extents[]:
crash> rd ffff88033c44a000 32
ffff88033c44a000: 8000000000000200 000000b601800021 ........!.......
ffff88033c44a010: 0000000000004400 000000449a000007 .D..........D...
ffff88033c44a020: 0000000000005200 000002f897e00004 .R..............
ffff88033c44a030: 8000000000005a00 000002f898600033 .Z......3.`.....
ffff88033c44a040: 000000000000c000 000002f89ec00001 ................
ffff88033c44a050: 0000000000015c00 000005fdfba00010 .\..............
ffff88033c44a060: 0000000000017c00 00000eab00400006 .|........@.....
ffff88033c44a070: 000000000001f800 00000ec752c00004 ...........R....
ffff88033c44a080: 0000000000020000 00000e8ae6800004 ................
ffff88033c44a090: 0000000000020800 00000e7167e00004 ...........gq...
ffff88033c44a0a0: 000000000002bfff ffffffc000a00001 ................
^^^^ bad ^^^^
It appears that current_ext is 10 (11th entry).
The assert is on the bad entry.
xfs_db thinks there are 11 entries:
recs[1-11] = [startoff,startblock,blockcount,extentflag]
1:[1,372748,33,1] 2:[34,140496,18,0] 3:[52,1557619,53,1]
4:[105,1557672,27,0] 5:[132,1557699,51,1] 6:[183,1557750,1,0]
7:[261,3141597,16,0] 8:[277,7690242,6,0] 9:[339,7748246,4,0]
10:[343,7624500,4,0] 11:[347,7572287,4,0]
xfs_db> fsb 4262789
xfs_db> type text
xfs_db> p
000: 42 4d 41 50 00 00 00 0b ff ff ff ff ff ff ff ff BMAP............
010: ff ff ff ff ff ff ff ff 80 00 00 00 00 00 02 00 ................
020: 00 00 00 b6 01 80 00 21 00 00 00 00 00 00 44 00 ..............D.
030: 00 00 00 44 9a 00 00 12 80 00 00 00 00 00 68 00 ...D..........h.
040: 00 00 02 f8 8e 60 00 35 00 00 00 00 00 00 d2 00 .......5........
050: 00 00 02 f8 95 00 00 1b 80 00 00 00 00 01 08 00 ................
060: 00 00 02 f8 98 60 00 33 00 00 00 00 00 01 6e 00 .......3......n.
070: 00 00 02 f8 9e c0 00 01 00 00 00 00 00 02 0a 00 ................
080: 00 00 05 fd fb a0 00 10 00 00 00 00 00 02 2a 00 ................
090: 00 00 0e ab 00 40 00 06 00 00 00 00 00 02 a6 00 ................
0a0: 00 00 0e c7 52 c0 00 04 00 00 00 00 00 02 ae 00 ....R...........
0b0: 00 00 0e 8a e6 80 00 04 00 00 00 00 00 02 b6 00 ................
0c0: 00 00 0e 71 67 e0 00 04 00 00 00 00 00 00 00 00 ...qg...........
0d0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
0e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
This xfs_db output is from before log replay, but it appears that the 3rd
extent is missing in the data fork, everything after it has shifted up, and
there is a garbage entry in entry 11.
--Mark.
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v2 2/7] xfs: add support FALLOC_FL_COLLAPSE_RANGE for fallocate
2014-06-01 1:22 ` Mark Tinguely
@ 2014-06-01 1:25 ` Mark Tinguely
2014-06-01 22:43 ` Dave Chinner
1 sibling, 0 replies; 11+ messages in thread
From: Mark Tinguely @ 2014-06-01 1:25 UTC (permalink / raw)
To: xfs
On 05/31/14 20:22, Mark Tinguely wrote:
>
> The xfs_inode thinks there are 11 bmbt entries when there should only be
> 11:
> i_df = {
> if_bytes = 0xb0, <- here 11 entries 0x10 bytes long
Sorry, that was bad editing on my part when I could not do the hex math. There
are 11 entries and there should be 11 entries, but one entry is missing and one
entry is garbage.
--Mark.
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v2 2/7] xfs: add support FALLOC_FL_COLLAPSE_RANGE for fallocate
2014-06-01 1:22 ` Mark Tinguely
2014-06-01 1:25 ` Mark Tinguely
@ 2014-06-01 22:43 ` Dave Chinner
1 sibling, 0 replies; 11+ messages in thread
From: Dave Chinner @ 2014-06-01 22:43 UTC (permalink / raw)
To: Mark Tinguely; +Cc: xfs
On Sat, May 31, 2014 at 08:22:44PM -0500, Mark Tinguely wrote:
> On 05/30/14 19:39, Dave Chinner wrote:
> >On Thu, May 29, 2014 at 09:27:44AM -0500, Mark Tinguely wrote:
> >>On 05/27/14 19:29, Dave Chinner wrote:
> >>>On Tue, May 27, 2014 at 05:56:54PM -0500, Mark Tinguely wrote:
> >>>>A 7-8 hours on spinning rust. This is my burn in test.
> >>>
> >>>Can you try to narrow the problem down? Otherwise it's going to be a
> >>>case of looking for a needle in a haystack....
> >>
> >>Nod on the needle in a hay stack if it bmbt is really corrupt.
> >>
> >>I am running fsstress from xfstests with the top commit 9b7f704, and
> >>I don't see any newer fsstress patches since then.
> >>
> >>I moved the test to another box with a kdump that works on top of
> >>tree Linux and grabbed a vmcore. I grabbed a metadata dump of the
> >>filesystem after the ASSERT. That should give some idea of what
> >>inode/block it was looking up.
> >>
> >>I sent email to Namjae when I first tripped over this problem in
> >>late April. No longer on the face of the earth and I can't look at
> >>this until the weekend.
> >
> >No worries - it looks pretty hard to hit, so it's not something we
> >urgently need to track down. Any time you can spare to try to narrow
> >it down would be great!
> >
> >Cheers,
> >
> >Dave.
>
> The xfs_inode thinks there are 11 bmbt entries when there should only be 11:
> i_df = {
> if_bytes = 0xb0, <- here 11 entries 0x10 bytes long
> if_real_bytes = 0x100,
> if_broot = 0xffff88009f74c680,
> if_broot_bytes = 0x28,
> if_flags = 0x6,
> if_u1 = {
> if_extents = 0xffff88033c44a000, <-
> if_ext_irec = 0xffff88033c44a000,
> if_data = 0xffff88033c44a000 ""
> },
>
> Looking at the if_extents[]:
>
> crash> rd ffff88033c44a000 32
> ffff88033c44a000: 8000000000000200 000000b601800021 ........!.......
> ffff88033c44a010: 0000000000004400 000000449a000007 .D..........D...
> ffff88033c44a020: 0000000000005200 000002f897e00004 .R..............
> ffff88033c44a030: 8000000000005a00 000002f898600033 .Z......3.`.....
> ffff88033c44a040: 000000000000c000 000002f89ec00001 ................
> ffff88033c44a050: 0000000000015c00 000005fdfba00010 .\..............
> ffff88033c44a060: 0000000000017c00 00000eab00400006 .|........@.....
> ffff88033c44a070: 000000000001f800 00000ec752c00004 ...........R....
> ffff88033c44a080: 0000000000020000 00000e8ae6800004 ................
> ffff88033c44a090: 0000000000020800 00000e7167e00004 ...........gq...
> ffff88033c44a0a0: 000000000002bfff ffffffc000a00001 ................
> ^^^^ bad ^^^^
> It appears that current_ext is 10 (11th entry).
> The assert is on the bad entry.
I don't think that's bad - it looks like a NULL start block, which
means an in-memory extent, i.e. a delayed allocation block with an
indirect reservation of 1 block and a length of ~0x40 blocks?
> xfs_db thinks there are 11 entries:
>
> recs[1-11] = [startoff,startblock,blockcount,extentflag]
> 1:[1,372748,33,1] 2:[34,140496,18,0] 3:[52,1557619,53,1]
> 4:[105,1557672,27,0] 5:[132,1557699,51,1] 6:[183,1557750,1,0]
> 7:[261,3141597,16,0] 8:[277,7690242,6,0] 9:[339,7748246,4,0]
> 10:[343,7624500,4,0] 11:[347,7572287,4,0]
>
> xfs_db> fsb 4262789
> xfs_db> type text
> xfs_db> p
> 000: 42 4d 41 50 00 00 00 0b ff ff ff ff ff ff ff ff BMAP............
> 010: ff ff ff ff ff ff ff ff 80 00 00 00 00 00 02 00 ................
> 020: 00 00 00 b6 01 80 00 21 00 00 00 00 00 00 44 00 ..............D.
> 030: 00 00 00 44 9a 00 00 12 80 00 00 00 00 00 68 00 ...D..........h.
> 040: 00 00 02 f8 8e 60 00 35 00 00 00 00 00 00 d2 00 .......5........
> 050: 00 00 02 f8 95 00 00 1b 80 00 00 00 00 01 08 00 ................
> 060: 00 00 02 f8 98 60 00 33 00 00 00 00 00 01 6e 00 .......3......n.
> 070: 00 00 02 f8 9e c0 00 01 00 00 00 00 00 02 0a 00 ................
> 080: 00 00 05 fd fb a0 00 10 00 00 00 00 00 02 2a 00 ................
> 090: 00 00 0e ab 00 40 00 06 00 00 00 00 00 02 a6 00 ................
> 0a0: 00 00 0e c7 52 c0 00 04 00 00 00 00 00 02 ae 00 ....R...........
> 0b0: 00 00 0e 8a e6 80 00 04 00 00 00 00 00 02 b6 00 ................
> 0c0: 00 00 0e 71 67 e0 00 04 00 00 00 00 00 00 00 00 ...qg...........
> 0d0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
> 0e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
>
> This xfs_db is before log replay, but it appears that the 3 extent is
> missing in the data fork, everything shifted up and a garbage entry
> in entry 11.
There are very few identical extents between those two lists - the
first is the same, the second has the same start offset and block
but is much shorter, and all the others are completely different.
So this is looking like a delalloc extent when the code is not
expecting it?
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2014-06-01 22:43 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-09-02 14:29 [PATCH v2 2/7] xfs: add support FALLOC_FL_COLLAPSE_RANGE for fallocate Namjae Jeon
2014-05-27 22:20 ` Mark Tinguely
2014-05-27 22:51 ` Dave Chinner
2014-05-27 22:56 ` Mark Tinguely
2014-05-28 0:29 ` Dave Chinner
2014-05-28 22:48 ` Dave Chinner
2014-05-29 14:27 ` Mark Tinguely
2014-05-31 0:39 ` Dave Chinner
2014-06-01 1:22 ` Mark Tinguely
2014-06-01 1:25 ` Mark Tinguely
2014-06-01 22:43 ` Dave Chinner
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox