public inbox for linux-xfs@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 0/2] Fixes for ENOSPC xfs_remove
@ 2023-11-29  7:58 Jiachen Zhang
  2023-11-29  7:58 ` [PATCH v2 1/2] xfs: ensure logflagsp is initialized in xfs_bmap_del_extent_real Jiachen Zhang
  2023-11-29  7:58 ` [PATCH v2 2/2] xfs: update dir3 leaf block metadata after swap Jiachen Zhang
  0 siblings, 2 replies; 5+ messages in thread
From: Jiachen Zhang @ 2023-11-29  7:58 UTC (permalink / raw)
  To: Chandan Babu R, Darrick J. Wong
  Cc: Dave Chinner, Allison Henderson, Zhang Tianci, Brian Foster,
	Ben Myers, linux-xfs, linux-kernel, xieyongji, me, Jiachen Zhang

Hi,

Recently, our use-case ran into 2 bugs in case doing xfs_remove when the
disk space is in-pressure, which may cause xfs shutdown and kernel crash
in the xfs log recovery procedure. Here are 2 patches to fix the
problem.

The 1st patch fixes an uninitialized variable issue.

The 2nd patch ensures the blkno in the xfs_buf is updated when doing
xfs_da3_swap_lastblock().

Compared with the V1 patchset, this V2 patchset
- directly set the *logflagsp value to make the code more robust in the
  1st commit,
- check xfs's crc-feature rather than magic in the 2nd commit, and
- fixed code style and rebased onto the master branch.


Thanks,
Jiachen

Jiachen Zhang (1):
  xfs: ensure logflagsp is initialized in xfs_bmap_del_extent_real

Zhang Tianci (1):
  xfs: update dir3 leaf block metadata after swap

 fs/xfs/libxfs/xfs_bmap.c     | 26 ++++++++++++++------------
 fs/xfs/libxfs/xfs_da_btree.c | 11 ++++++++++-
 2 files changed, 24 insertions(+), 13 deletions(-)

-- 
2.20.1


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH v2 1/2] xfs: ensure logflagsp is initialized in xfs_bmap_del_extent_real
  2023-11-29  7:58 [PATCH v2 0/2] Fixes for ENOSPC xfs_remove Jiachen Zhang
@ 2023-11-29  7:58 ` Jiachen Zhang
  2023-11-29  9:04   ` Dave Chinner
  2023-11-29  7:58 ` [PATCH v2 2/2] xfs: update dir3 leaf block metadata after swap Jiachen Zhang
  1 sibling, 1 reply; 5+ messages in thread
From: Jiachen Zhang @ 2023-11-29  7:58 UTC (permalink / raw)
  To: Chandan Babu R, Darrick J. Wong
  Cc: Dave Chinner, Allison Henderson, Zhang Tianci, Brian Foster,
	Ben Myers, linux-xfs, linux-kernel, xieyongji, me, Jiachen Zhang,
	Christoph Hellwig

In the case of returning -ENOSPC, ensure logflagsp is initialized by 0.
Otherwise the caller __xfs_bunmapi will set uninitialized illegal
tmp_logflags value into xfs log, which might cause unpredictable error
in the log recovery procedure.

Also, remove the flags variable and set the *logflagsp directly, so that
the code should be more robust in the long run.

Fixes: 1b24b633aafe ("xfs: move some more code into xfs_bmap_del_extent_real")
Signed-off-by: Jiachen Zhang <zhangjiachen.jaycee@bytedance.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/libxfs/xfs_bmap.c | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index be62acffad6c..9435bd6c950b 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -5010,7 +5010,6 @@ xfs_bmap_del_extent_real(
 	xfs_fileoff_t		del_endoff;	/* first offset past del */
 	int			do_fx;	/* free extent at end of routine */
 	int			error;	/* error return value */
-	int			flags = 0;/* inode logging flags */
 	struct xfs_bmbt_irec	got;	/* current extent entry */
 	xfs_fileoff_t		got_endoff;	/* first offset past got */
 	int			i;	/* temp state */
@@ -5023,6 +5022,8 @@ xfs_bmap_del_extent_real(
 	uint32_t		state = xfs_bmap_fork_to_state(whichfork);
 	struct xfs_bmbt_irec	old;
 
+	*logflagsp = 0;
+
 	mp = ip->i_mount;
 	XFS_STATS_INC(mp, xs_del_exlist);
 
@@ -5048,10 +5049,12 @@ xfs_bmap_del_extent_real(
 	if (tp->t_blk_res == 0 &&
 	    ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
 	    ifp->if_nextents >= XFS_IFORK_MAXEXT(ip, whichfork) &&
-	    del->br_startoff > got.br_startoff && del_endoff < got_endoff)
-		return -ENOSPC;
+	    del->br_startoff > got.br_startoff && del_endoff < got_endoff) {
+		error = -ENOSPC;
+		goto done;
+	}
 
-	flags = XFS_ILOG_CORE;
+	*logflagsp = XFS_ILOG_CORE;
 	if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
 		if (!(bflags & XFS_BMAPI_REMAP)) {
 			error = xfs_rtfree_blocks(tp, del->br_startblock,
@@ -5093,9 +5096,9 @@ xfs_bmap_del_extent_real(
 		xfs_iext_prev(ifp, icur);
 		ifp->if_nextents--;
 
-		flags |= XFS_ILOG_CORE;
+		*logflagsp |= XFS_ILOG_CORE;
 		if (!cur) {
-			flags |= xfs_ilog_fext(whichfork);
+			*logflagsp |= xfs_ilog_fext(whichfork);
 			break;
 		}
 		if ((error = xfs_btree_delete(cur, &i)))
@@ -5114,7 +5117,7 @@ xfs_bmap_del_extent_real(
 		got.br_blockcount -= del->br_blockcount;
 		xfs_iext_update_extent(ip, state, icur, &got);
 		if (!cur) {
-			flags |= xfs_ilog_fext(whichfork);
+			*logflagsp |= xfs_ilog_fext(whichfork);
 			break;
 		}
 		error = xfs_bmbt_update(cur, &got);
@@ -5128,7 +5131,7 @@ xfs_bmap_del_extent_real(
 		got.br_blockcount -= del->br_blockcount;
 		xfs_iext_update_extent(ip, state, icur, &got);
 		if (!cur) {
-			flags |= xfs_ilog_fext(whichfork);
+			*logflagsp |= xfs_ilog_fext(whichfork);
 			break;
 		}
 		error = xfs_bmbt_update(cur, &got);
@@ -5150,7 +5153,7 @@ xfs_bmap_del_extent_real(
 		new.br_state = got.br_state;
 		new.br_startblock = del_endblock;
 
-		flags |= XFS_ILOG_CORE;
+		*logflagsp |= XFS_ILOG_CORE;
 		if (cur) {
 			error = xfs_bmbt_update(cur, &got);
 			if (error)
@@ -5191,7 +5194,7 @@ xfs_bmap_del_extent_real(
 				 * to the original value.
 				 */
 				xfs_iext_update_extent(ip, state, icur, &old);
-				flags = 0;
+				*logflagsp = 0;
 				error = -ENOSPC;
 				goto done;
 			}
@@ -5200,7 +5203,7 @@ xfs_bmap_del_extent_real(
 				goto done;
 			}
 		} else
-			flags |= xfs_ilog_fext(whichfork);
+			*logflagsp |= xfs_ilog_fext(whichfork);
 
 		ifp->if_nextents++;
 		xfs_iext_next(ifp, icur);
@@ -5240,7 +5243,6 @@ xfs_bmap_del_extent_real(
 		xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);
 
 done:
-	*logflagsp = flags;
 	return error;
 }
 
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH v2 2/2] xfs: update dir3 leaf block metadata after swap
  2023-11-29  7:58 [PATCH v2 0/2] Fixes for ENOSPC xfs_remove Jiachen Zhang
  2023-11-29  7:58 ` [PATCH v2 1/2] xfs: ensure logflagsp is initialized in xfs_bmap_del_extent_real Jiachen Zhang
@ 2023-11-29  7:58 ` Jiachen Zhang
  2023-11-29  8:59   ` Dave Chinner
  1 sibling, 1 reply; 5+ messages in thread
From: Jiachen Zhang @ 2023-11-29  7:58 UTC (permalink / raw)
  To: Chandan Babu R, Darrick J. Wong
  Cc: Dave Chinner, Allison Henderson, Zhang Tianci, Brian Foster,
	Ben Myers, linux-xfs, linux-kernel, xieyongji, me, Dave Chinner

From: Zhang Tianci <zhangtianci.1997@bytedance.com>

xfs_da3_swap_lastblock() copy the last block content to the dead block,
but do not update the metadata in it. We need update some metadata
for some kinds of type block, such as dir3 leafn block records its
blkno, we shall update it to the dead block blkno. Otherwise,
before write the xfs_buf to disk, the verify_write() will fail in
blk_hdr->blkno != xfs_buf->b_bn, then xfs will be shutdown.

We will get this warning:

  XFS (dm-0): Metadata corruption detected at xfs_dir3_leaf_verify+0xa8/0xe0 [xfs], xfs_dir3_leafn block 0x178
  XFS (dm-0): Unmount and run xfs_repair
  XFS (dm-0): First 128 bytes of corrupted metadata buffer:
  00000000e80f1917: 00 80 00 0b 00 80 00 07 3d ff 00 00 00 00 00 00  ........=.......
  000000009604c005: 00 00 00 00 00 00 01 a0 00 00 00 00 00 00 00 00  ................
  000000006b6fb2bf: e4 44 e3 97 b5 64 44 41 8b 84 60 0e 50 43 d9 bf  .D...dDA..`.PC..
  00000000678978a2: 00 00 00 00 00 00 00 83 01 73 00 93 00 00 00 00  .........s......
  00000000b28b247c: 99 29 1d 38 00 00 00 00 99 29 1d 40 00 00 00 00  .).8.....).@....
  000000002b2a662c: 99 29 1d 48 00 00 00 00 99 49 11 00 00 00 00 00  .).H.....I......
  00000000ea2ffbb8: 99 49 11 08 00 00 45 25 99 49 11 10 00 00 48 fe  .I....E%.I....H.
  0000000069e86440: 99 49 11 18 00 00 4c 6b 99 49 11 20 00 00 4d 97  .I....Lk.I. ..M.
  XFS (dm-0): xfs_do_force_shutdown(0x8) called from line 1423 of file fs/xfs/xfs_buf.c.  Return address = 00000000c0ff63c1
  XFS (dm-0): Corruption of in-memory data detected.  Shutting down filesystem
  XFS (dm-0): Please umount the filesystem and rectify the problem(s)

From the log above, we know xfs_buf->b_no is 0x178, but the block's hdr record
its blkno is 0x1a0.

Fixes: 24df33b45ecf ("xfs: add CRC checking to dir2 leaf blocks")
Signed-off-by: Zhang Tianci <zhangtianci.1997@bytedance.com>
Suggested-by: Dave Chinner <david@fromorbit.com>
---
 fs/xfs/libxfs/xfs_da_btree.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index e576560b46e9..d11e6286e466 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -2318,8 +2318,17 @@ xfs_da3_swap_lastblock(
 	 * Copy the last block into the dead buffer and log it.
 	 */
 	memcpy(dead_buf->b_addr, last_buf->b_addr, args->geo->blksize);
-	xfs_trans_log_buf(tp, dead_buf, 0, args->geo->blksize - 1);
 	dead_info = dead_buf->b_addr;
+	/*
+	 * If xfs enable crc, the node/leaf block records its blkno, we
+	 * must update it.
+	 */
+	if (xfs_has_crc(mp)) {
+		struct xfs_da3_blkinfo *da3 = container_of(dead_info, struct xfs_da3_blkinfo, hdr);
+
+		da3->blkno = cpu_to_be64(xfs_buf_daddr(dead_buf));
+	}
+	xfs_trans_log_buf(tp, dead_buf, 0, args->geo->blksize - 1);
 	/*
 	 * Get values from the moved block.
 	 */
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH v2 2/2] xfs: update dir3 leaf block metadata after swap
  2023-11-29  7:58 ` [PATCH v2 2/2] xfs: update dir3 leaf block metadata after swap Jiachen Zhang
@ 2023-11-29  8:59   ` Dave Chinner
  0 siblings, 0 replies; 5+ messages in thread
From: Dave Chinner @ 2023-11-29  8:59 UTC (permalink / raw)
  To: Jiachen Zhang
  Cc: Chandan Babu R, Darrick J. Wong, Dave Chinner, Allison Henderson,
	Zhang Tianci, Brian Foster, Ben Myers, linux-xfs, linux-kernel,
	xieyongji, me

On Wed, Nov 29, 2023 at 03:58:32PM +0800, Jiachen Zhang wrote:
> From: Zhang Tianci <zhangtianci.1997@bytedance.com>
> 
> xfs_da3_swap_lastblock() copy the last block content to the dead block,
> but do not update the metadata in it. We need update some metadata
> for some kinds of type block, such as dir3 leafn block records its
> blkno, we shall update it to the dead block blkno. Otherwise,
> before write the xfs_buf to disk, the verify_write() will fail in
> blk_hdr->blkno != xfs_buf->b_bn, then xfs will be shutdown.
> 
> We will get this warning:
> 
>   XFS (dm-0): Metadata corruption detected at xfs_dir3_leaf_verify+0xa8/0xe0 [xfs], xfs_dir3_leafn block 0x178
>   XFS (dm-0): Unmount and run xfs_repair
>   XFS (dm-0): First 128 bytes of corrupted metadata buffer:
>   00000000e80f1917: 00 80 00 0b 00 80 00 07 3d ff 00 00 00 00 00 00  ........=.......
>   000000009604c005: 00 00 00 00 00 00 01 a0 00 00 00 00 00 00 00 00  ................
>   000000006b6fb2bf: e4 44 e3 97 b5 64 44 41 8b 84 60 0e 50 43 d9 bf  .D...dDA..`.PC..
>   00000000678978a2: 00 00 00 00 00 00 00 83 01 73 00 93 00 00 00 00  .........s......
>   00000000b28b247c: 99 29 1d 38 00 00 00 00 99 29 1d 40 00 00 00 00  .).8.....).@....
>   000000002b2a662c: 99 29 1d 48 00 00 00 00 99 49 11 00 00 00 00 00  .).H.....I......
>   00000000ea2ffbb8: 99 49 11 08 00 00 45 25 99 49 11 10 00 00 48 fe  .I....E%.I....H.
>   0000000069e86440: 99 49 11 18 00 00 4c 6b 99 49 11 20 00 00 4d 97  .I....Lk.I. ..M.
>   XFS (dm-0): xfs_do_force_shutdown(0x8) called from line 1423 of file fs/xfs/xfs_buf.c.  Return address = 00000000c0ff63c1
>   XFS (dm-0): Corruption of in-memory data detected.  Shutting down filesystem
>   XFS (dm-0): Please umount the filesystem and rectify the problem(s)
> 
> From the log above, we know xfs_buf->b_no is 0x178, but the block's hdr record
> its blkno is 0x1a0.
> 
> Fixes: 24df33b45ecf ("xfs: add CRC checking to dir2 leaf blocks")
> Signed-off-by: Zhang Tianci <zhangtianci.1997@bytedance.com>
> Suggested-by: Dave Chinner <david@fromorbit.com>
> ---
>  fs/xfs/libxfs/xfs_da_btree.c | 11 ++++++++++-
>  1 file changed, 10 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
> index e576560b46e9..d11e6286e466 100644
> --- a/fs/xfs/libxfs/xfs_da_btree.c
> +++ b/fs/xfs/libxfs/xfs_da_btree.c
> @@ -2318,8 +2318,17 @@ xfs_da3_swap_lastblock(
>  	 * Copy the last block into the dead buffer and log it.
>  	 */
>  	memcpy(dead_buf->b_addr, last_buf->b_addr, args->geo->blksize);
> -	xfs_trans_log_buf(tp, dead_buf, 0, args->geo->blksize - 1);
>  	dead_info = dead_buf->b_addr;
> +	/*
> +	 * If xfs enable crc, the node/leaf block records its blkno, we
> +	 * must update it.
> +	 */

I'd combine this comment into the comment 3 lines above.

> +	if (xfs_has_crc(mp)) {
> +		struct xfs_da3_blkinfo *da3 = container_of(dead_info, struct xfs_da3_blkinfo, hdr);

Line length too long.

And using container_of() is rather unique an unusual, and not done
anywhere else in the code. dead_buf->b_addr is a void pointer,
so no cast is necessary:

		struct xfs_da3_blkinfo *da3 = dead_buf->b_addr;


> +
> +		da3->blkno = cpu_to_be64(xfs_buf_daddr(dead_buf));
> +	}
> +	xfs_trans_log_buf(tp, dead_buf, 0, args->geo->blksize - 1);
>  	/*
>  	 * Get values from the moved block.
>  	 */

And whitespace for readability before the next code block. IOWs:

	/*
	 * Copy the last block into the dead buffer, update the block info
	 * header and log it.
	 */
	memcpy(dead_buf->b_addr, last_buf->b_addr, args->geo->blksize);
	if (xfs_has_crc(mp)) {
		struct xfs_da3_blkinfo *da3 = dead_buf->b_addr

		da3->blkno = cpu_to_be64(xfs_buf_daddr(dead_buf));
	}
	xfs_trans_log_buf(tp, dead_buf, 0, args->geo->blksize - 1);
	dead_info = dead_buf->b_addr;

	/*
	 * Get values from the moved block.
	 */

Cheers,

Dave.

-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2 1/2] xfs: ensure logflagsp is initialized in xfs_bmap_del_extent_real
  2023-11-29  7:58 ` [PATCH v2 1/2] xfs: ensure logflagsp is initialized in xfs_bmap_del_extent_real Jiachen Zhang
@ 2023-11-29  9:04   ` Dave Chinner
  0 siblings, 0 replies; 5+ messages in thread
From: Dave Chinner @ 2023-11-29  9:04 UTC (permalink / raw)
  To: Jiachen Zhang
  Cc: Chandan Babu R, Darrick J. Wong, Dave Chinner, Allison Henderson,
	Zhang Tianci, Brian Foster, Ben Myers, linux-xfs, linux-kernel,
	xieyongji, me, Christoph Hellwig

On Wed, Nov 29, 2023 at 03:58:31PM +0800, Jiachen Zhang wrote:
> In the case of returning -ENOSPC, ensure logflagsp is initialized by 0.
> Otherwise the caller __xfs_bunmapi will set uninitialized illegal
> tmp_logflags value into xfs log, which might cause unpredictable error
> in the log recovery procedure.
> 
> Also, remove the flags variable and set the *logflagsp directly, so that
> the code should be more robust in the long run.
> 
> Fixes: 1b24b633aafe ("xfs: move some more code into xfs_bmap_del_extent_real")
> Signed-off-by: Jiachen Zhang <zhangjiachen.jaycee@bytedance.com>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/xfs/libxfs/xfs_bmap.c | 26 ++++++++++++++------------
>  1 file changed, 14 insertions(+), 12 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
> index be62acffad6c..9435bd6c950b 100644
> --- a/fs/xfs/libxfs/xfs_bmap.c
> +++ b/fs/xfs/libxfs/xfs_bmap.c
> @@ -5010,7 +5010,6 @@ xfs_bmap_del_extent_real(
>  	xfs_fileoff_t		del_endoff;	/* first offset past del */
>  	int			do_fx;	/* free extent at end of routine */
>  	int			error;	/* error return value */
> -	int			flags = 0;/* inode logging flags */
>  	struct xfs_bmbt_irec	got;	/* current extent entry */
>  	xfs_fileoff_t		got_endoff;	/* first offset past got */
>  	int			i;	/* temp state */
> @@ -5023,6 +5022,8 @@ xfs_bmap_del_extent_real(
>  	uint32_t		state = xfs_bmap_fork_to_state(whichfork);
>  	struct xfs_bmbt_irec	old;
>  
> +	*logflagsp = 0;
> +
>  	mp = ip->i_mount;
>  	XFS_STATS_INC(mp, xs_del_exlist);
>  
> @@ -5048,10 +5049,12 @@ xfs_bmap_del_extent_real(
>  	if (tp->t_blk_res == 0 &&
>  	    ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
>  	    ifp->if_nextents >= XFS_IFORK_MAXEXT(ip, whichfork) &&
> -	    del->br_startoff > got.br_startoff && del_endoff < got_endoff)
> -		return -ENOSPC;
> +	    del->br_startoff > got.br_startoff && del_endoff < got_endoff) {
> +		error = -ENOSPC;
> +		goto done;
> +	}

Now that you've added initialisation of logflagsp, the need for the
error stacking goto pattern goes away completely. Anywhere that has
a "goto done" can be converted to a direct 'return error' call and
the done label can be removed.

-Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2023-11-29  9:04 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-11-29  7:58 [PATCH v2 0/2] Fixes for ENOSPC xfs_remove Jiachen Zhang
2023-11-29  7:58 ` [PATCH v2 1/2] xfs: ensure logflagsp is initialized in xfs_bmap_del_extent_real Jiachen Zhang
2023-11-29  9:04   ` Dave Chinner
2023-11-29  7:58 ` [PATCH v2 2/2] xfs: update dir3 leaf block metadata after swap Jiachen Zhang
2023-11-29  8:59   ` Dave Chinner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox