* [PATCH 1/2] xfs: factor out __xfs_da3_node_read()
2023-06-13 3:04 [PATCH 0/2] xfs: fix an inconsistency issue Zhang Yi
@ 2023-06-13 3:04 ` Zhang Yi
2023-06-13 3:04 ` [PATCH 2/2] xfs: atomic drop extent entries when inactiving attr Zhang Yi
2023-06-13 3:44 ` [PATCH 0/2] xfs: fix an inconsistency issue Dave Chinner
2 siblings, 0 replies; 5+ messages in thread
From: Zhang Yi @ 2023-06-13 3:04 UTC (permalink / raw)
To: linux-xfs; +Cc: djwong, dchinner, yi.zhang, yi.zhang, leo.lilong, yukuai3
From: Zhang Yi <yi.zhang@huawei.com>
Factor out a wrapper __xfs_da3_node_read() from xfs_da3_node_read()
which could pass flags parameter.
Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
---
fs/xfs/libxfs/xfs_da_btree.c | 5 +++--
fs/xfs/libxfs/xfs_da_btree.h | 15 +++++++++++++--
2 files changed, 16 insertions(+), 4 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index e576560b46e9..2e4e5aa22403 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -378,16 +378,17 @@ xfs_da3_node_set_type(
}
int
-xfs_da3_node_read(
+__xfs_da3_node_read(
struct xfs_trans *tp,
struct xfs_inode *dp,
xfs_dablk_t bno,
+ unsigned int flags,
struct xfs_buf **bpp,
int whichfork)
{
int error;
- error = xfs_da_read_buf(tp, dp, bno, 0, bpp, whichfork,
+ error = xfs_da_read_buf(tp, dp, bno, flags, bpp, whichfork,
&xfs_da3_node_buf_ops);
if (error || !*bpp || !tp)
return error;
diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h
index ffa3df5b2893..6d1da641f4f0 100644
--- a/fs/xfs/libxfs/xfs_da_btree.h
+++ b/fs/xfs/libxfs/xfs_da_btree.h
@@ -194,11 +194,22 @@ int xfs_da3_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
*/
int xfs_da3_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
xfs_da_state_blk_t *new_blk);
-int xfs_da3_node_read(struct xfs_trans *tp, struct xfs_inode *dp,
- xfs_dablk_t bno, struct xfs_buf **bpp, int whichfork);
+int __xfs_da3_node_read(struct xfs_trans *tp, struct xfs_inode *dp,
+ xfs_dablk_t bno, unsigned int flags,
+ struct xfs_buf **bpp, int whichfork);
int xfs_da3_node_read_mapped(struct xfs_trans *tp, struct xfs_inode *dp,
xfs_daddr_t mappedbno, struct xfs_buf **bpp,
int whichfork);
+static inline int
+xfs_da3_node_read(
+ struct xfs_trans *tp,
+ struct xfs_inode *dp,
+ xfs_dablk_t bno,
+ struct xfs_buf **bpp,
+ int whichfork)
+{
+ return __xfs_da3_node_read(tp, dp, bno, 0, bpp, whichfork);
+}
/*
* Utility routines.
--
2.31.1
^ permalink raw reply related [flat|nested] 5+ messages in thread* [PATCH 2/2] xfs: atomic drop extent entries when inactiving attr
2023-06-13 3:04 [PATCH 0/2] xfs: fix an inconsistency issue Zhang Yi
2023-06-13 3:04 ` [PATCH 1/2] xfs: factor out __xfs_da3_node_read() Zhang Yi
@ 2023-06-13 3:04 ` Zhang Yi
2023-06-13 4:48 ` Dave Chinner
2023-06-13 3:44 ` [PATCH 0/2] xfs: fix an inconsistency issue Dave Chinner
2 siblings, 1 reply; 5+ messages in thread
From: Zhang Yi @ 2023-06-13 3:04 UTC (permalink / raw)
To: linux-xfs; +Cc: djwong, dchinner, yi.zhang, yi.zhang, leo.lilong, yukuai3
From: Zhang Yi <yi.zhang@huawei.com>
When inactiving an unlinked inode and it's attrs, if xlog is shutdown
either during or just after the process of recurse deleting attribute
nodes/leafs in xfs_attr3_root_inactive(), the log will records some
buffer cancel items, but doesn't contain the corresponding extent
entries and inode updates, this is incomplete and inconsistent. Because
of the inactiving process is not completed and the unlinked inode is
still in the agi_unlinked table, it will continue to be inactived after
replaying the log on the next mount, the attr node/leaf blocks' created
record before the cancel items could not be replayed but the inode
does. So we could get corrupted data when reading the canceled blocks.
XFS (pmem0): Metadata corruption detected at
xfs_da3_node_read_verify+0x53/0x220, xfs_da3_node block 0x78
XFS (pmem0): Unmount and run xfs_repair
XFS (pmem0): First 128 bytes of corrupted metadata buffer:
00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
00000030: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
00000040: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
00000050: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
00000060: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
00000070: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
XFS (pmem0): metadata I/O error in "xfs_da_read_buf+0x104/0x190" at daddr 0x78 len 8 error 117
In order to fix the issue, we need to remove the extent entries, update
inode and attr btree atomically when staling attr node/leaf blocks. And
note that we may also need to log and update the parent attr node
entry when removing child or leaf attr block. Fortunately, it doesn't
have to be so complicated, we could leave the removed entres as
holes and skip them if we need to do re-inactiving, the whole node tree
will be removed completely in the end.
Cc: stable@vger.kernel.org
Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
---
fs/xfs/xfs_attr_inactive.c | 62 ++++++++++++++++++++++++++++----------
1 file changed, 46 insertions(+), 16 deletions(-)
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c
index 5db87b34fb6e..bb83915ddcfe 100644
--- a/fs/xfs/xfs_attr_inactive.c
+++ b/fs/xfs/xfs_attr_inactive.c
@@ -23,6 +23,7 @@
#include "xfs_quota.h"
#include "xfs_dir2.h"
#include "xfs_error.h"
+#include "xfs_defer.h"
/*
* Invalidate any incore buffers associated with this remote attribute value
@@ -139,7 +140,8 @@ xfs_attr3_node_inactive(
xfs_daddr_t parent_blkno, child_blkno;
struct xfs_buf *child_bp;
struct xfs_da3_icnode_hdr ichdr;
- int error, i;
+ int error, i, done;
+ xfs_filblks_t count = mp->m_attr_geo->fsbcount;
/*
* Since this code is recursive (gasp!) we must protect ourselves.
@@ -172,10 +174,13 @@ xfs_attr3_node_inactive(
* traversal of the tree so we may deal with many blocks
* before we come back to this one.
*/
- error = xfs_da3_node_read(*trans, dp, child_fsb, &child_bp,
- XFS_ATTR_FORK);
+ error = __xfs_da3_node_read(*trans, dp, child_fsb,
+ XFS_DABUF_MAP_HOLE_OK, &child_bp,
+ XFS_ATTR_FORK);
if (error)
return error;
+ if (!child_bp)
+ goto next_entry;
/* save for re-read later */
child_blkno = xfs_buf_daddr(child_bp);
@@ -207,14 +212,32 @@ xfs_attr3_node_inactive(
* Remove the subsidiary block from the cache and from the log.
*/
error = xfs_trans_get_buf(*trans, mp->m_ddev_targp,
- child_blkno,
- XFS_FSB_TO_BB(mp, mp->m_attr_geo->fsbcount), 0,
- &child_bp);
+ child_blkno, XFS_FSB_TO_BB(mp, count),
+ 0, &child_bp);
if (error)
return error;
+
+ error = xfs_bunmapi(*trans, dp, child_fsb, count,
+ XFS_BMAPI_ATTRFORK, 0, &done);
+ if (error) {
+ xfs_trans_brelse(*trans, child_bp);
+ return error;
+ }
xfs_trans_binval(*trans, child_bp);
+
+ error = xfs_defer_finish(trans);
+ if (error)
+ return error;
child_bp = NULL;
+ /*
+ * Atomically commit the whole invalidate stuff.
+ */
+ error = xfs_trans_roll_inode(trans, dp);
+ if (error)
+ return error;
+
+next_entry:
/*
* If we're not done, re-read the parent to get the next
* child block number.
@@ -232,12 +255,6 @@ xfs_attr3_node_inactive(
xfs_trans_brelse(*trans, bp);
bp = NULL;
}
- /*
- * Atomically commit the whole invalidate stuff.
- */
- error = xfs_trans_roll_inode(trans, dp);
- if (error)
- return error;
}
return 0;
@@ -258,7 +275,8 @@ xfs_attr3_root_inactive(
struct xfs_da_blkinfo *info;
struct xfs_buf *bp;
xfs_daddr_t blkno;
- int error;
+ xfs_filblks_t count = mp->m_attr_geo->fsbcount;
+ int error, done;
/*
* Read block 0 to see what we have to work with.
@@ -266,8 +284,9 @@ xfs_attr3_root_inactive(
* the extents in reverse order the extent containing
* block 0 must still be there.
*/
- error = xfs_da3_node_read(*trans, dp, 0, &bp, XFS_ATTR_FORK);
- if (error)
+ error = __xfs_da3_node_read(*trans, dp, 0, XFS_DABUF_MAP_HOLE_OK,
+ &bp, XFS_ATTR_FORK);
+ if (error || !bp)
return error;
blkno = xfs_buf_daddr(bp);
@@ -298,7 +317,7 @@ xfs_attr3_root_inactive(
* Invalidate the incore copy of the root block.
*/
error = xfs_trans_get_buf(*trans, mp->m_ddev_targp, blkno,
- XFS_FSB_TO_BB(mp, mp->m_attr_geo->fsbcount), 0, &bp);
+ XFS_FSB_TO_BB(mp, count), 0, &bp);
if (error)
return error;
error = bp->b_error;
@@ -306,7 +325,17 @@ xfs_attr3_root_inactive(
xfs_trans_brelse(*trans, bp);
return error;
}
+
+ error = xfs_bunmapi(*trans, dp, 0, count, XFS_BMAPI_ATTRFORK, 0, &done);
+ if (error) {
+ xfs_trans_brelse(*trans, bp);
+ return error;
+ }
xfs_trans_binval(*trans, bp); /* remove from cache */
+
+ error = xfs_defer_finish(trans);
+ if (error)
+ return error;
/*
* Commit the invalidate and start the next transaction.
*/
@@ -369,6 +398,7 @@ xfs_attr_inactive(
if (error)
goto out_cancel;
+ /* Remove the potential leftover remote attr blocks. */
error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0);
if (error)
goto out_cancel;
--
2.31.1
^ permalink raw reply related [flat|nested] 5+ messages in thread