* [PATCH 0/3] xfs: fix inode use-after-free during log recovery
@ 2020-09-17 3:29 Darrick J. Wong
2020-09-17 3:29 ` [PATCH 1/3] xfs: clean up bmap intent item recovery checking Darrick J. Wong
` (2 more replies)
0 siblings, 3 replies; 18+ messages in thread
From: Darrick J. Wong @ 2020-09-17 3:29 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs, david
Hi all,
In this series, I try to fix a use-after-free that I discovered during
development of the dfops freezer, where BUI recovery releases the inode
even if it requeues itself. If the inode gets reclaimed, the fs
corrupts memory and explodes. The fix is to make the dfops freezer take
over ownership of the inodes if there's any more work to be done.
If you're going to start using this mess, you probably ought to just
pull from my git trees, which are linked below.
This is an extraordinary way to destroy everything. Enjoy!
Comments and questions are, as always, welcome.
--D
kernel git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfs-linux.git/log/?h=fix-bmap-intent-recovery
---
fs/xfs/libxfs/xfs_defer.c | 57 ++++++++++++++++++++++-
fs/xfs/libxfs/xfs_defer.h | 21 ++++++++-
fs/xfs/libxfs/xfs_log_recover.h | 14 +++++-
fs/xfs/xfs_bmap_item.c | 95 +++++++++++++++------------------------
fs/xfs/xfs_icache.c | 41 +++++++++++++++++
fs/xfs/xfs_log_recover.c | 32 +++++++++++--
fs/xfs/xfs_trans.h | 6 --
7 files changed, 191 insertions(+), 75 deletions(-)
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH 1/3] xfs: clean up bmap intent item recovery checking
2020-09-17 3:29 [PATCH 0/3] xfs: fix inode use-after-free during log recovery Darrick J. Wong
@ 2020-09-17 3:29 ` Darrick J. Wong
2020-09-17 5:09 ` Dave Chinner
2020-09-23 7:16 ` Christoph Hellwig
2020-09-17 3:29 ` [PATCH 2/3] xfs: clean up xfs_bui_item_recover iget/trans_alloc/ilock ordering Darrick J. Wong
2020-09-17 3:29 ` [PATCH 3/3] xfs: fix an incore inode UAF in xfs_bui_recover Darrick J. Wong
2 siblings, 2 replies; 18+ messages in thread
From: Darrick J. Wong @ 2020-09-17 3:29 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs, david
From: Darrick J. Wong <darrick.wong@oracle.com>
The bmap intent item checking code in xfs_bui_item_recover is spread all
over the function. We should check the recovered log item at the top
before we allocate any resources or do anything else, so do that.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/xfs_bmap_item.c | 57 ++++++++++++++++--------------------------------
1 file changed, 19 insertions(+), 38 deletions(-)
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index 0a0904a7650a..877afe76d76a 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -437,17 +437,13 @@ xfs_bui_item_recover(
xfs_fsblock_t inode_fsb;
xfs_filblks_t count;
xfs_exntst_t state;
- enum xfs_bmap_intent_type type;
- bool op_ok;
unsigned int bui_type;
int whichfork;
int error = 0;
/* Only one mapping operation per BUI... */
- if (buip->bui_format.bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) {
- xfs_bui_release(buip);
- return -EFSCORRUPTED;
- }
+ if (buip->bui_format.bui_nextents != XFS_BUI_MAX_FAST_EXTENTS)
+ goto garbage;
/*
* First check the validity of the extent described by the
@@ -458,29 +454,26 @@ xfs_bui_item_recover(
XFS_FSB_TO_DADDR(mp, bmap->me_startblock));
inode_fsb = XFS_BB_TO_FSB(mp, XFS_FSB_TO_DADDR(mp,
XFS_INO_TO_FSB(mp, bmap->me_owner)));
- switch (bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK) {
+ state = (bmap->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ?
+ XFS_EXT_UNWRITTEN : XFS_EXT_NORM;
+ whichfork = (bmap->me_flags & XFS_BMAP_EXTENT_ATTR_FORK) ?
+ XFS_ATTR_FORK : XFS_DATA_FORK;
+ bui_type = bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK;
+ switch (bui_type) {
case XFS_BMAP_MAP:
case XFS_BMAP_UNMAP:
- op_ok = true;
break;
default:
- op_ok = false;
- break;
+ goto garbage;
}
- if (!op_ok || startblock_fsb == 0 ||
+ if (startblock_fsb == 0 ||
bmap->me_len == 0 ||
inode_fsb == 0 ||
startblock_fsb >= mp->m_sb.sb_dblocks ||
bmap->me_len >= mp->m_sb.sb_agblocks ||
inode_fsb >= mp->m_sb.sb_dblocks ||
- (bmap->me_flags & ~XFS_BMAP_EXTENT_FLAGS)) {
- /*
- * This will pull the BUI from the AIL and
- * free the memory associated with it.
- */
- xfs_bui_release(buip);
- return -EFSCORRUPTED;
- }
+ (bmap->me_flags & ~XFS_BMAP_EXTENT_FLAGS))
+ goto garbage;
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 0, 0, &tp);
@@ -501,32 +494,17 @@ xfs_bui_item_recover(
if (VFS_I(ip)->i_nlink == 0)
xfs_iflags_set(ip, XFS_IRECOVERY);
- /* Process deferred bmap item. */
- state = (bmap->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ?
- XFS_EXT_UNWRITTEN : XFS_EXT_NORM;
- whichfork = (bmap->me_flags & XFS_BMAP_EXTENT_ATTR_FORK) ?
- XFS_ATTR_FORK : XFS_DATA_FORK;
- bui_type = bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK;
- switch (bui_type) {
- case XFS_BMAP_MAP:
- case XFS_BMAP_UNMAP:
- type = bui_type;
- break;
- default:
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
- error = -EFSCORRUPTED;
- goto err_inode;
- }
xfs_trans_ijoin(tp, ip, 0);
count = bmap->me_len;
- error = xfs_trans_log_finish_bmap_update(tp, budp, type, ip, whichfork,
- bmap->me_startoff, bmap->me_startblock, &count, state);
+ error = xfs_trans_log_finish_bmap_update(tp, budp, bui_type, ip,
+ whichfork, bmap->me_startoff, bmap->me_startblock,
+ &count, state);
if (error)
goto err_inode;
if (count > 0) {
- ASSERT(type == XFS_BMAP_UNMAP);
+ ASSERT(bui_type == XFS_BMAP_UNMAP);
irec.br_startblock = bmap->me_startblock;
irec.br_blockcount = count;
irec.br_startoff = bmap->me_startoff;
@@ -546,6 +524,9 @@ xfs_bui_item_recover(
xfs_irele(ip);
}
return error;
+garbage:
+ xfs_bui_release(buip);
+ return -EFSCORRUPTED;
}
STATIC bool
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH 2/3] xfs: clean up xfs_bui_item_recover iget/trans_alloc/ilock ordering
2020-09-17 3:29 [PATCH 0/3] xfs: fix inode use-after-free during log recovery Darrick J. Wong
2020-09-17 3:29 ` [PATCH 1/3] xfs: clean up bmap intent item recovery checking Darrick J. Wong
@ 2020-09-17 3:29 ` Darrick J. Wong
2020-09-17 5:13 ` Dave Chinner
2020-09-17 7:08 ` [PATCH v2 " Darrick J. Wong
2020-09-17 3:29 ` [PATCH 3/3] xfs: fix an incore inode UAF in xfs_bui_recover Darrick J. Wong
2 siblings, 2 replies; 18+ messages in thread
From: Darrick J. Wong @ 2020-09-17 3:29 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs, david
From: Darrick J. Wong <darrick.wong@oracle.com>
In most places in XFS, we have a specific order in which we gather
resources: grab the inode, allocate a transaction, then lock the inode.
xfs_bui_item_recover doesn't do it in that order, so fix it to be more
consistent. This also makes the error bailout code a bit less weird.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/xfs_bmap_item.c | 40 +++++++++++++++++++++-------------------
1 file changed, 21 insertions(+), 19 deletions(-)
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index 877afe76d76a..6f589f04f358 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -475,25 +475,26 @@ xfs_bui_item_recover(
(bmap->me_flags & ~XFS_BMAP_EXTENT_FLAGS))
goto garbage;
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
- XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 0, 0, &tp);
- if (error)
- return error;
-
- budp = xfs_trans_get_bud(tp, buip);
-
/* Grab the inode. */
- error = xfs_iget(mp, tp, bmap->me_owner, 0, XFS_ILOCK_EXCL, &ip);
+ error = xfs_iget(mp, NULL, bmap->me_owner, 0, 0, &ip);
if (error)
- goto err_inode;
+ return error;
error = xfs_qm_dqattach(ip);
if (error)
- goto err_inode;
+ goto err_rele;
if (VFS_I(ip)->i_nlink == 0)
xfs_iflags_set(ip, XFS_IRECOVERY);
+ /* Allocate transaction and do the work. */
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
+ XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 0, 0, &tp);
+ if (error)
+ goto err_rele;
+
+ budp = xfs_trans_get_bud(tp, buip);
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0);
count = bmap->me_len;
@@ -501,7 +502,7 @@ xfs_bui_item_recover(
whichfork, bmap->me_startoff, bmap->me_startblock,
&count, state);
if (error)
- goto err_inode;
+ goto err_cancel;
if (count > 0) {
ASSERT(bui_type == XFS_BMAP_UNMAP);
@@ -512,18 +513,19 @@ xfs_bui_item_recover(
xfs_bmap_unmap_extent(tp, ip, &irec);
}
+ /* Commit transaction, which frees tp. */
error = xlog_recover_trans_commit(tp, dfcp);
+ if (error)
+ goto err_unlock;
+ return 0;
+
+err_cancel:
+ xfs_trans_cancel(tp);
+err_unlock:
xfs_iunlock(ip, XFS_ILOCK_EXCL);
+err_rele:
xfs_irele(ip);
return error;
-
-err_inode:
- xfs_trans_cancel(tp);
- if (ip) {
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- xfs_irele(ip);
- }
- return error;
garbage:
xfs_bui_release(buip);
return -EFSCORRUPTED;
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH 3/3] xfs: fix an incore inode UAF in xfs_bui_recover
2020-09-17 3:29 [PATCH 0/3] xfs: fix inode use-after-free during log recovery Darrick J. Wong
2020-09-17 3:29 ` [PATCH 1/3] xfs: clean up bmap intent item recovery checking Darrick J. Wong
2020-09-17 3:29 ` [PATCH 2/3] xfs: clean up xfs_bui_item_recover iget/trans_alloc/ilock ordering Darrick J. Wong
@ 2020-09-17 3:29 ` Darrick J. Wong
2020-09-23 7:20 ` Christoph Hellwig
2020-09-24 6:06 ` [PATCH v2 " Darrick J. Wong
2 siblings, 2 replies; 18+ messages in thread
From: Darrick J. Wong @ 2020-09-17 3:29 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs, david
From: Darrick J. Wong <darrick.wong@oracle.com>
In xfs_bui_item_recover, there exists a use-after-free bug with regards
to the inode that is involved in the bmap replay operation. If the
mapping operation does not complete, we call xfs_bmap_unmap_extent to
create a deferred op to finish the unmapping work, and we retain a
pointer to the incore inode.
Unfortunately, the very next thing we do is commit the transaction and
drop the inode. If reclaim tears down the inode before we try to finish
the defer ops, we dereference garbage and blow up. Therefore, create a
way to join inodes to the defer ops freezer so that we can maintain the
xfs_inode reference until we're done with the inode.
Note: This imposes the requirement that there be enough memory to keep
every incore inode in memory throughout recovery.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/libxfs/xfs_defer.c | 57 +++++++++++++++++++++++++++++++++++++--
fs/xfs/libxfs/xfs_defer.h | 21 ++++++++++++++
fs/xfs/libxfs/xfs_log_recover.h | 14 ++++++++--
fs/xfs/xfs_bmap_item.c | 8 +----
fs/xfs/xfs_icache.c | 41 ++++++++++++++++++++++++++++
fs/xfs/xfs_log_recover.c | 32 ++++++++++++++++++----
fs/xfs/xfs_trans.h | 6 ----
7 files changed, 156 insertions(+), 23 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index edfb3b9856c8..97523b394932 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -16,6 +16,7 @@
#include "xfs_inode.h"
#include "xfs_inode_item.h"
#include "xfs_trace.h"
+#include "xfs_icache.h"
/*
* Deferred Operations in XFS
@@ -555,6 +556,18 @@ xfs_defer_move(
xfs_defer_reset(stp);
}
+/* Unlock the inodes attached to this dfops capture device. */
+static void
+xfs_defer_capture_iunlock(
+ struct xfs_defer_capture *dfc)
+{
+ unsigned int i;
+
+ for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dfc->dfc_inodes[i]; i++)
+ xfs_iunlock(dfc->dfc_inodes[i], XFS_ILOCK_EXCL);
+ dfc->dfc_ilocked = false;
+}
+
/*
* Prepare a chain of fresh deferred ops work items to be completed later. Log
* recovery requires the ability to put off until later the actual finishing
@@ -568,14 +581,21 @@ xfs_defer_move(
* transaction is committed.
*
* Note that the capture state is passed up to the caller and must be freed
- * even if the transaction commit returns error.
+ * even if the transaction commit returns error. If inodes were passed in and
+ * a state capture structure was returned, the inodes are now owned by the
+ * state capture structure and the caller must not touch the inodes.
+ *
+ * If no structure is returned, the caller still owns the inodes.
*/
int
xfs_defer_capture(
struct xfs_trans *tp,
- struct xfs_defer_capture **dfcp)
+ struct xfs_defer_capture **dfcp,
+ struct xfs_inode *ip1,
+ struct xfs_inode *ip2)
{
struct xfs_defer_capture *dfc = NULL;
+ int error;
if (!list_empty(&tp->t_dfops)) {
dfc = kmem_zalloc(sizeof(*dfc), KM_NOFS);
@@ -594,10 +614,31 @@ xfs_defer_capture(
dfc->dfc_tres.tr_logcount = tp->t_log_count;
dfc->dfc_tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
xfs_defer_reset(tp);
+
+ /*
+ * Transfer responsibility for unlocking and releasing the
+ * inodes to the capture structure.
+ */
+ if (!ip1)
+ ip1 = ip2;
+ dfc->dfc_ilocked = true;
+ dfc->dfc_inodes[0] = ip1;
+ if (ip2 != ip1)
+ dfc->dfc_inodes[1] = ip2;
}
*dfcp = dfc;
- return xfs_trans_commit(tp);
+ error = xfs_trans_commit(tp);
+ if (error)
+ return error;
+
+ /*
+ * Now that we've committed the transaction, it's safe to unlock the
+ * inodes that were passed in if we've taken over their ownership.
+ */
+ if (dfc)
+ xfs_defer_capture_iunlock(dfc);
+ return 0;
}
/* Attach a chain of captured deferred ops to a new transaction. */
@@ -609,6 +650,13 @@ xfs_defer_continue(
ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
ASSERT(!(tp->t_flags & XFS_TRANS_DIRTY));
+ /* Lock and join the inodes to the new transaction. */
+ xfs_defer_continue_inodes(dfc, tp);
+ if (dfc->dfc_inodes[0])
+ xfs_trans_ijoin(tp, dfc->dfc_inodes[0], 0);
+ if (dfc->dfc_inodes[1])
+ xfs_trans_ijoin(tp, dfc->dfc_inodes[1], 0);
+
/* Move captured dfops chain and state to the transaction. */
list_splice_init(&dfc->dfc_dfops, &tp->t_dfops);
tp->t_flags |= dfc->dfc_tpflags;
@@ -624,5 +672,8 @@ xfs_defer_capture_free(
struct xfs_defer_capture *dfc)
{
xfs_defer_cancel_list(mp, &dfc->dfc_dfops);
+ if (dfc->dfc_ilocked)
+ xfs_defer_capture_iunlock(dfc);
+ xfs_defer_capture_irele(dfc);
kmem_free(dfc);
}
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index a788855aef69..4663a9026545 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -10,6 +10,12 @@ struct xfs_btree_cur;
struct xfs_defer_op_type;
struct xfs_defer_capture;
+/*
+ * Deferred operation item relogging limits.
+ */
+#define XFS_DEFER_OPS_NR_INODES 2 /* join up to two inodes */
+#define XFS_DEFER_OPS_NR_BUFS 2 /* join up to two buffers */
+
/*
* Header for deferred operation list.
*/
@@ -78,15 +84,28 @@ struct xfs_defer_capture {
unsigned int dfc_tpflags;
unsigned int dfc_blkres;
struct xfs_trans_res dfc_tres;
+
+ /*
+ * Inodes to hold when we want to finish the deferred work items.
+ * Always set the first element before setting the second.
+ */
+ bool dfc_ilocked;
+ struct xfs_inode *dfc_inodes[XFS_DEFER_OPS_NR_INODES];
};
/*
* Functions to capture a chain of deferred operations and continue them later.
* This doesn't normally happen except log recovery.
*/
-int xfs_defer_capture(struct xfs_trans *tp, struct xfs_defer_capture **dfcp);
+int xfs_defer_capture(struct xfs_trans *tp, struct xfs_defer_capture **dfcp,
+ struct xfs_inode *ip1, struct xfs_inode *ip2);
void xfs_defer_continue(struct xfs_defer_capture *dfc, struct xfs_trans *tp);
void xfs_defer_capture_free(struct xfs_mount *mp,
struct xfs_defer_capture *dfc);
+/* These functions must be provided by the xfs implementation. */
+void xfs_defer_continue_inodes(struct xfs_defer_capture *dfc,
+ struct xfs_trans *tp);
+void xfs_defer_capture_irele(struct xfs_defer_capture *dfc);
+
#endif /* __XFS_DEFER_H__ */
diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
index c3563c5c033c..e8aba7c6e851 100644
--- a/fs/xfs/libxfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h
@@ -127,7 +127,17 @@ void xlog_recover_iodone(struct xfs_buf *bp);
void xlog_recover_release_intent(struct xlog *log, unsigned short intent_type,
uint64_t intent_id);
-int xlog_recover_trans_commit(struct xfs_trans *tp,
- struct xfs_defer_capture **dfcp);
+int xlog_recover_trans_commit_inodes(struct xfs_trans *tp,
+ struct xfs_defer_capture **dfcp, struct xfs_inode *ip1,
+ struct xfs_inode *ip2);
+
+static inline int
+xlog_recover_trans_commit(
+ struct xfs_trans *tp,
+ struct xfs_defer_capture **dfcp)
+{
+ return xlog_recover_trans_commit_inodes(tp, dfcp, NULL, NULL);
+}
+
#endif /* __XFS_LOG_RECOVER_H__ */
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index 6f589f04f358..8461285a9dd9 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -513,15 +513,11 @@ xfs_bui_item_recover(
xfs_bmap_unmap_extent(tp, ip, &irec);
}
- /* Commit transaction, which frees tp. */
- error = xlog_recover_trans_commit(tp, dfcp);
- if (error)
- goto err_unlock;
- return 0;
+ /* Commit transaction, which frees the transaction and the inode. */
+ return xlog_recover_trans_commit_inodes(tp, dfcp, ip, NULL);
err_cancel:
xfs_trans_cancel(tp);
-err_unlock:
xfs_iunlock(ip, XFS_ILOCK_EXCL);
err_rele:
xfs_irele(ip);
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 101028ebb571..5b1202cac4ea 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -12,6 +12,7 @@
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
+#include "xfs_defer.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_inode_item.h"
@@ -1692,3 +1693,43 @@ xfs_start_block_reaping(
xfs_queue_eofblocks(mp);
xfs_queue_cowblocks(mp);
}
+
+/*
+ * Prepare the inodes to participate in further log intent item recovery.
+ * For now, that means attaching dquots and locking them, since libxfs doesn't
+ * know how to do that.
+ */
+void
+xfs_defer_continue_inodes(
+ struct xfs_defer_capture *dfc,
+ struct xfs_trans *tp)
+{
+ int i;
+ int error;
+
+ for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dfc->dfc_inodes[i]; i++) {
+ error = xfs_qm_dqattach(dfc->dfc_inodes[i]);
+ if (error)
+ tp->t_mountp->m_qflags &= ~XFS_ALL_QUOTA_CHKD;
+ }
+
+ if (dfc->dfc_inodes[1])
+ xfs_lock_two_inodes(dfc->dfc_inodes[0], XFS_ILOCK_EXCL,
+ dfc->dfc_inodes[1], XFS_ILOCK_EXCL);
+ else if (dfc->dfc_inodes[0])
+ xfs_ilock(dfc->dfc_inodes[0], XFS_ILOCK_EXCL);
+ dfc->dfc_ilocked = true;
+}
+
+/* Release all the inodes attached to this dfops capture device. */
+void
+xfs_defer_capture_irele(
+ struct xfs_defer_capture *dfc)
+{
+ unsigned int i;
+
+ for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dfc->dfc_inodes[i]; i++) {
+ xfs_irele(dfc->dfc_inodes[i]);
+ dfc->dfc_inodes[i] = NULL;
+ }
+}
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index f5748c8f5157..9ac2726d42b4 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1791,16 +1791,38 @@ xlog_recover_release_intent(
spin_unlock(&ailp->ail_lock);
}
+static inline void
+xlog_recover_irele(
+ struct xfs_inode *ip)
+{
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ xfs_irele(ip);
+}
+
/*
- * Freeze any deferred ops and commit the transaction. This is the last step
- * needed to finish a log intent item that we recovered from the log.
+ * Freeze any deferred ops, commit the transaction, and deal with the inodes.
+ * This is the last step needed to finish a log intent item that we recovered
+ * from the log. If we captured deferred ops, the inodes are attached to it
+ * and must not be touched. If not, we have to unlock and free them ourselves.
*/
int
-xlog_recover_trans_commit(
+xlog_recover_trans_commit_inodes(
struct xfs_trans *tp,
- struct xfs_defer_capture **dfcp)
+ struct xfs_defer_capture **dfcp,
+ struct xfs_inode *ip1,
+ struct xfs_inode *ip2)
{
- return xfs_defer_capture(tp, dfcp);
+ int error;
+
+ error = xfs_defer_capture(tp, dfcp, ip1, ip2);
+ if (*dfcp)
+ return error;
+
+ if (ip2 && ip2 != ip1)
+ xlog_recover_irele(ip2);
+ if (ip1)
+ xlog_recover_irele(ip1);
+ return error;
}
/******************************************************************************
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index c88240961aa1..995c1513693c 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -97,12 +97,6 @@ void xfs_log_item_init(struct xfs_mount *mp, struct xfs_log_item *item,
#define XFS_ITEM_LOCKED 2
#define XFS_ITEM_FLUSHING 3
-/*
- * Deferred operation item relogging limits.
- */
-#define XFS_DEFER_OPS_NR_INODES 2 /* join up to two inodes */
-#define XFS_DEFER_OPS_NR_BUFS 2 /* join up to two buffers */
-
/*
* This is the structure maintained for every active transaction.
*/
^ permalink raw reply related [flat|nested] 18+ messages in thread
* Re: [PATCH 1/3] xfs: clean up bmap intent item recovery checking
2020-09-17 3:29 ` [PATCH 1/3] xfs: clean up bmap intent item recovery checking Darrick J. Wong
@ 2020-09-17 5:09 ` Dave Chinner
2020-09-23 7:16 ` Christoph Hellwig
1 sibling, 0 replies; 18+ messages in thread
From: Dave Chinner @ 2020-09-17 5:09 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: linux-xfs
On Wed, Sep 16, 2020 at 08:29:30PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
>
> The bmap intent item checking code in xfs_bui_item_recover is spread all
> over the function. We should check the recovered log item at the top
> before we allocate any resources or do anything else, so do that.
>
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
> fs/xfs/xfs_bmap_item.c | 57 ++++++++++++++++--------------------------------
> 1 file changed, 19 insertions(+), 38 deletions(-)
Looks good - I have a very similar patch here....
Reviewed-by: Dave Chinner <dchinner@redhat.com>
--
Dave Chinner
david@fromorbit.com
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 2/3] xfs: clean up xfs_bui_item_recover iget/trans_alloc/ilock ordering
2020-09-17 3:29 ` [PATCH 2/3] xfs: clean up xfs_bui_item_recover iget/trans_alloc/ilock ordering Darrick J. Wong
@ 2020-09-17 5:13 ` Dave Chinner
2020-09-17 6:47 ` Darrick J. Wong
2020-09-17 7:08 ` [PATCH v2 " Darrick J. Wong
1 sibling, 1 reply; 18+ messages in thread
From: Dave Chinner @ 2020-09-17 5:13 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: linux-xfs
On Wed, Sep 16, 2020 at 08:29:36PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
>
> In most places in XFS, we have a specific order in which we gather
> resources: grab the inode, allocate a transaction, then lock the inode.
> xfs_bui_item_recover doesn't do it in that order, so fix it to be more
> consistent. This also makes the error bailout code a bit less weird.
>
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
> fs/xfs/xfs_bmap_item.c | 40 +++++++++++++++++++++-------------------
> 1 file changed, 21 insertions(+), 19 deletions(-)
This probably needs to go before the xfs_qm_dqattach() fix, or
the dqattach fix need to come after this....
>
>
> diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
> index 877afe76d76a..6f589f04f358 100644
> --- a/fs/xfs/xfs_bmap_item.c
> +++ b/fs/xfs/xfs_bmap_item.c
> @@ -475,25 +475,26 @@ xfs_bui_item_recover(
> (bmap->me_flags & ~XFS_BMAP_EXTENT_FLAGS))
> goto garbage;
>
> - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
> - XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 0, 0, &tp);
> - if (error)
> - return error;
> -
> - budp = xfs_trans_get_bud(tp, buip);
> -
> /* Grab the inode. */
> - error = xfs_iget(mp, tp, bmap->me_owner, 0, XFS_ILOCK_EXCL, &ip);
> + error = xfs_iget(mp, NULL, bmap->me_owner, 0, 0, &ip);
> if (error)
> - goto err_inode;
> + return error;
>
> error = xfs_qm_dqattach(ip);
> if (error)
> - goto err_inode;
> + goto err_rele;
>
> if (VFS_I(ip)->i_nlink == 0)
> xfs_iflags_set(ip, XFS_IRECOVERY);
>
> + /* Allocate transaction and do the work. */
> + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
> + XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 0, 0, &tp);
> + if (error)
> + goto err_rele;
Hmmmm - don't all the error cased before we call xfs_trans_get_bud()
need to release the bui?
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 2/3] xfs: clean up xfs_bui_item_recover iget/trans_alloc/ilock ordering
2020-09-17 5:13 ` Dave Chinner
@ 2020-09-17 6:47 ` Darrick J. Wong
0 siblings, 0 replies; 18+ messages in thread
From: Darrick J. Wong @ 2020-09-17 6:47 UTC (permalink / raw)
To: Dave Chinner; +Cc: linux-xfs
On Thu, Sep 17, 2020 at 03:13:33PM +1000, Dave Chinner wrote:
> On Wed, Sep 16, 2020 at 08:29:36PM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> >
> > In most places in XFS, we have a specific order in which we gather
> > resources: grab the inode, allocate a transaction, then lock the inode.
> > xfs_bui_item_recover doesn't do it in that order, so fix it to be more
> > consistent. This also makes the error bailout code a bit less weird.
> >
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > ---
> > fs/xfs/xfs_bmap_item.c | 40 +++++++++++++++++++++-------------------
> > 1 file changed, 21 insertions(+), 19 deletions(-)
>
> This probably needs to go before the xfs_qm_dqattach() fix, or
> the dqattach fix need to come after this....
<nod> I'll fix the previous patch.
> >
> > diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
> > index 877afe76d76a..6f589f04f358 100644
> > --- a/fs/xfs/xfs_bmap_item.c
> > +++ b/fs/xfs/xfs_bmap_item.c
> > @@ -475,25 +475,26 @@ xfs_bui_item_recover(
> > (bmap->me_flags & ~XFS_BMAP_EXTENT_FLAGS))
> > goto garbage;
> >
> > - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
> > - XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 0, 0, &tp);
> > - if (error)
> > - return error;
> > -
> > - budp = xfs_trans_get_bud(tp, buip);
> > -
> > /* Grab the inode. */
> > - error = xfs_iget(mp, tp, bmap->me_owner, 0, XFS_ILOCK_EXCL, &ip);
> > + error = xfs_iget(mp, NULL, bmap->me_owner, 0, 0, &ip);
> > if (error)
> > - goto err_inode;
> > + return error;
> >
> > error = xfs_qm_dqattach(ip);
> > if (error)
> > - goto err_inode;
> > + goto err_rele;
> >
> > if (VFS_I(ip)->i_nlink == 0)
> > xfs_iflags_set(ip, XFS_IRECOVERY);
> >
> > + /* Allocate transaction and do the work. */
> > + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
> > + XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 0, 0, &tp);
> > + if (error)
> > + goto err_rele;
>
> Hmmmm - don't all the error cased before we call xfs_trans_get_bud()
> need to release the bui?
Yes, I think so. Come to think of it, the other intent items seem like
they have the same bug.
--D
>
> Cheers,
>
> Dave.
> --
> Dave Chinner
> david@fromorbit.com
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH v2 2/3] xfs: clean up xfs_bui_item_recover iget/trans_alloc/ilock ordering
2020-09-17 3:29 ` [PATCH 2/3] xfs: clean up xfs_bui_item_recover iget/trans_alloc/ilock ordering Darrick J. Wong
2020-09-17 5:13 ` Dave Chinner
@ 2020-09-17 7:08 ` Darrick J. Wong
2020-09-17 8:19 ` Dave Chinner
2020-09-23 7:17 ` Christoph Hellwig
1 sibling, 2 replies; 18+ messages in thread
From: Darrick J. Wong @ 2020-09-17 7:08 UTC (permalink / raw)
To: linux-xfs, david
From: Darrick J. Wong <darrick.wong@oracle.com>
In most places in XFS, we have a specific order in which we gather
resources: grab the inode, allocate a transaction, then lock the inode.
xfs_bui_item_recover doesn't do it in that order, so fix it to be more
consistent. This also makes the error bailout code a bit less weird.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
v2: fix the error-out paths to free the BUI if the BUD hasn't been
created
---
fs/xfs/xfs_bmap_item.c | 49 +++++++++++++++++++++++++-----------------------
1 file changed, 26 insertions(+), 23 deletions(-)
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index 4e5aa29f75b7..f088cfd495bd 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -432,7 +432,7 @@ xfs_bui_item_recover(
struct xfs_inode *ip = NULL;
struct xfs_mount *mp = lip->li_mountp;
struct xfs_map_extent *bmap;
- struct xfs_bud_log_item *budp;
+ struct xfs_bud_log_item *budp = NULL;
xfs_fsblock_t startblock_fsb;
xfs_fsblock_t inode_fsb;
xfs_filblks_t count;
@@ -475,27 +475,26 @@ xfs_bui_item_recover(
(bmap->me_flags & ~XFS_BMAP_EXTENT_FLAGS))
goto garbage;
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
- XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 0, 0, &tp);
- if (error) {
- xfs_bui_release(buip);
- return error;
- }
-
- budp = xfs_trans_get_bud(tp, buip);
-
/* Grab the inode. */
- error = xfs_iget(mp, tp, bmap->me_owner, 0, XFS_ILOCK_EXCL, &ip);
+ error = xfs_iget(mp, NULL, bmap->me_owner, 0, 0, &ip);
if (error)
- goto err_inode;
+ goto err_bui;
- error = xfs_qm_dqattach_locked(ip, false);
+ error = xfs_qm_dqattach(ip);
if (error)
- goto err_inode;
+ goto err_rele;
if (VFS_I(ip)->i_nlink == 0)
xfs_iflags_set(ip, XFS_IRECOVERY);
+ /* Allocate transaction and do the work. */
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
+ XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 0, 0, &tp);
+ if (error)
+ goto err_rele;
+
+ budp = xfs_trans_get_bud(tp, buip);
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0);
count = bmap->me_len;
@@ -503,7 +502,7 @@ xfs_bui_item_recover(
whichfork, bmap->me_startoff, bmap->me_startblock,
&count, state);
if (error)
- goto err_inode;
+ goto err_cancel;
if (count > 0) {
ASSERT(bui_type == XFS_BMAP_UNMAP);
@@ -514,17 +513,21 @@ xfs_bui_item_recover(
xfs_bmap_unmap_extent(tp, ip, &irec);
}
+ /* Commit transaction, which frees tp. */
error = xlog_recover_trans_commit(tp, dfcp);
+ if (error)
+ goto err_unlock;
+ return 0;
+
+err_cancel:
+ xfs_trans_cancel(tp);
+err_unlock:
xfs_iunlock(ip, XFS_ILOCK_EXCL);
+err_rele:
xfs_irele(ip);
- return error;
-
-err_inode:
- xfs_trans_cancel(tp);
- if (ip) {
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- xfs_irele(ip);
- }
+err_bui:
+ if (!budp)
+ xfs_bui_release(buip);
return error;
garbage:
xfs_bui_release(buip);
^ permalink raw reply related [flat|nested] 18+ messages in thread
* Re: [PATCH v2 2/3] xfs: clean up xfs_bui_item_recover iget/trans_alloc/ilock ordering
2020-09-17 7:08 ` [PATCH v2 " Darrick J. Wong
@ 2020-09-17 8:19 ` Dave Chinner
2020-09-23 7:17 ` Christoph Hellwig
1 sibling, 0 replies; 18+ messages in thread
From: Dave Chinner @ 2020-09-17 8:19 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: linux-xfs
On Thu, Sep 17, 2020 at 12:08:02AM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
>
> In most places in XFS, we have a specific order in which we gather
> resources: grab the inode, allocate a transaction, then lock the inode.
> xfs_bui_item_recover doesn't do it in that order, so fix it to be more
> consistent. This also makes the error bailout code a bit less weird.
>
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
> v2: fix the error-out paths to free the BUI if the BUD hasn't been
> created
> ---
> fs/xfs/xfs_bmap_item.c | 49 +++++++++++++++++++++++++-----------------------
> 1 file changed, 26 insertions(+), 23 deletions(-)
>
> diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
> index 4e5aa29f75b7..f088cfd495bd 100644
> --- a/fs/xfs/xfs_bmap_item.c
> +++ b/fs/xfs/xfs_bmap_item.c
> @@ -432,7 +432,7 @@ xfs_bui_item_recover(
> struct xfs_inode *ip = NULL;
> struct xfs_mount *mp = lip->li_mountp;
> struct xfs_map_extent *bmap;
> - struct xfs_bud_log_item *budp;
> + struct xfs_bud_log_item *budp = NULL;
> xfs_fsblock_t startblock_fsb;
> xfs_fsblock_t inode_fsb;
> xfs_filblks_t count;
> @@ -475,27 +475,26 @@ xfs_bui_item_recover(
> (bmap->me_flags & ~XFS_BMAP_EXTENT_FLAGS))
> goto garbage;
>
> - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
> - XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 0, 0, &tp);
> - if (error) {
> - xfs_bui_release(buip);
> - return error;
> - }
> -
> - budp = xfs_trans_get_bud(tp, buip);
> -
> /* Grab the inode. */
> - error = xfs_iget(mp, tp, bmap->me_owner, 0, XFS_ILOCK_EXCL, &ip);
> + error = xfs_iget(mp, NULL, bmap->me_owner, 0, 0, &ip);
> if (error)
> - goto err_inode;
> + goto err_bui;
err_bui could be combined with the garbage error return if error is
initialised to -EFSCORRUPTED.
Otherwise it looks OK.
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 1/3] xfs: clean up bmap intent item recovery checking
2020-09-17 3:29 ` [PATCH 1/3] xfs: clean up bmap intent item recovery checking Darrick J. Wong
2020-09-17 5:09 ` Dave Chinner
@ 2020-09-23 7:16 ` Christoph Hellwig
2020-09-23 15:22 ` Darrick J. Wong
1 sibling, 1 reply; 18+ messages in thread
From: Christoph Hellwig @ 2020-09-23 7:16 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: linux-xfs, david
On Wed, Sep 16, 2020 at 08:29:30PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
>
> The bmap intent item checking code in xfs_bui_item_recover is spread all
> over the function. We should check the recovered log item at the top
> before we allocate any resources or do anything else, so do that.
>
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
> fs/xfs/xfs_bmap_item.c | 57 ++++++++++++++++--------------------------------
> 1 file changed, 19 insertions(+), 38 deletions(-)
>
>
> diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
> index 0a0904a7650a..877afe76d76a 100644
> --- a/fs/xfs/xfs_bmap_item.c
> +++ b/fs/xfs/xfs_bmap_item.c
> @@ -437,17 +437,13 @@ xfs_bui_item_recover(
> xfs_fsblock_t inode_fsb;
> xfs_filblks_t count;
> xfs_exntst_t state;
> - enum xfs_bmap_intent_type type;
> - bool op_ok;
> unsigned int bui_type;
> int whichfork;
> int error = 0;
>
> /* Only one mapping operation per BUI... */
> - if (buip->bui_format.bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) {
> - xfs_bui_release(buip);
> - return -EFSCORRUPTED;
> - }
> + if (buip->bui_format.bui_nextents != XFS_BUI_MAX_FAST_EXTENTS)
> + goto garbage;
We don't really need the xfs_bui_release any more, and can stick to
plain "return -EFSCORRUPTED" instead of the goto, but I suspect the
previous patch has taken care of that and you've rebased already?
Otherwise looks good:
Reviewed-by: Christoph Hellwig <hch@lst.de>
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH v2 2/3] xfs: clean up xfs_bui_item_recover iget/trans_alloc/ilock ordering
2020-09-17 7:08 ` [PATCH v2 " Darrick J. Wong
2020-09-17 8:19 ` Dave Chinner
@ 2020-09-23 7:17 ` Christoph Hellwig
1 sibling, 0 replies; 18+ messages in thread
From: Christoph Hellwig @ 2020-09-23 7:17 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: linux-xfs, david
On Thu, Sep 17, 2020 at 12:08:02AM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
>
> In most places in XFS, we have a specific order in which we gather
> resources: grab the inode, allocate a transaction, then lock the inode.
> xfs_bui_item_recover doesn't do it in that order, so fix it to be more
> consistent. This also makes the error bailout code a bit less weird.
>
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Looks good:
Reviewed-by: Christoph Hellwig <hch@lst.de>
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 3/3] xfs: fix an incore inode UAF in xfs_bui_recover
2020-09-17 3:29 ` [PATCH 3/3] xfs: fix an incore inode UAF in xfs_bui_recover Darrick J. Wong
@ 2020-09-23 7:20 ` Christoph Hellwig
2020-09-23 15:55 ` Darrick J. Wong
2020-09-24 6:06 ` [PATCH v2 " Darrick J. Wong
1 sibling, 1 reply; 18+ messages in thread
From: Christoph Hellwig @ 2020-09-23 7:20 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: linux-xfs, david
On Wed, Sep 16, 2020 at 08:29:42PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
>
> In xfs_bui_item_recover, there exists a use-after-free bug with regards
> to the inode that is involved in the bmap replay operation. If the
> mapping operation does not complete, we call xfs_bmap_unmap_extent to
> create a deferred op to finish the unmapping work, and we retain a
> pointer to the incore inode.
>
> Unfortunately, the very next thing we do is commit the transaction and
> drop the inode. If reclaim tears down the inode before we try to finish
> the defer ops, we dereference garbage and blow up. Therefore, create a
> way to join inodes to the defer ops freezer so that we can maintain the
> xfs_inode reference until we're done with the inode.
>
> Note: This imposes the requirement that there be enough memory to keep
> every incore inode in memory throughout recovery.
As in every inode that gets recovered, not every inode in the system.
I think the commit log could use a very slight tweak here.
Didn't we think of just storing the inode number for recovery, or
did this turn out too complicated? (I'm pretty sure we dicussed this
in detail before, but my memory gets foggy).
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 1/3] xfs: clean up bmap intent item recovery checking
2020-09-23 7:16 ` Christoph Hellwig
@ 2020-09-23 15:22 ` Darrick J. Wong
0 siblings, 0 replies; 18+ messages in thread
From: Darrick J. Wong @ 2020-09-23 15:22 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-xfs, david
On Wed, Sep 23, 2020 at 08:16:14AM +0100, Christoph Hellwig wrote:
> On Wed, Sep 16, 2020 at 08:29:30PM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> >
> > The bmap intent item checking code in xfs_bui_item_recover is spread all
> > over the function. We should check the recovered log item at the top
> > before we allocate any resources or do anything else, so do that.
> >
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > ---
> > fs/xfs/xfs_bmap_item.c | 57 ++++++++++++++++--------------------------------
> > 1 file changed, 19 insertions(+), 38 deletions(-)
> >
> >
> > diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
> > index 0a0904a7650a..877afe76d76a 100644
> > --- a/fs/xfs/xfs_bmap_item.c
> > +++ b/fs/xfs/xfs_bmap_item.c
> > @@ -437,17 +437,13 @@ xfs_bui_item_recover(
> > xfs_fsblock_t inode_fsb;
> > xfs_filblks_t count;
> > xfs_exntst_t state;
> > - enum xfs_bmap_intent_type type;
> > - bool op_ok;
> > unsigned int bui_type;
> > int whichfork;
> > int error = 0;
> >
> > /* Only one mapping operation per BUI... */
> > - if (buip->bui_format.bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) {
> > - xfs_bui_release(buip);
> > - return -EFSCORRUPTED;
> > - }
> > + if (buip->bui_format.bui_nextents != XFS_BUI_MAX_FAST_EXTENTS)
> > + goto garbage;
>
> We don't really need the xfs_bui_release any more, and can stick to
> plain "return -EFSCORRUPTED" instead of the goto, but I suspect the
> previous patch has taken care of that and you've rebased already?
Yep.
--D
> Otherwise looks good:
>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 3/3] xfs: fix an incore inode UAF in xfs_bui_recover
2020-09-23 7:20 ` Christoph Hellwig
@ 2020-09-23 15:55 ` Darrick J. Wong
0 siblings, 0 replies; 18+ messages in thread
From: Darrick J. Wong @ 2020-09-23 15:55 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-xfs, david
On Wed, Sep 23, 2020 at 08:20:15AM +0100, Christoph Hellwig wrote:
> On Wed, Sep 16, 2020 at 08:29:42PM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> >
> > In xfs_bui_item_recover, there exists a use-after-free bug with regards
> > to the inode that is involved in the bmap replay operation. If the
> > mapping operation does not complete, we call xfs_bmap_unmap_extent to
> > create a deferred op to finish the unmapping work, and we retain a
> > pointer to the incore inode.
> >
> > Unfortunately, the very next thing we do is commit the transaction and
> > drop the inode. If reclaim tears down the inode before we try to finish
> > the defer ops, we dereference garbage and blow up. Therefore, create a
> > way to join inodes to the defer ops freezer so that we can maintain the
> > xfs_inode reference until we're done with the inode.
> >
> > Note: This imposes the requirement that there be enough memory to keep
> > every incore inode in memory throughout recovery.
>
> As in every inode that gets recovered, not every inode in the system.
> I think the commit log could use a very slight tweak here.
>
> Didn't we think of just storing the inode number for recovery, or
> did this turn out too complicated? (I'm pretty sure we dicussed this
> in detail before, but my memory gets foggy).
Initially I did just store the inode numbers, but that made the code
more clunky due to needing more _iget and _irele calls, and a bunch of
error handling for that. Dave suggested on irc that I should retain the
reference to the incore inode to simplify the initial patch, and if we
run into ENOMEM then we can fix it later.
I wasn't 100% convinced of that, but Dave or Brian or someone (memory
foggy, don't remember who) countered that the system recovering the fs
is usually the system that crashed in the first place, and it certainly
had enough RAM to hold the inodes.
I think the link you're looking for is[1].
--D
[1] https://lore.kernel.org/linux-xfs/158864123329.184729.14504239314355330619.stgit@magnolia/
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH v2 3/3] xfs: fix an incore inode UAF in xfs_bui_recover
2020-09-17 3:29 ` [PATCH 3/3] xfs: fix an incore inode UAF in xfs_bui_recover Darrick J. Wong
2020-09-23 7:20 ` Christoph Hellwig
@ 2020-09-24 6:06 ` Darrick J. Wong
1 sibling, 0 replies; 18+ messages in thread
From: Darrick J. Wong @ 2020-09-24 6:06 UTC (permalink / raw)
To: linux-xfs, david, Christoph Hellwig
From: Darrick J. Wong <darrick.wong@oracle.com>
In xfs_bui_item_recover, there exists a use-after-free bug with regards
to the inode that is involved in the bmap replay operation. If the
mapping operation does not complete, we call xfs_bmap_unmap_extent to
create a deferred op to finish the unmapping work, and we retain a
pointer to the incore inode.
Unfortunately, the very next thing we do is commit the transaction and
drop the inode. If reclaim tears down the inode before we try to finish
the defer ops, we dereference garbage and blow up. Therefore, create a
way to join inodes to the defer ops freezer so that we can maintain the
xfs_inode reference until we're done with the inode.
Note: This imposes the requirement that there be enough memory to keep
every incore inode in memory throughout recovery.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
v2: rebase on v2 of the previous patchset, and make it more clear that
xfs_defer_capture takes over ownership of the inodes
---
fs/xfs/libxfs/xfs_defer.c | 57 ++++++++++++++++++++++++++++++++++++++-
fs/xfs/libxfs/xfs_defer.h | 21 ++++++++++++++
fs/xfs/libxfs/xfs_log_recover.h | 11 ++++++--
fs/xfs/xfs_bmap_item.c | 8 +----
fs/xfs/xfs_icache.c | 41 ++++++++++++++++++++++++++++
fs/xfs/xfs_log_recover.c | 33 ++++++++++++++++++++---
fs/xfs/xfs_trans.h | 6 ----
7 files changed, 156 insertions(+), 21 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index b693d2c42c27..a99252271b24 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -16,6 +16,7 @@
#include "xfs_inode.h"
#include "xfs_inode_item.h"
#include "xfs_trace.h"
+#include "xfs_icache.h"
/*
* Deferred Operations in XFS
@@ -555,6 +556,18 @@ xfs_defer_move(
xfs_defer_reset(stp);
}
+/* Unlock the inodes attached to this dfops capture device. */
+static void
+xfs_defer_capture_iunlock(
+ struct xfs_defer_capture *dfc)
+{
+ unsigned int i;
+
+ for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dfc->dfc_inodes[i]; i++)
+ xfs_iunlock(dfc->dfc_inodes[i], XFS_ILOCK_EXCL);
+ dfc->dfc_ilocked = false;
+}
+
/*
* Prepare a chain of fresh deferred ops work items to be completed later. Log
* recovery requires the ability to put off until later the actual finishing
@@ -568,17 +581,26 @@ xfs_defer_move(
* transaction is committed.
*
* Note that the capture state is passed up to the caller and must be freed
- * even if the transaction commit returns error.
+ * even if the transaction commit returns error. If inodes were passed in and
+ * a state capture structure was returned, the inodes are now owned by the
+ * state capture structure and the caller must not touch the inodes.
+ *
+ * If *ipp1 or *ipp2 remain set, the caller still owns the inodes.
*/
int
xfs_defer_capture(
struct xfs_trans *tp,
- struct list_head *capture_list)
+ struct list_head *capture_list,
+ struct xfs_inode **ipp1,
+ struct xfs_inode **ipp2)
{
struct xfs_defer_capture *dfc;
struct xfs_mount *mp = tp->t_mountp;
int error;
+ /* Do not pass in an ipp2 without an ipp1. */
+ ASSERT(ipp1 || !ipp2);
+
if (list_empty(&tp->t_dfops))
return xfs_trans_commit(tp);
@@ -600,6 +622,21 @@ xfs_defer_capture(
dfc->dfc_tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
xfs_defer_reset(tp);
+ /*
+ * Transfer responsibility for unlocking and releasing the inodes to
+ * the capture structure.
+ */
+ dfc->dfc_ilocked = true;
+ if (ipp1) {
+ dfc->dfc_inodes[0] = *ipp1;
+ *ipp1 = NULL;
+ }
+ if (ipp2) {
+ if (*ipp2 != dfc->dfc_inodes[0])
+ dfc->dfc_inodes[1] = *ipp2;
+ *ipp2 = NULL;
+ }
+
/*
* Commit the transaction. If that fails, clean up the defer ops and
* the dfc that we just created. Otherwise, add the dfc to the list.
@@ -610,6 +647,12 @@ xfs_defer_capture(
return error;
}
+ /*
+ * Now that we've committed the transaction, it's safe to unlock the
+ * inodes that were passed in if we've taken over their ownership.
+ */
+ xfs_defer_capture_iunlock(dfc);
+
list_add_tail(&dfc->dfc_list, capture_list);
return 0;
}
@@ -623,6 +666,13 @@ xfs_defer_continue(
ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
ASSERT(!(tp->t_flags & XFS_TRANS_DIRTY));
+ /* Lock and join the inodes to the new transaction. */
+ xfs_defer_continue_inodes(dfc, tp);
+ if (dfc->dfc_inodes[0])
+ xfs_trans_ijoin(tp, dfc->dfc_inodes[0], 0);
+ if (dfc->dfc_inodes[1])
+ xfs_trans_ijoin(tp, dfc->dfc_inodes[1], 0);
+
/* Move captured dfops chain and state to the transaction. */
list_splice_init(&dfc->dfc_dfops, &tp->t_dfops);
tp->t_flags |= dfc->dfc_tpflags;
@@ -638,5 +688,8 @@ xfs_defer_capture_free(
struct xfs_defer_capture *dfc)
{
xfs_defer_cancel_list(mp, &dfc->dfc_dfops);
+ if (dfc->dfc_ilocked)
+ xfs_defer_capture_iunlock(dfc);
+ xfs_defer_capture_irele(dfc);
kmem_free(dfc);
}
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index 254d48e6e4dc..c52c57d35af6 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -10,6 +10,12 @@ struct xfs_btree_cur;
struct xfs_defer_op_type;
struct xfs_defer_capture;
+/*
+ * Deferred operation item relogging limits.
+ */
+#define XFS_DEFER_OPS_NR_INODES 2 /* join up to two inodes */
+#define XFS_DEFER_OPS_NR_BUFS 2 /* join up to two buffers */
+
/*
* Header for deferred operation list.
*/
@@ -78,15 +84,28 @@ struct xfs_defer_capture {
unsigned int dfc_tpflags;
unsigned int dfc_blkres;
struct xfs_trans_res dfc_tres;
+
+ /*
+ * Inodes to hold when we want to finish the deferred work items.
+ * Always set the first element before setting the second.
+ */
+ bool dfc_ilocked;
+ struct xfs_inode *dfc_inodes[XFS_DEFER_OPS_NR_INODES];
};
/*
* Functions to capture a chain of deferred operations and continue them later.
* This doesn't normally happen except log recovery.
*/
-int xfs_defer_capture(struct xfs_trans *tp, struct list_head *capture_list);
+int xfs_defer_capture(struct xfs_trans *tp, struct list_head *capture_list,
+ struct xfs_inode **ipp1, struct xfs_inode **ipp2);
void xfs_defer_continue(struct xfs_defer_capture *dfc, struct xfs_trans *tp);
void xfs_defer_capture_free(struct xfs_mount *mp,
struct xfs_defer_capture *dfc);
+/* These functions must be provided by the xfs implementation. */
+void xfs_defer_continue_inodes(struct xfs_defer_capture *dfc,
+ struct xfs_trans *tp);
+void xfs_defer_capture_irele(struct xfs_defer_capture *dfc);
+
#endif /* __XFS_DEFER_H__ */
diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
index 4f752096f7c7..8cf5e23c0aef 100644
--- a/fs/xfs/libxfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h
@@ -125,7 +125,14 @@ void xlog_recover_iodone(struct xfs_buf *bp);
void xlog_recover_release_intent(struct xlog *log, unsigned short intent_type,
uint64_t intent_id);
-int xlog_recover_trans_commit(struct xfs_trans *tp,
- struct list_head *capture_list);
+int xlog_recover_trans_commit_inodes(struct xfs_trans *tp,
+ struct list_head *capture_list, struct xfs_inode *ip1,
+ struct xfs_inode *ip2);
+
+static inline int
+xlog_recover_trans_commit(struct xfs_trans *tp, struct list_head *capture_list)
+{
+ return xlog_recover_trans_commit_inodes(tp, capture_list, NULL, NULL);
+}
#endif /* __XFS_LOG_RECOVER_H__ */
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index b6d3a5766148..fa52bfd66ce1 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -513,15 +513,11 @@ xfs_bui_item_recover(
xfs_bmap_unmap_extent(tp, ip, &irec);
}
- /* Commit transaction, which frees tp. */
- error = xlog_recover_trans_commit(tp, capture_list);
- if (error)
- goto err_unlock;
- return 0;
+ /* Commit transaction, which frees the transaction and the inode. */
+ return xlog_recover_trans_commit_inodes(tp, capture_list, ip, NULL);
err_cancel:
xfs_trans_cancel(tp);
-err_unlock:
xfs_iunlock(ip, XFS_ILOCK_EXCL);
err_rele:
xfs_irele(ip);
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 101028ebb571..5b1202cac4ea 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -12,6 +12,7 @@
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
+#include "xfs_defer.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_inode_item.h"
@@ -1692,3 +1693,43 @@ xfs_start_block_reaping(
xfs_queue_eofblocks(mp);
xfs_queue_cowblocks(mp);
}
+
+/*
+ * Prepare the inodes to participate in further log intent item recovery.
+ * For now, that means attaching dquots and locking them, since libxfs doesn't
+ * know how to do that.
+ */
+void
+xfs_defer_continue_inodes(
+ struct xfs_defer_capture *dfc,
+ struct xfs_trans *tp)
+{
+ int i;
+ int error;
+
+ for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dfc->dfc_inodes[i]; i++) {
+ error = xfs_qm_dqattach(dfc->dfc_inodes[i]);
+ if (error)
+ tp->t_mountp->m_qflags &= ~XFS_ALL_QUOTA_CHKD;
+ }
+
+ if (dfc->dfc_inodes[1])
+ xfs_lock_two_inodes(dfc->dfc_inodes[0], XFS_ILOCK_EXCL,
+ dfc->dfc_inodes[1], XFS_ILOCK_EXCL);
+ else if (dfc->dfc_inodes[0])
+ xfs_ilock(dfc->dfc_inodes[0], XFS_ILOCK_EXCL);
+ dfc->dfc_ilocked = true;
+}
+
+/* Release all the inodes attached to this dfops capture device. */
+void
+xfs_defer_capture_irele(
+ struct xfs_defer_capture *dfc)
+{
+ unsigned int i;
+
+ for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dfc->dfc_inodes[i]; i++) {
+ xfs_irele(dfc->dfc_inodes[i]);
+ dfc->dfc_inodes[i] = NULL;
+ }
+}
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index ab9825ab14d5..37ba7a105b59 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1791,17 +1791,42 @@ xlog_recover_release_intent(
spin_unlock(&ailp->ail_lock);
}
+static inline void
+xlog_recover_irele(
+ struct xfs_inode *ip)
+{
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ xfs_irele(ip);
+}
+
/*
* Freeze any deferred ops and commit the transaction. This is the last step
* needed to finish a log intent item that we recovered from the log, and will
- * take care of releasing all the relevant resources.
+ * take care of releasing all the relevant resources. If we captured deferred
+ * ops, the inodes are attached to it and must not be touched. If not, we have
+ * to unlock and free them ourselves.
*/
int
-xlog_recover_trans_commit(
+xlog_recover_trans_commit_inodes(
struct xfs_trans *tp,
- struct list_head *capture_list)
+ struct list_head *capture_list,
+ struct xfs_inode *ip1,
+ struct xfs_inode *ip2)
{
- return xfs_defer_capture(tp, capture_list);
+ int error;
+
+ error = xfs_defer_capture(tp, capture_list, &ip1, &ip2);
+
+ /*
+ * If we still have references to the inodes, either the capture
+ * process did nothing or it failed; either way, we still own the
+ * inodes, so unlock and release them.
+ */
+ if (ip2 && ip2 != ip1)
+ xlog_recover_irele(ip2);
+ if (ip1)
+ xlog_recover_irele(ip1);
+ return error;
}
/******************************************************************************
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index b68272666ce1..e27df685d3cd 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -96,12 +96,6 @@ void xfs_log_item_init(struct xfs_mount *mp, struct xfs_log_item *item,
#define XFS_ITEM_LOCKED 2
#define XFS_ITEM_FLUSHING 3
-/*
- * Deferred operation item relogging limits.
- */
-#define XFS_DEFER_OPS_NR_INODES 2 /* join up to two inodes */
-#define XFS_DEFER_OPS_NR_BUFS 2 /* join up to two buffers */
-
/*
* This is the structure maintained for every active transaction.
*/
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH 1/3] xfs: clean up bmap intent item recovery checking
2020-09-27 23:41 [PATCH v2 0/3] xfs: fix inode use-after-free during log recovery Darrick J. Wong
@ 2020-09-27 23:41 ` Darrick J. Wong
0 siblings, 0 replies; 18+ messages in thread
From: Darrick J. Wong @ 2020-09-27 23:41 UTC (permalink / raw)
To: darrick.wong; +Cc: Dave Chinner, Christoph Hellwig, linux-xfs, david, hch
From: Darrick J. Wong <darrick.wong@oracle.com>
The bmap intent item checking code in xfs_bui_item_recover is spread all
over the function. We should check the recovered log item at the top
before we allocate any resources or do anything else, so do that.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
fs/xfs/xfs_bmap_item.c | 38 ++++++++++++--------------------------
1 file changed, 12 insertions(+), 26 deletions(-)
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index b73f0a0890a2..8f20eac72287 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -437,8 +437,6 @@ xfs_bui_item_recover(
xfs_fsblock_t inode_fsb;
xfs_filblks_t count;
xfs_exntst_t state;
- enum xfs_bmap_intent_type type;
- bool op_ok;
unsigned int bui_type;
int whichfork;
int error = 0;
@@ -456,16 +454,19 @@ xfs_bui_item_recover(
XFS_FSB_TO_DADDR(mp, bmap->me_startblock));
inode_fsb = XFS_BB_TO_FSB(mp, XFS_FSB_TO_DADDR(mp,
XFS_INO_TO_FSB(mp, bmap->me_owner)));
- switch (bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK) {
+ state = (bmap->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ?
+ XFS_EXT_UNWRITTEN : XFS_EXT_NORM;
+ whichfork = (bmap->me_flags & XFS_BMAP_EXTENT_ATTR_FORK) ?
+ XFS_ATTR_FORK : XFS_DATA_FORK;
+ bui_type = bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK;
+ switch (bui_type) {
case XFS_BMAP_MAP:
case XFS_BMAP_UNMAP:
- op_ok = true;
break;
default:
- op_ok = false;
- break;
+ return -EFSCORRUPTED;
}
- if (!op_ok || startblock_fsb == 0 ||
+ if (startblock_fsb == 0 ||
bmap->me_len == 0 ||
inode_fsb == 0 ||
startblock_fsb >= mp->m_sb.sb_dblocks ||
@@ -493,32 +494,17 @@ xfs_bui_item_recover(
if (VFS_I(ip)->i_nlink == 0)
xfs_iflags_set(ip, XFS_IRECOVERY);
- /* Process deferred bmap item. */
- state = (bmap->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ?
- XFS_EXT_UNWRITTEN : XFS_EXT_NORM;
- whichfork = (bmap->me_flags & XFS_BMAP_EXTENT_ATTR_FORK) ?
- XFS_ATTR_FORK : XFS_DATA_FORK;
- bui_type = bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK;
- switch (bui_type) {
- case XFS_BMAP_MAP:
- case XFS_BMAP_UNMAP:
- type = bui_type;
- break;
- default:
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
- error = -EFSCORRUPTED;
- goto err_inode;
- }
xfs_trans_ijoin(tp, ip, 0);
count = bmap->me_len;
- error = xfs_trans_log_finish_bmap_update(tp, budp, type, ip, whichfork,
- bmap->me_startoff, bmap->me_startblock, &count, state);
+ error = xfs_trans_log_finish_bmap_update(tp, budp, bui_type, ip,
+ whichfork, bmap->me_startoff, bmap->me_startblock,
+ &count, state);
if (error)
goto err_inode;
if (count > 0) {
- ASSERT(type == XFS_BMAP_UNMAP);
+ ASSERT(bui_type == XFS_BMAP_UNMAP);
irec.br_startblock = bmap->me_startblock;
irec.br_blockcount = count;
irec.br_startoff = bmap->me_startoff;
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH 1/3] xfs: clean up bmap intent item recovery checking
2020-09-29 17:43 [PATCH v3 0/3] xfs: fix inode use-after-free during log recovery Darrick J. Wong
@ 2020-09-29 17:43 ` Darrick J. Wong
0 siblings, 0 replies; 18+ messages in thread
From: Darrick J. Wong @ 2020-09-29 17:43 UTC (permalink / raw)
To: darrick.wong; +Cc: Dave Chinner, Christoph Hellwig, linux-xfs, david, hch
From: Darrick J. Wong <darrick.wong@oracle.com>
The bmap intent item checking code in xfs_bui_item_recover is spread all
over the function. We should check the recovered log item at the top
before we allocate any resources or do anything else, so do that.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
fs/xfs/xfs_bmap_item.c | 38 ++++++++++++--------------------------
1 file changed, 12 insertions(+), 26 deletions(-)
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index 126df48dae5f..c1f2cc3c42cb 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -437,8 +437,6 @@ xfs_bui_item_recover(
xfs_fsblock_t inode_fsb;
xfs_filblks_t count;
xfs_exntst_t state;
- enum xfs_bmap_intent_type type;
- bool op_ok;
unsigned int bui_type;
int whichfork;
int error = 0;
@@ -456,16 +454,19 @@ xfs_bui_item_recover(
XFS_FSB_TO_DADDR(mp, bmap->me_startblock));
inode_fsb = XFS_BB_TO_FSB(mp, XFS_FSB_TO_DADDR(mp,
XFS_INO_TO_FSB(mp, bmap->me_owner)));
- switch (bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK) {
+ state = (bmap->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ?
+ XFS_EXT_UNWRITTEN : XFS_EXT_NORM;
+ whichfork = (bmap->me_flags & XFS_BMAP_EXTENT_ATTR_FORK) ?
+ XFS_ATTR_FORK : XFS_DATA_FORK;
+ bui_type = bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK;
+ switch (bui_type) {
case XFS_BMAP_MAP:
case XFS_BMAP_UNMAP:
- op_ok = true;
break;
default:
- op_ok = false;
- break;
+ return -EFSCORRUPTED;
}
- if (!op_ok || startblock_fsb == 0 ||
+ if (startblock_fsb == 0 ||
bmap->me_len == 0 ||
inode_fsb == 0 ||
startblock_fsb >= mp->m_sb.sb_dblocks ||
@@ -493,32 +494,17 @@ xfs_bui_item_recover(
if (VFS_I(ip)->i_nlink == 0)
xfs_iflags_set(ip, XFS_IRECOVERY);
- /* Process deferred bmap item. */
- state = (bmap->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ?
- XFS_EXT_UNWRITTEN : XFS_EXT_NORM;
- whichfork = (bmap->me_flags & XFS_BMAP_EXTENT_ATTR_FORK) ?
- XFS_ATTR_FORK : XFS_DATA_FORK;
- bui_type = bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK;
- switch (bui_type) {
- case XFS_BMAP_MAP:
- case XFS_BMAP_UNMAP:
- type = bui_type;
- break;
- default:
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
- error = -EFSCORRUPTED;
- goto err_inode;
- }
xfs_trans_ijoin(tp, ip, 0);
count = bmap->me_len;
- error = xfs_trans_log_finish_bmap_update(tp, budp, type, ip, whichfork,
- bmap->me_startoff, bmap->me_startblock, &count, state);
+ error = xfs_trans_log_finish_bmap_update(tp, budp, bui_type, ip,
+ whichfork, bmap->me_startoff, bmap->me_startblock,
+ &count, state);
if (error)
goto err_inode;
if (count > 0) {
- ASSERT(type == XFS_BMAP_UNMAP);
+ ASSERT(bui_type == XFS_BMAP_UNMAP);
irec.br_startblock = bmap->me_startblock;
irec.br_blockcount = count;
irec.br_startoff = bmap->me_startoff;
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH 1/3] xfs: clean up bmap intent item recovery checking
2020-10-05 18:20 [PATCH v4 0/3] xfs: fix inode use-after-free during log recovery Darrick J. Wong
@ 2020-10-05 18:20 ` Darrick J. Wong
0 siblings, 0 replies; 18+ messages in thread
From: Darrick J. Wong @ 2020-10-05 18:20 UTC (permalink / raw)
To: darrick.wong; +Cc: Dave Chinner, Christoph Hellwig, linux-xfs, david, hch
From: Darrick J. Wong <darrick.wong@oracle.com>
The bmap intent item checking code in xfs_bui_item_recover is spread all
over the function. We should check the recovered log item at the top
before we allocate any resources or do anything else, so do that.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
fs/xfs/xfs_bmap_item.c | 38 ++++++++++++--------------------------
1 file changed, 12 insertions(+), 26 deletions(-)
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index 126df48dae5f..c1f2cc3c42cb 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -437,8 +437,6 @@ xfs_bui_item_recover(
xfs_fsblock_t inode_fsb;
xfs_filblks_t count;
xfs_exntst_t state;
- enum xfs_bmap_intent_type type;
- bool op_ok;
unsigned int bui_type;
int whichfork;
int error = 0;
@@ -456,16 +454,19 @@ xfs_bui_item_recover(
XFS_FSB_TO_DADDR(mp, bmap->me_startblock));
inode_fsb = XFS_BB_TO_FSB(mp, XFS_FSB_TO_DADDR(mp,
XFS_INO_TO_FSB(mp, bmap->me_owner)));
- switch (bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK) {
+ state = (bmap->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ?
+ XFS_EXT_UNWRITTEN : XFS_EXT_NORM;
+ whichfork = (bmap->me_flags & XFS_BMAP_EXTENT_ATTR_FORK) ?
+ XFS_ATTR_FORK : XFS_DATA_FORK;
+ bui_type = bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK;
+ switch (bui_type) {
case XFS_BMAP_MAP:
case XFS_BMAP_UNMAP:
- op_ok = true;
break;
default:
- op_ok = false;
- break;
+ return -EFSCORRUPTED;
}
- if (!op_ok || startblock_fsb == 0 ||
+ if (startblock_fsb == 0 ||
bmap->me_len == 0 ||
inode_fsb == 0 ||
startblock_fsb >= mp->m_sb.sb_dblocks ||
@@ -493,32 +494,17 @@ xfs_bui_item_recover(
if (VFS_I(ip)->i_nlink == 0)
xfs_iflags_set(ip, XFS_IRECOVERY);
- /* Process deferred bmap item. */
- state = (bmap->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ?
- XFS_EXT_UNWRITTEN : XFS_EXT_NORM;
- whichfork = (bmap->me_flags & XFS_BMAP_EXTENT_ATTR_FORK) ?
- XFS_ATTR_FORK : XFS_DATA_FORK;
- bui_type = bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK;
- switch (bui_type) {
- case XFS_BMAP_MAP:
- case XFS_BMAP_UNMAP:
- type = bui_type;
- break;
- default:
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
- error = -EFSCORRUPTED;
- goto err_inode;
- }
xfs_trans_ijoin(tp, ip, 0);
count = bmap->me_len;
- error = xfs_trans_log_finish_bmap_update(tp, budp, type, ip, whichfork,
- bmap->me_startoff, bmap->me_startblock, &count, state);
+ error = xfs_trans_log_finish_bmap_update(tp, budp, bui_type, ip,
+ whichfork, bmap->me_startoff, bmap->me_startblock,
+ &count, state);
if (error)
goto err_inode;
if (count > 0) {
- ASSERT(type == XFS_BMAP_UNMAP);
+ ASSERT(bui_type == XFS_BMAP_UNMAP);
irec.br_startblock = bmap->me_startblock;
irec.br_blockcount = count;
irec.br_startoff = bmap->me_startoff;
^ permalink raw reply related [flat|nested] 18+ messages in thread
end of thread, other threads:[~2020-10-05 18:20 UTC | newest]
Thread overview: 18+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2020-09-17 3:29 [PATCH 0/3] xfs: fix inode use-after-free during log recovery Darrick J. Wong
2020-09-17 3:29 ` [PATCH 1/3] xfs: clean up bmap intent item recovery checking Darrick J. Wong
2020-09-17 5:09 ` Dave Chinner
2020-09-23 7:16 ` Christoph Hellwig
2020-09-23 15:22 ` Darrick J. Wong
2020-09-17 3:29 ` [PATCH 2/3] xfs: clean up xfs_bui_item_recover iget/trans_alloc/ilock ordering Darrick J. Wong
2020-09-17 5:13 ` Dave Chinner
2020-09-17 6:47 ` Darrick J. Wong
2020-09-17 7:08 ` [PATCH v2 " Darrick J. Wong
2020-09-17 8:19 ` Dave Chinner
2020-09-23 7:17 ` Christoph Hellwig
2020-09-17 3:29 ` [PATCH 3/3] xfs: fix an incore inode UAF in xfs_bui_recover Darrick J. Wong
2020-09-23 7:20 ` Christoph Hellwig
2020-09-23 15:55 ` Darrick J. Wong
2020-09-24 6:06 ` [PATCH v2 " Darrick J. Wong
-- strict thread matches above, loose matches on Subject: below --
2020-09-27 23:41 [PATCH v2 0/3] xfs: fix inode use-after-free during log recovery Darrick J. Wong
2020-09-27 23:41 ` [PATCH 1/3] xfs: clean up bmap intent item recovery checking Darrick J. Wong
2020-09-29 17:43 [PATCH v3 0/3] xfs: fix inode use-after-free during log recovery Darrick J. Wong
2020-09-29 17:43 ` [PATCH 1/3] xfs: clean up bmap intent item recovery checking Darrick J. Wong
2020-10-05 18:20 [PATCH v4 0/3] xfs: fix inode use-after-free during log recovery Darrick J. Wong
2020-10-05 18:20 ` [PATCH 1/3] xfs: clean up bmap intent item recovery checking Darrick J. Wong
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).