* [PATCH] xfs: fix hung when transaction commit fail in xfs_inactive_ifree
@ 2022-12-09 11:05 Long Li
2023-02-01 3:21 ` Darrick J. Wong
0 siblings, 1 reply; 3+ messages in thread
From: Long Li @ 2022-12-09 11:05 UTC (permalink / raw)
To: djwong; +Cc: david, linux-xfs, houtao1, yi.zhang, guoxuenan
After running unplug disk test and unmount filesystem, the umount thread
hung all the time.
crash> dmesg
sd 0:0:0:0: rejecting I/O to offline device
XFS (sda): log I/O error -5
XFS (sda): Corruption of in-memory data (0x8) detected at xfs_defer_finish_noroll+0x12e0/0x1cf0
(fs/xfs/libxfs/xfs_defer.c:504). Shutting down filesystem.
XFS (sda): Please unmount the filesystem and rectify the problem(s)
XFS (sda): xfs_inactive_ifree: xfs_trans_commit returned error -5
XFS (sda): Unmounting Filesystem
crash> bt 3368
PID: 3368 TASK: ffff88801bcd8040 CPU: 3 COMMAND: "umount"
#0 [ffffc900086a7ae0] __schedule at ffffffff83d3fd25
#1 [ffffc900086a7be8] schedule at ffffffff83d414dd
#2 [ffffc900086a7c10] xfs_ail_push_all_sync at ffffffff8256db24
#3 [ffffc900086a7d18] xfs_unmount_flush_inodes at ffffffff824ee7e2
#4 [ffffc900086a7d28] xfs_unmountfs at ffffffff824f2eff
#5 [ffffc900086a7da8] xfs_fs_put_super at ffffffff82503e69
#6 [ffffc900086a7de8] generic_shutdown_super at ffffffff81aeb8cd
#7 [ffffc900086a7e10] kill_block_super at ffffffff81aefcfa
#8 [ffffc900086a7e30] deactivate_locked_super at ffffffff81aeb2da
#9 [ffffc900086a7e48] deactivate_super at ffffffff81aeb639
#10 [ffffc900086a7e68] cleanup_mnt at ffffffff81b6ddd5
#11 [ffffc900086a7ea0] __cleanup_mnt at ffffffff81b6dfdf
#12 [ffffc900086a7eb0] task_work_run at ffffffff8126e5cf
#13 [ffffc900086a7ef8] exit_to_user_mode_prepare at ffffffff813fa136
#14 [ffffc900086a7f28] syscall_exit_to_user_mode at ffffffff83d25dbb
#15 [ffffc900086a7f40] do_syscall_64 at ffffffff83d1f8d9
#16 [ffffc900086a7f50] entry_SYSCALL_64_after_hwframe at ffffffff83e00085
When we free a cluster buffer from xfs_ifree_cluster, all the inodes in
cache are marked XFS_ISTALE. On journal commit dirty stale inodes as are
handled by both buffer and inode log items, inodes marked as XFS_ISTALE
in AIL will be removed from the AIL because the buffer log item will clean
it. If the transaction commit fails in the xfs_inactive_ifree(), inodes
marked as XFS_ISTALE will be left in AIL due to buf log item is not
committed, this will cause the unmount thread above to be blocked all the
time. Error handling in xfs_inactive_ifree() is not enough, the above
exception needs to be considered.
Signed-off-by: Long Li <leo.lilong@huawei.com>
---
fs/xfs/xfs_inode.c | 114 +++++++++++++++++++++++++++++++++++++++++----
fs/xfs/xfs_inode.h | 1 -
2 files changed, 105 insertions(+), 10 deletions(-)
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index d354ea2b74f9..b6808c0a2868 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -49,6 +49,9 @@ struct kmem_cache *xfs_inode_cache;
STATIC int xfs_iunlink(struct xfs_trans *, struct xfs_inode *);
STATIC int xfs_iunlink_remove(struct xfs_trans *tp, struct xfs_perag *pag,
struct xfs_inode *);
+STATIC int xfs_ifree(struct xfs_trans *tp, struct xfs_inode *ip,
+ struct xfs_icluster *xic);
+STATIC void xfs_ifree_abort(struct xfs_inode *ip, struct xfs_icluster *xic);
/*
* helper function to extract extent size hint from inode
@@ -1544,6 +1547,7 @@ xfs_inactive_ifree(
{
struct xfs_mount *mp = ip->i_mount;
struct xfs_trans *tp;
+ struct xfs_icluster xic = { 0 };
int error;
/*
@@ -1598,7 +1602,7 @@ xfs_inactive_ifree(
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
- error = xfs_ifree(tp, ip);
+ error = xfs_ifree(tp, ip, &xic);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
if (error) {
/*
@@ -1612,7 +1616,7 @@ xfs_inactive_ifree(
xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
}
xfs_trans_cancel(tp);
- return error;
+ goto out_error;
}
/*
@@ -1625,11 +1629,19 @@ xfs_inactive_ifree(
* to try to keep going. Make sure it's not a silent error.
*/
error = xfs_trans_commit(tp);
- if (error)
+ if (error) {
xfs_notice(mp, "%s: xfs_trans_commit returned error %d",
__func__, error);
+ goto out_error;
+ }
return 0;
+
+out_error:
+ if (xic.deleted)
+ xfs_ifree_abort(ip, &xic);
+
+ return error;
}
/*
@@ -2259,14 +2271,14 @@ xfs_ifree_cluster(
* inodes in the AGI. We need to remove the inode from that list atomically with
* respect to freeing it here.
*/
-int
+STATIC int
xfs_ifree(
struct xfs_trans *tp,
- struct xfs_inode *ip)
+ struct xfs_inode *ip,
+ struct xfs_icluster *xic)
{
struct xfs_mount *mp = ip->i_mount;
struct xfs_perag *pag;
- struct xfs_icluster xic = { 0 };
struct xfs_inode_log_item *iip = ip->i_itemp;
int error;
@@ -2284,7 +2296,7 @@ xfs_ifree(
* makes the AGI lock -> unlinked list modification order the same as
* used in O_TMPFILE creation.
*/
- error = xfs_difree(tp, pag, ip->i_ino, &xic);
+ error = xfs_difree(tp, pag, ip->i_ino, xic);
if (error)
goto out;
@@ -2323,13 +2335,97 @@ xfs_ifree(
VFS_I(ip)->i_generation++;
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- if (xic.deleted)
- error = xfs_ifree_cluster(tp, pag, ip, &xic);
+ if (xic->deleted)
+ error = xfs_ifree_cluster(tp, pag, ip, xic);
out:
xfs_perag_put(pag);
return error;
}
+static void
+xfs_ifree_abort_inode_stale(
+ struct xfs_perag *pag,
+ xfs_ino_t inum)
+{
+ struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_inode_log_item *iip;
+ struct xfs_inode *ip;
+
+retry:
+ rcu_read_lock();
+ ip = radix_tree_lookup(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, inum));
+
+ /* Inode not in memory, nothing to do */
+ if (!ip) {
+ rcu_read_unlock();
+ return;
+ }
+
+ /* Skip invalid or not stale inode */
+ if (ip->i_ino != inum || !xfs_iflags_test(ip, XFS_ISTALE)) {
+ rcu_read_unlock();
+ return;
+ }
+
+ if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
+ rcu_read_unlock();
+ delay(1);
+ goto retry;
+ }
+
+ iip = ip->i_itemp;
+ if (!iip || list_empty(&iip->ili_item.li_bio_list))
+ goto out_iunlock;
+
+ if (test_bit(XFS_LI_IN_AIL, &iip->ili_item.li_flags))
+ xfs_iflush_abort(ip);
+ else
+ xfs_iflags_clear(ip, XFS_IFLUSHING);
+
+out_iunlock:
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ rcu_read_unlock();
+}
+
+/*
+ * This is called to clean up inodes marked as stale in xfs_ifree
+ */
+STATIC void
+xfs_ifree_abort(
+ struct xfs_inode *ip,
+ struct xfs_icluster *xic)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_perag *pag;
+ struct xfs_ino_geometry *igeo = M_IGEO(mp);
+ xfs_ino_t inum = xic->first_ino;
+ int nbufs;
+ int i, j;
+ int ioffset;
+
+ pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
+
+ nbufs = igeo->ialloc_blks / igeo->blocks_per_cluster;
+
+ for (j = 0; j < nbufs; j++, inum += igeo->inodes_per_cluster) {
+ /*
+ * The allocation bitmap tells us which inodes of the chunk were
+ * physically allocated. Skip the cluster if an inode falls into
+ * a sparse region.
+ */
+ ioffset = inum - xic->first_ino;
+ if ((xic->alloc & XFS_INOBT_MASK(ioffset)) == 0) {
+ ASSERT(ioffset % igeo->inodes_per_cluster == 0);
+ continue;
+ }
+
+ for (i = 0; i < igeo->inodes_per_cluster; i++)
+ xfs_ifree_abort_inode_stale(pag, inum + i);
+
+ }
+ xfs_perag_put(pag);
+}
+
/*
* This is called to unpin an inode. The caller must have the inode locked
* in at least shared mode so that the buffer cannot be subsequently pinned
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index fa780f08dc89..423542bf6af1 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -499,7 +499,6 @@ uint xfs_ilock_data_map_shared(struct xfs_inode *);
uint xfs_ilock_attr_map_shared(struct xfs_inode *);
uint xfs_ip2xflags(struct xfs_inode *);
-int xfs_ifree(struct xfs_trans *, struct xfs_inode *);
int xfs_itruncate_extents_flags(struct xfs_trans **,
struct xfs_inode *, int, xfs_fsize_t, int);
void xfs_iext_realloc(xfs_inode_t *, int, int);
--
2.31.1
^ permalink raw reply related [flat|nested] 3+ messages in thread* Re: [PATCH] xfs: fix hung when transaction commit fail in xfs_inactive_ifree
2022-12-09 11:05 [PATCH] xfs: fix hung when transaction commit fail in xfs_inactive_ifree Long Li
@ 2023-02-01 3:21 ` Darrick J. Wong
2023-02-11 11:46 ` Long Li
0 siblings, 1 reply; 3+ messages in thread
From: Darrick J. Wong @ 2023-02-01 3:21 UTC (permalink / raw)
To: Long Li; +Cc: david, linux-xfs, houtao1, yi.zhang, guoxuenan
On Fri, Dec 09, 2022 at 07:05:19PM +0800, Long Li wrote:
> After running unplug disk test and unmount filesystem, the umount thread
> hung all the time.
>
> crash> dmesg
> sd 0:0:0:0: rejecting I/O to offline device
> XFS (sda): log I/O error -5
> XFS (sda): Corruption of in-memory data (0x8) detected at xfs_defer_finish_noroll+0x12e0/0x1cf0
> (fs/xfs/libxfs/xfs_defer.c:504). Shutting down filesystem.
> XFS (sda): Please unmount the filesystem and rectify the problem(s)
> XFS (sda): xfs_inactive_ifree: xfs_trans_commit returned error -5
> XFS (sda): Unmounting Filesystem
>
> crash> bt 3368
> PID: 3368 TASK: ffff88801bcd8040 CPU: 3 COMMAND: "umount"
> #0 [ffffc900086a7ae0] __schedule at ffffffff83d3fd25
> #1 [ffffc900086a7be8] schedule at ffffffff83d414dd
> #2 [ffffc900086a7c10] xfs_ail_push_all_sync at ffffffff8256db24
> #3 [ffffc900086a7d18] xfs_unmount_flush_inodes at ffffffff824ee7e2
> #4 [ffffc900086a7d28] xfs_unmountfs at ffffffff824f2eff
> #5 [ffffc900086a7da8] xfs_fs_put_super at ffffffff82503e69
> #6 [ffffc900086a7de8] generic_shutdown_super at ffffffff81aeb8cd
> #7 [ffffc900086a7e10] kill_block_super at ffffffff81aefcfa
> #8 [ffffc900086a7e30] deactivate_locked_super at ffffffff81aeb2da
> #9 [ffffc900086a7e48] deactivate_super at ffffffff81aeb639
> #10 [ffffc900086a7e68] cleanup_mnt at ffffffff81b6ddd5
> #11 [ffffc900086a7ea0] __cleanup_mnt at ffffffff81b6dfdf
> #12 [ffffc900086a7eb0] task_work_run at ffffffff8126e5cf
> #13 [ffffc900086a7ef8] exit_to_user_mode_prepare at ffffffff813fa136
> #14 [ffffc900086a7f28] syscall_exit_to_user_mode at ffffffff83d25dbb
> #15 [ffffc900086a7f40] do_syscall_64 at ffffffff83d1f8d9
> #16 [ffffc900086a7f50] entry_SYSCALL_64_after_hwframe at ffffffff83e00085
>
> When we free a cluster buffer from xfs_ifree_cluster, all the inodes in
> cache are marked XFS_ISTALE. On journal commit dirty stale inodes as are
> handled by both buffer and inode log items, inodes marked as XFS_ISTALE
> in AIL will be removed from the AIL because the buffer log item will clean
> it. If the transaction commit fails in the xfs_inactive_ifree(), inodes
> marked as XFS_ISTALE will be left in AIL due to buf log item is not
> committed, this will cause the unmount thread above to be blocked all the
> time. Error handling in xfs_inactive_ifree() is not enough, the above
> exception needs to be considered.
>
> Signed-off-by: Long Li <leo.lilong@huawei.com>
> ---
> fs/xfs/xfs_inode.c | 114 +++++++++++++++++++++++++++++++++++++++++----
> fs/xfs/xfs_inode.h | 1 -
> 2 files changed, 105 insertions(+), 10 deletions(-)
>
> diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> index d354ea2b74f9..b6808c0a2868 100644
> --- a/fs/xfs/xfs_inode.c
> +++ b/fs/xfs/xfs_inode.c
> @@ -49,6 +49,9 @@ struct kmem_cache *xfs_inode_cache;
> STATIC int xfs_iunlink(struct xfs_trans *, struct xfs_inode *);
> STATIC int xfs_iunlink_remove(struct xfs_trans *tp, struct xfs_perag *pag,
> struct xfs_inode *);
> +STATIC int xfs_ifree(struct xfs_trans *tp, struct xfs_inode *ip,
> + struct xfs_icluster *xic);
> +STATIC void xfs_ifree_abort(struct xfs_inode *ip, struct xfs_icluster *xic);
>
> /*
> * helper function to extract extent size hint from inode
> @@ -1544,6 +1547,7 @@ xfs_inactive_ifree(
> {
> struct xfs_mount *mp = ip->i_mount;
> struct xfs_trans *tp;
> + struct xfs_icluster xic = { 0 };
> int error;
>
> /*
> @@ -1598,7 +1602,7 @@ xfs_inactive_ifree(
> xfs_ilock(ip, XFS_ILOCK_EXCL);
> xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
>
> - error = xfs_ifree(tp, ip);
> + error = xfs_ifree(tp, ip, &xic);
> ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
> if (error) {
> /*
> @@ -1612,7 +1616,7 @@ xfs_inactive_ifree(
> xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
> }
> xfs_trans_cancel(tp);
> - return error;
> + goto out_error;
> }
>
> /*
> @@ -1625,11 +1629,19 @@ xfs_inactive_ifree(
> * to try to keep going. Make sure it's not a silent error.
> */
> error = xfs_trans_commit(tp);
> - if (error)
> + if (error) {
> xfs_notice(mp, "%s: xfs_trans_commit returned error %d",
> __func__, error);
> + goto out_error;
> + }
>
> return 0;
> +
> +out_error:
> + if (xic.deleted)
> + xfs_ifree_abort(ip, &xic);
> +
> + return error;
> }
>
> /*
> @@ -2259,14 +2271,14 @@ xfs_ifree_cluster(
> * inodes in the AGI. We need to remove the inode from that list atomically with
> * respect to freeing it here.
> */
> -int
> +STATIC int
> xfs_ifree(
> struct xfs_trans *tp,
> - struct xfs_inode *ip)
> + struct xfs_inode *ip,
> + struct xfs_icluster *xic)
> {
> struct xfs_mount *mp = ip->i_mount;
> struct xfs_perag *pag;
> - struct xfs_icluster xic = { 0 };
> struct xfs_inode_log_item *iip = ip->i_itemp;
> int error;
>
> @@ -2284,7 +2296,7 @@ xfs_ifree(
> * makes the AGI lock -> unlinked list modification order the same as
> * used in O_TMPFILE creation.
> */
> - error = xfs_difree(tp, pag, ip->i_ino, &xic);
> + error = xfs_difree(tp, pag, ip->i_ino, xic);
> if (error)
> goto out;
>
> @@ -2323,13 +2335,97 @@ xfs_ifree(
> VFS_I(ip)->i_generation++;
> xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
>
> - if (xic.deleted)
> - error = xfs_ifree_cluster(tp, pag, ip, &xic);
> + if (xic->deleted)
> + error = xfs_ifree_cluster(tp, pag, ip, xic);
> out:
> xfs_perag_put(pag);
> return error;
> }
>
> +static void
> +xfs_ifree_abort_inode_stale(
> + struct xfs_perag *pag,
> + xfs_ino_t inum)
> +{
> + struct xfs_mount *mp = pag->pag_mount;
> + struct xfs_inode_log_item *iip;
> + struct xfs_inode *ip;
> +
> +retry:
> + rcu_read_lock();
> + ip = radix_tree_lookup(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, inum));
> +
> + /* Inode not in memory, nothing to do */
> + if (!ip) {
> + rcu_read_unlock();
> + return;
> + }
> +
> + /* Skip invalid or not stale inode */
> + if (ip->i_ino != inum || !xfs_iflags_test(ip, XFS_ISTALE)) {
> + rcu_read_unlock();
> + return;
> + }
> +
> + if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
> + rcu_read_unlock();
> + delay(1);
> + goto retry;
> + }
> +
> + iip = ip->i_itemp;
> + if (!iip || list_empty(&iip->ili_item.li_bio_list))
> + goto out_iunlock;
> +
> + if (test_bit(XFS_LI_IN_AIL, &iip->ili_item.li_flags))
> + xfs_iflush_abort(ip);
> + else
> + xfs_iflags_clear(ip, XFS_IFLUSHING);
Er... why is the ifree code tearing into the inode log item state ?
Shouldn't this be getting done from the buffer log item when we release
it and find that it's aborted?
--D
> +
> +out_iunlock:
> + xfs_iunlock(ip, XFS_ILOCK_EXCL);
> + rcu_read_unlock();
> +}
> +
> +/*
> + * This is called to clean up inodes marked as stale in xfs_ifree
> + */
> +STATIC void
> +xfs_ifree_abort(
> + struct xfs_inode *ip,
> + struct xfs_icluster *xic)
> +{
> + struct xfs_mount *mp = ip->i_mount;
> + struct xfs_perag *pag;
> + struct xfs_ino_geometry *igeo = M_IGEO(mp);
> + xfs_ino_t inum = xic->first_ino;
> + int nbufs;
> + int i, j;
> + int ioffset;
> +
> + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
> +
> + nbufs = igeo->ialloc_blks / igeo->blocks_per_cluster;
> +
> + for (j = 0; j < nbufs; j++, inum += igeo->inodes_per_cluster) {
> + /*
> + * The allocation bitmap tells us which inodes of the chunk were
> + * physically allocated. Skip the cluster if an inode falls into
> + * a sparse region.
> + */
> + ioffset = inum - xic->first_ino;
> + if ((xic->alloc & XFS_INOBT_MASK(ioffset)) == 0) {
> + ASSERT(ioffset % igeo->inodes_per_cluster == 0);
> + continue;
> + }
> +
> + for (i = 0; i < igeo->inodes_per_cluster; i++)
> + xfs_ifree_abort_inode_stale(pag, inum + i);
> +
> + }
> + xfs_perag_put(pag);
> +}
> +
> /*
> * This is called to unpin an inode. The caller must have the inode locked
> * in at least shared mode so that the buffer cannot be subsequently pinned
> diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
> index fa780f08dc89..423542bf6af1 100644
> --- a/fs/xfs/xfs_inode.h
> +++ b/fs/xfs/xfs_inode.h
> @@ -499,7 +499,6 @@ uint xfs_ilock_data_map_shared(struct xfs_inode *);
> uint xfs_ilock_attr_map_shared(struct xfs_inode *);
>
> uint xfs_ip2xflags(struct xfs_inode *);
> -int xfs_ifree(struct xfs_trans *, struct xfs_inode *);
> int xfs_itruncate_extents_flags(struct xfs_trans **,
> struct xfs_inode *, int, xfs_fsize_t, int);
> void xfs_iext_realloc(xfs_inode_t *, int, int);
> --
> 2.31.1
>
^ permalink raw reply [flat|nested] 3+ messages in thread* Re: [PATCH] xfs: fix hung when transaction commit fail in xfs_inactive_ifree
2023-02-01 3:21 ` Darrick J. Wong
@ 2023-02-11 11:46 ` Long Li
0 siblings, 0 replies; 3+ messages in thread
From: Long Li @ 2023-02-11 11:46 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: david, linux-xfs, houtao1, yi.zhang, guoxuenan
On Tue, Jan 31, 2023 at 07:21:39PM -0800, Darrick J. Wong wrote:
> On Fri, Dec 09, 2022 at 07:05:19PM +0800, Long Li wrote:
> > After running unplug disk test and unmount filesystem, the umount thread
> > hung all the time.
> >
> > crash> dmesg
> > sd 0:0:0:0: rejecting I/O to offline device
> > XFS (sda): log I/O error -5
> > XFS (sda): Corruption of in-memory data (0x8) detected at xfs_defer_finish_noroll+0x12e0/0x1cf0
> > (fs/xfs/libxfs/xfs_defer.c:504). Shutting down filesystem.
> > XFS (sda): Please unmount the filesystem and rectify the problem(s)
> > XFS (sda): xfs_inactive_ifree: xfs_trans_commit returned error -5
> > XFS (sda): Unmounting Filesystem
> >
> > crash> bt 3368
> > PID: 3368 TASK: ffff88801bcd8040 CPU: 3 COMMAND: "umount"
> > #0 [ffffc900086a7ae0] __schedule at ffffffff83d3fd25
> > #1 [ffffc900086a7be8] schedule at ffffffff83d414dd
> > #2 [ffffc900086a7c10] xfs_ail_push_all_sync at ffffffff8256db24
> > #3 [ffffc900086a7d18] xfs_unmount_flush_inodes at ffffffff824ee7e2
> > #4 [ffffc900086a7d28] xfs_unmountfs at ffffffff824f2eff
> > #5 [ffffc900086a7da8] xfs_fs_put_super at ffffffff82503e69
> > #6 [ffffc900086a7de8] generic_shutdown_super at ffffffff81aeb8cd
> > #7 [ffffc900086a7e10] kill_block_super at ffffffff81aefcfa
> > #8 [ffffc900086a7e30] deactivate_locked_super at ffffffff81aeb2da
> > #9 [ffffc900086a7e48] deactivate_super at ffffffff81aeb639
> > #10 [ffffc900086a7e68] cleanup_mnt at ffffffff81b6ddd5
> > #11 [ffffc900086a7ea0] __cleanup_mnt at ffffffff81b6dfdf
> > #12 [ffffc900086a7eb0] task_work_run at ffffffff8126e5cf
> > #13 [ffffc900086a7ef8] exit_to_user_mode_prepare at ffffffff813fa136
> > #14 [ffffc900086a7f28] syscall_exit_to_user_mode at ffffffff83d25dbb
> > #15 [ffffc900086a7f40] do_syscall_64 at ffffffff83d1f8d9
> > #16 [ffffc900086a7f50] entry_SYSCALL_64_after_hwframe at ffffffff83e00085
> >
> > When we free a cluster buffer from xfs_ifree_cluster, all the inodes in
> > cache are marked XFS_ISTALE. On journal commit dirty stale inodes as are
> > handled by both buffer and inode log items, inodes marked as XFS_ISTALE
> > in AIL will be removed from the AIL because the buffer log item will clean
> > it. If the transaction commit fails in the xfs_inactive_ifree(), inodes
> > marked as XFS_ISTALE will be left in AIL due to buf log item is not
> > committed, this will cause the unmount thread above to be blocked all the
> > time. Error handling in xfs_inactive_ifree() is not enough, the above
> > exception needs to be considered.
> >
> > Signed-off-by: Long Li <leo.lilong@huawei.com>
> > ---
> > fs/xfs/xfs_inode.c | 114 +++++++++++++++++++++++++++++++++++++++++----
> > fs/xfs/xfs_inode.h | 1 -
> > 2 files changed, 105 insertions(+), 10 deletions(-)
> >
> > diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> > index d354ea2b74f9..b6808c0a2868 100644
> > --- a/fs/xfs/xfs_inode.c
> > +++ b/fs/xfs/xfs_inode.c
> > @@ -49,6 +49,9 @@ struct kmem_cache *xfs_inode_cache;
> > STATIC int xfs_iunlink(struct xfs_trans *, struct xfs_inode *);
> > STATIC int xfs_iunlink_remove(struct xfs_trans *tp, struct xfs_perag *pag,
> > struct xfs_inode *);
> > +STATIC int xfs_ifree(struct xfs_trans *tp, struct xfs_inode *ip,
> > + struct xfs_icluster *xic);
> > +STATIC void xfs_ifree_abort(struct xfs_inode *ip, struct xfs_icluster *xic);
> >
> > /*
> > * helper function to extract extent size hint from inode
> > @@ -1544,6 +1547,7 @@ xfs_inactive_ifree(
> > {
> > struct xfs_mount *mp = ip->i_mount;
> > struct xfs_trans *tp;
> > + struct xfs_icluster xic = { 0 };
> > int error;
> >
> > /*
> > @@ -1598,7 +1602,7 @@ xfs_inactive_ifree(
> > xfs_ilock(ip, XFS_ILOCK_EXCL);
> > xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
> >
> > - error = xfs_ifree(tp, ip);
> > + error = xfs_ifree(tp, ip, &xic);
> > ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
> > if (error) {
> > /*
> > @@ -1612,7 +1616,7 @@ xfs_inactive_ifree(
> > xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
> > }
> > xfs_trans_cancel(tp);
> > - return error;
> > + goto out_error;
> > }
> >
> > /*
> > @@ -1625,11 +1629,19 @@ xfs_inactive_ifree(
> > * to try to keep going. Make sure it's not a silent error.
> > */
> > error = xfs_trans_commit(tp);
> > - if (error)
> > + if (error) {
> > xfs_notice(mp, "%s: xfs_trans_commit returned error %d",
> > __func__, error);
> > + goto out_error;
> > + }
> >
> > return 0;
> > +
> > +out_error:
> > + if (xic.deleted)
> > + xfs_ifree_abort(ip, &xic);
> > +
> > + return error;
> > }
> >
> > /*
> > @@ -2259,14 +2271,14 @@ xfs_ifree_cluster(
> > * inodes in the AGI. We need to remove the inode from that list atomically with
> > * respect to freeing it here.
> > */
> > -int
> > +STATIC int
> > xfs_ifree(
> > struct xfs_trans *tp,
> > - struct xfs_inode *ip)
> > + struct xfs_inode *ip,
> > + struct xfs_icluster *xic)
> > {
> > struct xfs_mount *mp = ip->i_mount;
> > struct xfs_perag *pag;
> > - struct xfs_icluster xic = { 0 };
> > struct xfs_inode_log_item *iip = ip->i_itemp;
> > int error;
> >
> > @@ -2284,7 +2296,7 @@ xfs_ifree(
> > * makes the AGI lock -> unlinked list modification order the same as
> > * used in O_TMPFILE creation.
> > */
> > - error = xfs_difree(tp, pag, ip->i_ino, &xic);
> > + error = xfs_difree(tp, pag, ip->i_ino, xic);
> > if (error)
> > goto out;
> >
> > @@ -2323,13 +2335,97 @@ xfs_ifree(
> > VFS_I(ip)->i_generation++;
> > xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
> >
> > - if (xic.deleted)
> > - error = xfs_ifree_cluster(tp, pag, ip, &xic);
> > + if (xic->deleted)
> > + error = xfs_ifree_cluster(tp, pag, ip, xic);
> > out:
> > xfs_perag_put(pag);
> > return error;
> > }
> >
> > +static void
> > +xfs_ifree_abort_inode_stale(
> > + struct xfs_perag *pag,
> > + xfs_ino_t inum)
> > +{
> > + struct xfs_mount *mp = pag->pag_mount;
> > + struct xfs_inode_log_item *iip;
> > + struct xfs_inode *ip;
> > +
> > +retry:
> > + rcu_read_lock();
> > + ip = radix_tree_lookup(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, inum));
> > +
> > + /* Inode not in memory, nothing to do */
> > + if (!ip) {
> > + rcu_read_unlock();
> > + return;
> > + }
> > +
> > + /* Skip invalid or not stale inode */
> > + if (ip->i_ino != inum || !xfs_iflags_test(ip, XFS_ISTALE)) {
> > + rcu_read_unlock();
> > + return;
> > + }
> > +
> > + if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
> > + rcu_read_unlock();
> > + delay(1);
> > + goto retry;
> > + }
> > +
> > + iip = ip->i_itemp;
> > + if (!iip || list_empty(&iip->ili_item.li_bio_list))
> > + goto out_iunlock;
> > +
> > + if (test_bit(XFS_LI_IN_AIL, &iip->ili_item.li_flags))
> > + xfs_iflush_abort(ip);
> > + else
> > + xfs_iflags_clear(ip, XFS_IFLUSHING);
>
> Er... why is the ifree code tearing into the inode log item state ?
>
> Shouldn't this be getting done from the buffer log item when we release
> it and find that it's aborted?
>
> --D
Yes, it doesn't looks good here, traverse buffer's b_li_list and abort xfs_inode
marked as XFS_ISTALE could be better.
>
> > +
> > +out_iunlock:
> > + xfs_iunlock(ip, XFS_ILOCK_EXCL);
> > + rcu_read_unlock();
> > +}
> > +
> > +/*
> > + * This is called to clean up inodes marked as stale in xfs_ifree
> > + */
> > +STATIC void
> > +xfs_ifree_abort(
> > + struct xfs_inode *ip,
> > + struct xfs_icluster *xic)
> > +{
> > + struct xfs_mount *mp = ip->i_mount;
> > + struct xfs_perag *pag;
> > + struct xfs_ino_geometry *igeo = M_IGEO(mp);
> > + xfs_ino_t inum = xic->first_ino;
> > + int nbufs;
> > + int i, j;
> > + int ioffset;
> > +
> > + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
> > +
> > + nbufs = igeo->ialloc_blks / igeo->blocks_per_cluster;
> > +
> > + for (j = 0; j < nbufs; j++, inum += igeo->inodes_per_cluster) {
> > + /*
> > + * The allocation bitmap tells us which inodes of the chunk were
> > + * physically allocated. Skip the cluster if an inode falls into
> > + * a sparse region.
> > + */
> > + ioffset = inum - xic->first_ino;
> > + if ((xic->alloc & XFS_INOBT_MASK(ioffset)) == 0) {
> > + ASSERT(ioffset % igeo->inodes_per_cluster == 0);
> > + continue;
> > + }
> > +
> > + for (i = 0; i < igeo->inodes_per_cluster; i++)
> > + xfs_ifree_abort_inode_stale(pag, inum + i);
> > +
> > + }
> > + xfs_perag_put(pag);
> > +}
> > +
> > /*
> > * This is called to unpin an inode. The caller must have the inode locked
> > * in at least shared mode so that the buffer cannot be subsequently pinned
> > diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
> > index fa780f08dc89..423542bf6af1 100644
> > --- a/fs/xfs/xfs_inode.h
> > +++ b/fs/xfs/xfs_inode.h
> > @@ -499,7 +499,6 @@ uint xfs_ilock_data_map_shared(struct xfs_inode *);
> > uint xfs_ilock_attr_map_shared(struct xfs_inode *);
> >
> > uint xfs_ip2xflags(struct xfs_inode *);
> > -int xfs_ifree(struct xfs_trans *, struct xfs_inode *);
> > int xfs_itruncate_extents_flags(struct xfs_trans **,
> > struct xfs_inode *, int, xfs_fsize_t, int);
> > void xfs_iext_realloc(xfs_inode_t *, int, int);
> > --
> > 2.31.1
> >
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2023-02-11 11:23 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2022-12-09 11:05 [PATCH] xfs: fix hung when transaction commit fail in xfs_inactive_ifree Long Li
2023-02-01 3:21 ` Darrick J. Wong
2023-02-11 11:46 ` Long Li
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox