From: Dave Chinner <david@fromorbit.com>
To: xfs@oss.sgi.com
Cc: npiggin@kernel.de
Subject: [PATCH 2/2] xfs: ensure we mark all inodes in a freed cluster XFS_ISTALE
Date: Fri, 30 Jul 2010 08:55:46 +1000 [thread overview]
Message-ID: <1280444146-14540-3-git-send-email-david@fromorbit.com> (raw)
In-Reply-To: <1280444146-14540-1-git-send-email-david@fromorbit.com>
From: Dave Chinner <dchinner@redhat.com>
Under heavy load parallel metadata loads (e.g. dbench), we can fail
to mark all the inodes in a cluster being freed as XFS_ISTALE as we
skip inodes we cannot get the XFS_ILOCK_EXCL or the flush lock on.
When this happens and the inode cluster buffer has already been
marked stale and freed, inode reclaim can try to write the inode out
as it is dirty and not marked stale. This can result in writing th
metadata to an freed extent, or in the case it has already
been overwritten trigger a magic number check failure and return an
EUCLEAN error such as:
Filesystem "ram0": inode 0x442ba1 background reclaim flush failed with 117
Fix this by ensuring that we hoover up all in memory inodes in the
cluster and mark them XFS_ISTALE when freeing the cluster.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/xfs/xfs_inode.c | 88 ++++++++++++++++++++++++++++-----------------------
1 files changed, 48 insertions(+), 40 deletions(-)
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 68415cb..abc50b6 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1914,6 +1914,11 @@ xfs_iunlink_remove(
return 0;
}
+/*
+ * A big issue when freeing the inode cluster is is that we _cannot_ skip any
+ * inodes that are in memory - they all must be marked stale and attached to
+ * the cluster buffer.
+ */
STATIC void
xfs_ifree_cluster(
xfs_inode_t *free_ip,
@@ -1945,8 +1950,6 @@ xfs_ifree_cluster(
}
for (j = 0; j < nbufs; j++, inum += ninodes) {
- int found = 0;
-
blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
XFS_INO_TO_AGBNO(mp, inum));
@@ -1963,26 +1966,6 @@ xfs_ifree_cluster(
XBF_LOCK);
/*
- * Walk the inodes already attached to the buffer and mark them
- * stale. These will all have the flush locks held, so an
- * in-memory inode walk can't lock them.
- */
- lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
- while (lip) {
- if (lip->li_type == XFS_LI_INODE) {
- iip = (xfs_inode_log_item_t *)lip;
- ASSERT(iip->ili_logged == 1);
- lip->li_cb = xfs_istale_done;
- xfs_trans_ail_copy_lsn(mp->m_ail,
- &iip->ili_flush_lsn,
- &iip->ili_item.li_lsn);
- xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
- found++;
- }
- lip = lip->li_bio_list;
- }
-
- /*
* For each inode in memory attempt to add it to the inode
* buffer and set it up for being staled on buffer IO
* completion. This is safe as we've locked out tail pushing
@@ -1999,42 +1982,68 @@ xfs_ifree_cluster(
/* Inode not in memory or stale, nothing to do */
if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) {
+next_inode:
read_unlock(&pag->pag_ici_lock);
continue;
}
- /* don't try to lock/unlock the current inode */
+ /*
+ * Walk the inodes already attached to the buffer to
+ * see if this inode is already there. If so, mark it
+ * stale. These will all have the flush locks held, so an
+ * in-memory inode walk can't lock them. We do this
+ * inside the loop so that we know for certain that we
+ * don't encounter inodes with the flush lock already
+ * held and hence deadlock below.
+ */
+ lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
+ do {
+ if (lip->li_type == XFS_LI_INODE) {
+ iip = (xfs_inode_log_item_t *)lip;
+ if (iip->ili_inode != ip)
+ continue;
+ ASSERT(iip->ili_logged == 1);
+ lip->li_cb = xfs_istale_done;
+ xfs_trans_ail_copy_lsn(mp->m_ail,
+ &iip->ili_flush_lsn,
+ &iip->ili_item.li_lsn);
+ xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
+ goto next_inode;
+ }
+ } while ((lip = lip->li_bio_list) != NULL);
+
+ /*
+ * Don't try to lock/unlock the current inode, but we
+ * _cannot_ skip the other inodes that we did not find
+ * in the list attached to the buffer and are not
+ * already marked stale. If we can't lock it, back off
+ * and retry.
+ */
+
if (ip != free_ip &&
!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
read_unlock(&pag->pag_ici_lock);
+ delay(1);
+ i--;
continue;
}
read_unlock(&pag->pag_ici_lock);
- if (!xfs_iflock_nowait(ip)) {
- if (ip != free_ip)
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- continue;
- }
-
+ xfs_iflock(ip);
xfs_iflags_set(ip, XFS_ISTALE);
- if (xfs_inode_clean(ip)) {
- ASSERT(ip != free_ip);
- xfs_ifunlock(ip);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- continue;
- }
+ /*
+ * we don't need to attach clean inodes or those only
+ * with unlogged changes (which we throw away, anyway).
+ */
iip = ip->i_itemp;
- if (!iip) {
- /* inode with unlogged changes only */
+ if (!iip || xfs_inode_clean(ip)) {
ASSERT(ip != free_ip);
ip->i_update_core = 0;
xfs_ifunlock(ip);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
continue;
}
- found++;
iip->ili_last_fields = iip->ili_format.ilf_fields;
iip->ili_format.ilf_fields = 0;
@@ -2049,8 +2058,7 @@ xfs_ifree_cluster(
xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
- if (found)
- xfs_trans_stale_inode_buf(tp, bp);
+ xfs_trans_stale_inode_buf(tp, bp);
xfs_trans_binval(tp, bp);
}
--
1.7.1
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
next prev parent reply other threads:[~2010-06-08 15:18 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-07-29 22:55 [PATCH 0/2] xfs: fix bugs uncovered by dbench testing Dave Chinner
2010-07-29 22:55 ` [PATCH 1/2] xfs: unlock items before allowing the CIL to commit Dave Chinner
2010-07-30 8:49 ` Christoph Hellwig
2010-07-29 22:55 ` Dave Chinner [this message]
2010-07-30 10:27 ` [PATCH 2/2] xfs: ensure we mark all inodes in a freed cluster XFS_ISTALE Christoph Hellwig
2010-07-30 10:54 ` Dave Chinner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1280444146-14540-3-git-send-email-david@fromorbit.com \
--to=david@fromorbit.com \
--cc=npiggin@kernel.de \
--cc=xfs@oss.sgi.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox