[PATCH] xfs: inode buffers may not be valid during recovery readahead

From: Dave Chinner <david@fromorbit.com>
To: xfs@oss.sgi.com
Subject: [PATCH] xfs: inode buffers may not be valid during recovery readahead
Date: Tue, 27 Aug 2013 11:39:37 +1000	[thread overview]
Message-ID: <1377567577-24312-1-git-send-email-david@fromorbit.com> (raw)

From: Dave Chinner <dchinner@redhat.com>

CRC enabled filesystems fail log recovery with 100% reliability on
xfstests xfs/085 with the following failure:

XFS (vdb): Mounting Filesystem
XFS (vdb): Starting recovery (logdev: internal)
XFS (vdb): Corruption detected. Unmount and run xfs_repair
XFS (vdb): bad inode magic/vsn daddr 144 #0 (magic=0)
XFS: Assertion failed: 0, file: fs/xfs/xfs_inode_buf.c, line: 95

The problem is that the inode buffer has not been recovered before
the readahead on the inode buffer is issued. The checkpoint being
recovered actually allocates the inode chunk we are doing readahead
from, so what comes from disk during readahead is essentially
random and the verifier barfs on it.

This inode buffer readahead problem affects non-crc filesystems,
too, but xfstests does not trigger it at all on such
configurations....

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/xfs_inode_buf.c   | 36 +++++++++++++++++++++++++++++++++---
 fs/xfs/xfs_inode_buf.h   |  1 +
 fs/xfs/xfs_log_recover.c |  2 +-
 3 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/fs/xfs/xfs_inode_buf.c b/fs/xfs/xfs_inode_buf.c
index 38fe509..e011d59 100644
--- a/fs/xfs/xfs_inode_buf.c
+++ b/fs/xfs/xfs_inode_buf.c
@@ -61,9 +61,22 @@ xfs_inobp_check(
 }
 #endif
 
+/*
+ * If we are doing readahead on an inode buffer, we might be in log recovery
+ * reading an inode allocation buffer that hasn't yet been replayed, and hence
+ * has not had the inode cores stamped into it. Hence for readahead, the buffer
+ * may be potentially invalid.
+ *
+ * If the readahead buffer is invalid, we don't want to mark it with an error,
+ * but we do want to clear the DONE status of the buffer so that a followup read
+ * will re-read it from disk. This will ensure that we don't get an unnecessary
+ * warnings during log recovery and we don't get unnecssary panics on debug
+ * kernels.
+ */
 static void
 xfs_inode_buf_verify(
-	struct xfs_buf	*bp)
+	struct xfs_buf	*bp,
+	bool		readahead)
 {
 	struct xfs_mount *mp = bp->b_target->bt_mount;
 	int		i;
@@ -84,6 +97,11 @@ xfs_inode_buf_verify(
 		if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
 						XFS_ERRTAG_ITOBP_INOTOBP,
 						XFS_RANDOM_ITOBP_INOTOBP))) {
+			if (readahead) {
+				bp->b_flags &= ~XBF_DONE;
+				return;
+			}
+
 			xfs_buf_ioerror(bp, EFSCORRUPTED);
 			XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_HIGH,
 					     mp, dip);
@@ -104,14 +122,21 @@ static void
 xfs_inode_buf_read_verify(
 	struct xfs_buf	*bp)
 {
-	xfs_inode_buf_verify(bp);
+	xfs_inode_buf_verify(bp, false);
+}
+
+static void
+xfs_inode_buf_readahead_verify(
+	struct xfs_buf	*bp)
+{
+	xfs_inode_buf_verify(bp, true);
 }
 
 static void
 xfs_inode_buf_write_verify(
 	struct xfs_buf	*bp)
 {
-	xfs_inode_buf_verify(bp);
+	xfs_inode_buf_verify(bp, false);
 }
 
 const struct xfs_buf_ops xfs_inode_buf_ops = {
@@ -119,6 +144,11 @@ const struct xfs_buf_ops xfs_inode_buf_ops = {
 	.verify_write = xfs_inode_buf_write_verify,
 };
 
+const struct xfs_buf_ops xfs_inode_buf_ra_ops = {
+	.verify_read = xfs_inode_buf_readahead_verify,
+	.verify_write = xfs_inode_buf_write_verify,
+};
+
 
 /*
  * This routine is called to map an inode to the buffer containing the on-disk
diff --git a/fs/xfs/xfs_inode_buf.h b/fs/xfs/xfs_inode_buf.h
index aae9fc4..599e6c0 100644
--- a/fs/xfs/xfs_inode_buf.h
+++ b/fs/xfs/xfs_inode_buf.h
@@ -48,5 +48,6 @@ void		xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
 #endif /* DEBUG */
 
 extern const struct xfs_buf_ops xfs_inode_buf_ops;
+extern const struct xfs_buf_ops xfs_inode_buf_ra_ops;
 
 #endif	/* __XFS_INODE_BUF_H__ */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 90b756f..ac9c18b 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3169,7 +3169,7 @@ xlog_recover_inode_ra_pass2(
 		return;
 
 	xfs_buf_readahead(mp->m_ddev_targp, ilfp->ilf_blkno,
-				ilfp->ilf_len, &xfs_inode_buf_ops);
+				ilfp->ilf_len, &xfs_inode_buf_ra_ops);
 }
 
 STATIC void
-- 
1.8.3.2

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs