linux-xfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Christoph Hellwig <hch@lst.de>
To: stable@vger.kernel.org
Cc: linux-xfs@vger.kernel.org, Carlos Maiolino <cmaiolino@redhat.com>,
	"Darrick J . Wong" <darrick.wong@oracle.com>
Subject: [PATCH 25/47] xfs: Properly retry failed inode items in case of error during buffer writeback
Date: Sun, 17 Sep 2017 14:06:50 -0700	[thread overview]
Message-ID: <20170917210712.10804-26-hch@lst.de> (raw)
In-Reply-To: <20170917210712.10804-1-hch@lst.de>

From: Carlos Maiolino <cmaiolino@redhat.com>

commit d3a304b6292168b83b45d624784f973fdc1ca674 upstream.

When a buffer has been failed during writeback, the inode items into it
are kept flush locked, and are never resubmitted due the flush lock, so,
if any buffer fails to be written, the items in AIL are never written to
disk and never unlocked.

This causes unmount operation to hang due these items flush locked in AIL,
but this also causes the items in AIL to never be written back, even when
the IO device comes back to normal.

I've been testing this patch with a DM-thin device, creating a
filesystem larger than the real device.

When writing enough data to fill the DM-thin device, XFS receives ENOSPC
errors from the device, and keep spinning on xfsaild (when 'retry
forever' configuration is set).

At this point, the filesystem can not be unmounted because of the flush locked
items in AIL, but worse, the items in AIL are never retried at all
(once xfs_inode_item_push() will skip the items that are flush locked),
even if the underlying DM-thin device is expanded to the proper size.

This patch fixes both cases, retrying any item that has been failed
previously, using the infra-structure provided by the previous patch.

Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_buf_item.c   | 28 ++++++++++++++++++++++++++++
 fs/xfs/xfs_buf_item.h   |  3 +++
 fs/xfs/xfs_inode_item.c | 47 +++++++++++++++++++++++++++++++++++++++++++----
 fs/xfs/xfs_trans.h      |  1 +
 fs/xfs/xfs_trans_ail.c  |  3 ++-
 fs/xfs/xfs_trans_priv.h | 31 +++++++++++++++++++++++++++++++
 6 files changed, 108 insertions(+), 5 deletions(-)

diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 7573a1f0bc9a..573fc72c3f23 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -1234,3 +1234,31 @@ xfs_buf_iodone(
 	xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE);
 	xfs_buf_item_free(BUF_ITEM(lip));
 }
+
+/*
+ * Requeue a failed buffer for writeback
+ *
+ * Return true if the buffer has been re-queued properly, false otherwise
+ */
+bool
+xfs_buf_resubmit_failed_buffers(
+	struct xfs_buf		*bp,
+	struct xfs_log_item	*lip,
+	struct list_head	*buffer_list)
+{
+	struct xfs_log_item	*next;
+
+	/*
+	 * Clear XFS_LI_FAILED flag from all items before resubmit
+	 *
+	 * XFS_LI_FAILED set/clear is protected by xa_lock, caller  this
+	 * function already have it acquired
+	 */
+	for (; lip; lip = next) {
+		next = lip->li_bio_list;
+		xfs_clear_li_failed(lip);
+	}
+
+	/* Add this buffer back to the delayed write list */
+	return xfs_buf_delwri_queue(bp, buffer_list);
+}
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index f7eba99d19dd..530686e1afb9 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -70,6 +70,9 @@ void	xfs_buf_attach_iodone(struct xfs_buf *,
 			      xfs_log_item_t *);
 void	xfs_buf_iodone_callbacks(struct xfs_buf *);
 void	xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *);
+bool	xfs_buf_resubmit_failed_buffers(struct xfs_buf *,
+					struct xfs_log_item *,
+					struct list_head *);
 
 extern kmem_zone_t	*xfs_buf_item_zone;
 
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 08cb7d1a4a3a..94915747042c 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -27,6 +27,7 @@
 #include "xfs_error.h"
 #include "xfs_trace.h"
 #include "xfs_trans_priv.h"
+#include "xfs_buf_item.h"
 #include "xfs_log.h"
 
 
@@ -475,6 +476,23 @@ xfs_inode_item_unpin(
 		wake_up_bit(&ip->i_flags, __XFS_IPINNED_BIT);
 }
 
+/*
+ * Callback used to mark a buffer with XFS_LI_FAILED when items in the buffer
+ * have been failed during writeback
+ *
+ * This informs the AIL that the inode is already flush locked on the next push,
+ * and acquires a hold on the buffer to ensure that it isn't reclaimed before
+ * dirty data makes it to disk.
+ */
+STATIC void
+xfs_inode_item_error(
+	struct xfs_log_item	*lip,
+	struct xfs_buf		*bp)
+{
+	ASSERT(xfs_isiflocked(INODE_ITEM(lip)->ili_inode));
+	xfs_set_li_failed(lip, bp);
+}
+
 STATIC uint
 xfs_inode_item_push(
 	struct xfs_log_item	*lip,
@@ -484,13 +502,28 @@ xfs_inode_item_push(
 {
 	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
 	struct xfs_inode	*ip = iip->ili_inode;
-	struct xfs_buf		*bp = NULL;
+	struct xfs_buf		*bp = lip->li_buf;
 	uint			rval = XFS_ITEM_SUCCESS;
 	int			error;
 
 	if (xfs_ipincount(ip) > 0)
 		return XFS_ITEM_PINNED;
 
+	/*
+	 * The buffer containing this item failed to be written back
+	 * previously. Resubmit the buffer for IO.
+	 */
+	if (lip->li_flags & XFS_LI_FAILED) {
+		if (!xfs_buf_trylock(bp))
+			return XFS_ITEM_LOCKED;
+
+		if (!xfs_buf_resubmit_failed_buffers(bp, lip, buffer_list))
+			rval = XFS_ITEM_FLUSHING;
+
+		xfs_buf_unlock(bp);
+		return rval;
+	}
+
 	if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
 		return XFS_ITEM_LOCKED;
 
@@ -622,7 +655,8 @@ static const struct xfs_item_ops xfs_inode_item_ops = {
 	.iop_unlock	= xfs_inode_item_unlock,
 	.iop_committed	= xfs_inode_item_committed,
 	.iop_push	= xfs_inode_item_push,
-	.iop_committing = xfs_inode_item_committing
+	.iop_committing = xfs_inode_item_committing,
+	.iop_error	= xfs_inode_item_error
 };
 
 
@@ -710,7 +744,8 @@ xfs_iflush_done(
 		 * the AIL lock.
 		 */
 		iip = INODE_ITEM(blip);
-		if (iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn)
+		if ((iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) ||
+		    lip->li_flags & XFS_LI_FAILED)
 			need_ail++;
 
 		blip = next;
@@ -718,7 +753,8 @@ xfs_iflush_done(
 
 	/* make sure we capture the state of the initial inode. */
 	iip = INODE_ITEM(lip);
-	if (iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn)
+	if ((iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn) ||
+	    lip->li_flags & XFS_LI_FAILED)
 		need_ail++;
 
 	/*
@@ -739,6 +775,9 @@ xfs_iflush_done(
 			if (INODE_ITEM(blip)->ili_logged &&
 			    blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn)
 				mlip_changed |= xfs_ail_delete_one(ailp, blip);
+			else {
+				xfs_clear_li_failed(blip);
+			}
 		}
 
 		if (mlip_changed) {
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 22fddad9fe11..0318e92aed66 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -50,6 +50,7 @@ typedef struct xfs_log_item {
 	struct xfs_ail			*li_ailp;	/* ptr to AIL */
 	uint				li_type;	/* item type */
 	uint				li_flags;	/* misc flags */
+	struct xfs_buf			*li_buf;	/* real buffer pointer */
 	struct xfs_log_item		*li_bio_list;	/* buffer item list */
 	void				(*li_cb)(struct xfs_buf *,
 						 struct xfs_log_item *);
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 9056c0f34a3c..70f5ab017323 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -687,12 +687,13 @@ xfs_trans_ail_update_bulk(
 bool
 xfs_ail_delete_one(
 	struct xfs_ail		*ailp,
-	struct xfs_log_item 	*lip)
+	struct xfs_log_item	*lip)
 {
 	struct xfs_log_item	*mlip = xfs_ail_min(ailp);
 
 	trace_xfs_ail_delete(lip, mlip->li_lsn, lip->li_lsn);
 	xfs_ail_delete(ailp, lip);
+	xfs_clear_li_failed(lip);
 	lip->li_flags &= ~XFS_LI_IN_AIL;
 	lip->li_lsn = 0;
 
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index d91706c56c63..b317a3644c00 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -164,4 +164,35 @@ xfs_trans_ail_copy_lsn(
 	*dst = *src;
 }
 #endif
+
+static inline void
+xfs_clear_li_failed(
+	struct xfs_log_item	*lip)
+{
+	struct xfs_buf	*bp = lip->li_buf;
+
+	ASSERT(lip->li_flags & XFS_LI_IN_AIL);
+	lockdep_assert_held(&lip->li_ailp->xa_lock);
+
+	if (lip->li_flags & XFS_LI_FAILED) {
+		lip->li_flags &= ~XFS_LI_FAILED;
+		lip->li_buf = NULL;
+		xfs_buf_rele(bp);
+	}
+}
+
+static inline void
+xfs_set_li_failed(
+	struct xfs_log_item	*lip,
+	struct xfs_buf		*bp)
+{
+	lockdep_assert_held(&lip->li_ailp->xa_lock);
+
+	if (!(lip->li_flags & XFS_LI_FAILED)) {
+		xfs_buf_hold(bp);
+		lip->li_flags |= XFS_LI_FAILED;
+		lip->li_buf = bp;
+	}
+}
+
 #endif	/* __XFS_TRANS_PRIV_H__ */
-- 
2.14.1


  parent reply	other threads:[~2017-09-17 21:07 UTC|newest]

Thread overview: 49+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-09-17 21:06 4.9-stable updates for XFS Christoph Hellwig
2017-09-17 21:06 ` [PATCH 01/47] xfs: Move handling of missing page into one place in xfs_find_get_desired_pgoff() Christoph Hellwig
2017-09-17 21:06 ` [PATCH 02/47] xfs: fix spurious spin_is_locked() assert failures on non-smp kernels Christoph Hellwig
2017-09-17 21:06 ` [PATCH 03/47] xfs: push buffer of flush locked dquot to avoid quotacheck deadlock Christoph Hellwig
2017-09-17 21:06 ` [PATCH 04/47] xfs: try to avoid blowing out the transaction reservation when bunmaping a shared extent Christoph Hellwig
2017-09-17 21:06 ` [PATCH 05/47] xfs: release bli from transaction properly on fs shutdown Christoph Hellwig
2017-09-17 21:06 ` [PATCH 06/47] xfs: remove bli from AIL before release on transaction abort Christoph Hellwig
2017-09-17 21:06 ` [PATCH 07/47] xfs: don't allow bmap on rt files Christoph Hellwig
2017-09-17 21:06 ` [PATCH 08/47] xfs: free uncommitted transactions during log recovery Christoph Hellwig
2017-09-17 21:06 ` [PATCH 09/47] xfs: free cowblocks and retry on buffered write ENOSPC Christoph Hellwig
2017-09-17 21:06 ` [PATCH 10/47] xfs: don't crash on unexpected holes in dir/attr btrees Christoph Hellwig
2017-09-17 21:06 ` [PATCH 11/47] xfs: check _btree_check_block value Christoph Hellwig
2017-09-17 21:06 ` [PATCH 12/47] xfs: set firstfsb to NULLFSBLOCK before feeding it to _bmapi_write Christoph Hellwig
2017-09-17 21:06 ` [PATCH 13/47] xfs: check _alloc_read_agf buffer pointer before using Christoph Hellwig
2017-09-17 21:06 ` [PATCH 14/47] xfs: fix quotacheck dquot id overflow infinite loop Christoph Hellwig
2017-09-17 21:06 ` [PATCH 15/47] xfs: fix multi-AG deadlock in xfs_bunmapi Christoph Hellwig
2017-09-17 21:06 ` [PATCH 16/47] xfs: Fix per-inode DAX flag inheritance Christoph Hellwig
2017-09-17 21:06 ` [PATCH 17/47] xfs: fix inobt inode allocation search optimization Christoph Hellwig
2017-09-17 21:06 ` [PATCH 18/47] xfs: clear MS_ACTIVE after finishing log recovery Christoph Hellwig
2017-09-17 21:06 ` [PATCH 19/47] xfs: don't leak quotacheck dquots when cow recovery Christoph Hellwig
2017-09-17 21:06 ` [PATCH 20/47] iomap: fix integer truncation issues in the zeroing and dirtying helpers Christoph Hellwig
2017-09-17 21:06 ` [PATCH 21/47] xfs: write unmount record for ro mounts Christoph Hellwig
2017-09-17 21:06 ` [PATCH 22/47] xfs: toggle readonly state around xfs_log_mount_finish Christoph Hellwig
2017-09-17 21:06 ` [PATCH 23/47] xfs: remove xfs_trans_ail_delete_bulk Christoph Hellwig
2017-09-17 21:06 ` [PATCH 24/47] xfs: Add infrastructure needed for error propagation during buffer IO failure Christoph Hellwig
2017-09-17 21:06 ` Christoph Hellwig [this message]
2017-09-17 21:06 ` [PATCH 26/47] xfs: fix recovery failure when log record header wraps log end Christoph Hellwig
2017-09-17 21:06 ` [PATCH 27/47] xfs: always verify the log tail during recovery Christoph Hellwig
2017-09-17 21:06 ` [PATCH 28/47] xfs: fix log recovery corruption error due to tail overwrite Christoph Hellwig
2017-09-17 21:06 ` [PATCH 29/47] xfs: handle -EFSCORRUPTED during head/tail verification Christoph Hellwig
2017-09-17 21:06 ` [PATCH 30/47] xfs: add log recovery tracepoint for head/tail Christoph Hellwig
2017-09-17 21:06 ` [PATCH 31/47] xfs: stop searching for free slots in an inode chunk when there are none Christoph Hellwig
2017-09-17 21:06 ` [PATCH 32/47] xfs: evict all inodes involved with log redo item Christoph Hellwig
2017-09-17 21:06 ` [PATCH 33/47] xfs: check for race with xfs_reclaim_inode() in xfs_ifree_cluster() Christoph Hellwig
2017-09-17 21:06 ` [PATCH 34/47] xfs: open-code xfs_buf_item_dirty() Christoph Hellwig
2017-09-17 21:07 ` [PATCH 35/47] xfs: remove unnecessary dirty bli format check for ordered bufs Christoph Hellwig
2017-09-17 21:07 ` [PATCH 36/47] xfs: ordered buffer log items are never formatted Christoph Hellwig
2017-09-17 21:07 ` [PATCH 37/47] xfs: refactor buffer logging into buffer dirtying helper Christoph Hellwig
2017-09-17 21:07 ` [PATCH 38/47] xfs: don't log dirty ranges for ordered buffers Christoph Hellwig
2017-09-17 21:07 ` [PATCH 39/47] xfs: skip bmbt block ino validation during owner change Christoph Hellwig
2017-09-17 21:07 ` [PATCH 40/47] xfs: move bmbt owner change to last step of extent swap Christoph Hellwig
2017-09-17 21:07 ` [PATCH 41/47] xfs: disallow marking previously dirty buffers as ordered Christoph Hellwig
2017-09-17 21:07 ` [PATCH 42/47] xfs: relog dirty buffers during swapext bmbt owner change Christoph Hellwig
2017-09-17 21:07 ` [PATCH 43/47] xfs: disable per-inode DAX flag Christoph Hellwig
2017-09-17 21:07 ` [PATCH 44/47] xfs: fix incorrect log_flushed on fsync Christoph Hellwig
2017-09-17 21:07 ` [PATCH 45/47] xfs: don't set v3 xflags for v2 inodes Christoph Hellwig
2017-09-17 21:07 ` [PATCH 46/47] xfs: open code end_buffer_async_write in xfs_finish_page_writeback Christoph Hellwig
2017-09-17 21:07 ` [PATCH 47/47] xfs: use kmem_free to free return value of kmem_zalloc Christoph Hellwig
2017-09-18  8:22 ` 4.9-stable updates for XFS Greg KH

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170917210712.10804-26-hch@lst.de \
    --to=hch@lst.de \
    --cc=cmaiolino@redhat.com \
    --cc=darrick.wong@oracle.com \
    --cc=linux-xfs@vger.kernel.org \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).