From: Dave Chinner <david@fromorbit.com>
To: xfs@oss.sgi.com
Subject: [PATCH 2/3] xfs: truncate delalloc extents when IO fails in writeback
Date: Thu, 4 Mar 2010 12:46:24 +1100 [thread overview]
Message-ID: <1267667185-7736-3-git-send-email-david@fromorbit.com> (raw)
In-Reply-To: <1267667185-7736-1-git-send-email-david@fromorbit.com>
From: Dave Chinner <dchinner@redhat.com>
We currently use block_invalidatepage() to clean up pages where I/O
fails in ->writepage(). Unfortunately, if the page has delalloc
regions on it, we fail to remove the delalloc regions when we
invalidate the page. This can result in tripping a BUG() in
xfs_get_blocks() later on if a direct IO read is done on that same
region - the delalloc extent is returned when none is supposed to be
there.
Fix this by truncating away the delalloc regions on the page before
invalidating it. Because they are delalloc, we can do this without
needing a transaction. Indeed - if we get ENOSPC errors, we have to
be able to do this truncation without a transaction as there is
no space left for block reservation (typically why we see a ENOSPC
in writeback).
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/xfs/linux-2.6/xfs_aops.c | 106 +++++++++++++++++++++++++++++++++++++++----
fs/xfs/xfs_bmap.c | 6 ++-
2 files changed, 100 insertions(+), 12 deletions(-)
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index b493c63..65360b5 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -39,6 +39,7 @@
#include "xfs_iomap.h"
#include "xfs_vnodeops.h"
#include "xfs_trace.h"
+#include "xfs_bmap.h"
#include <linux/mpage.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
@@ -893,6 +894,100 @@ xfs_cluster_write(
}
}
+STATIC void
+xfs_vm_invalidatepage(
+ struct page *page,
+ unsigned long offset)
+{
+ trace_xfs_invalidatepage(page->mapping->host, page, offset);
+ block_invalidatepage(page, offset);
+}
+
+/*
+ * If the page has delalloc buffers on it, we need to punch them out before we
+ * invalidate the page. If we don't, we leave a stale delalloc mapping on the
+ * inode that we can trip over later.
+ *
+ * This is not a performance critical path, so for now just do the punching a
+ * buffer head at a time.
+ */
+static void
+xfs_aops_discard_page(
+ struct page *page)
+{
+ struct inode *inode = page->mapping->host;
+ struct xfs_inode *ip = XFS_I(inode);
+ struct buffer_head *bh, *head;
+ loff_t offset = page_offset(page);
+ ssize_t len = 1 << inode->i_blkbits;
+
+ if (!xfs_is_delayed_page(page, IOMAP_DELAY))
+ goto out_invalidate;
+
+ xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
+ "page discard on page %p, inode 0x%llx, offset %llu.",
+ page, ip->i_ino, offset);
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ bh = head = page_buffers(page);
+ do {
+ int done;
+ xfs_fileoff_t offset_fsb;
+ xfs_bmbt_irec_t imap;
+ int nimaps = 1;
+ int error;
+
+ if (!buffer_delay(bh))
+ goto next_buffer;
+
+ offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
+
+ /*
+ * Map the range first and check that it is a delalloc extent
+ * before trying to unmap the range. Otherwise we will be
+ * trying to remove a real extent (which requires a
+ * transaction) or a hole, which is probably a bad idea...
+ */
+ error = xfs_bmapi(NULL, ip, offset_fsb, 1,
+ XFS_BMAPI_ENTIRE, NULL, 0, &imap,
+ &nimaps, NULL, NULL);
+
+ if (error) {
+ /* something screwed, just bail */
+ xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
+ "page discard failed delalloc mapping lookup.");
+ break;
+ }
+ if (!nimaps) {
+ /* nothing there */
+ goto next_buffer;
+ }
+ if (imap.br_startblock != DELAYSTARTBLOCK) {
+ /* been converted, ignore */
+ goto next_buffer;
+ }
+ WARN_ON(imap.br_blockcount == 0);
+
+ error = xfs_bunmapi(NULL, ip, offset_fsb, 1, 0, 1, NULL,
+ NULL, NULL, &done);
+
+ if (error) {
+ /* something screwed, just bail */
+ xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
+ "page discard unable to remove delalloc mapping.");
+ break;
+ }
+next_buffer:
+ offset += len;
+
+ } while ((bh = bh->b_this_page) != head);
+
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+out_invalidate:
+ xfs_vm_invalidatepage(page, 0);
+ return;
+}
+
/*
* Calling this without startio set means we are being asked to make a dirty
* page ready for freeing it's buffers. When called with startio set then
@@ -1144,7 +1239,7 @@ error:
*/
if (err != -EAGAIN) {
if (!unmapped)
- block_invalidatepage(page, 0);
+ xfs_aops_discard_page(page);
ClearPageUptodate(page);
}
return err;
@@ -1554,15 +1649,6 @@ xfs_vm_readpages(
return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
}
-STATIC void
-xfs_vm_invalidatepage(
- struct page *page,
- unsigned long offset)
-{
- trace_xfs_invalidatepage(page->mapping->host, page, offset);
- block_invalidatepage(page, offset);
-}
-
const struct address_space_operations xfs_address_space_operations = {
.readpage = xfs_vm_readpage,
.readpages = xfs_vm_readpages,
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 95a4813..7afd475 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5218,7 +5218,8 @@ xfs_bunmapi(
if (ifp->if_flags & XFS_IFBROOT) {
ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
- cur->bc_private.b.firstblock = *firstblock;
+ cur->bc_private.b.firstblock = firstblock ?
+ *firstblock : NULLFSBLOCK;
cur->bc_private.b.flist = flist;
cur->bc_private.b.flags = 0;
} else
@@ -5503,7 +5504,8 @@ error0:
xfs_trans_log_inode(tp, ip, logflags);
if (cur) {
if (!error) {
- *firstblock = cur->bc_private.b.firstblock;
+ if (firstblock)
+ *firstblock = cur->bc_private.b.firstblock;
cur->bc_private.b.allocated = 0;
}
xfs_btree_del_cursor(cur,
--
1.6.5
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
next prev parent reply other threads:[~2010-03-04 1:45 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-03-04 1:46 [PATCH 0/3] xfs: delayed allocation @ ENOSPC fixes Dave Chinner
2010-03-04 1:46 ` [PATCH 1/3] xfs: check for more work before sleeping in xfssyncd Dave Chinner
2010-03-04 13:50 ` Christoph Hellwig
2010-03-04 22:56 ` Dave Chinner
2010-03-04 1:46 ` Dave Chinner [this message]
2010-03-04 14:28 ` [PATCH 2/3] xfs: truncate delalloc extents when IO fails in writeback Christoph Hellwig
2010-03-04 22:03 ` Dave Chinner
2010-03-05 2:00 ` [PATCH v2] " Dave Chinner
2010-03-05 9:21 ` Christoph Hellwig
2010-03-10 9:12 ` [PATCH 2/3] " Christoph Hellwig
2010-03-10 12:52 ` Dave Chinner
2010-03-10 18:18 ` Christoph Hellwig
2010-03-04 1:46 ` [PATCH 3/3] xfs: Increase the default size of the reserved blocks pool Dave Chinner
2010-03-05 15:45 ` Alex Elder
2010-03-05 23:26 ` Dave Chinner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1267667185-7736-3-git-send-email-david@fromorbit.com \
--to=david@fromorbit.com \
--cc=xfs@oss.sgi.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox