From: Brian Foster <bfoster@redhat.com>
To: Dave Chinner <david@fromorbit.com>
Cc: xfs@oss.sgi.com
Subject: Re: [PATCH 7/9] libxfs: don't discard dirty buffers
Date: Tue, 5 Jan 2016 13:34:08 -0500 [thread overview]
Message-ID: <20160105183407.GC38749@bfoster.bfoster> (raw)
In-Reply-To: <1450733829-9319-8-git-send-email-david@fromorbit.com>
On Tue, Dec 22, 2015 at 08:37:07AM +1100, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
>
> When we release a buffer from the cache, if it is dirty we wite it
> to disk then put the buffer on the free list for recycling. However,
> if the write fails (e.g. verifier failure due unfixed corruption) we
> effectively throw the buffer and it contents away. This causes all
> sorts of problems for xfs_repair as it then re-reads the buffer from
> disk on the next access and hence loses all the corrections that had
> previously been made, resulting in tripping over corruptions in code
> that assumes the corruptions have already been fixed/flagged in the
> buffer it receives.
>
> TO fix this, we have to make the cache aware that writes can fail,
> and keep the buffer in cache when writes fail. Hence we have to add
> an explicit error notification to the flush operation, and we need
> to do that before we release the buffer to the free list. This also
> means that we need to remove the writeback code from the release
> mechanisms, instead replacing them with assertions that the buffers
> are already clean.
>
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---
Reviewed-by: Brian Foster <bfoster@redhat.com>
> include/cache.h | 2 +-
> libxfs/cache.c | 15 ++++++++++++++-
> libxfs/rdwr.c | 44 +++++++++++++++++++++++++++-----------------
> 3 files changed, 42 insertions(+), 19 deletions(-)
>
> diff --git a/include/cache.h b/include/cache.h
> index 0a84c69..87826be 100644
> --- a/include/cache.h
> +++ b/include/cache.h
> @@ -64,7 +64,7 @@ typedef void *cache_key_t;
>
> typedef void (*cache_walk_t)(struct cache_node *);
> typedef struct cache_node * (*cache_node_alloc_t)(cache_key_t);
> -typedef void (*cache_node_flush_t)(struct cache_node *);
> +typedef int (*cache_node_flush_t)(struct cache_node *);
> typedef void (*cache_node_relse_t)(struct cache_node *);
> typedef unsigned int (*cache_node_hash_t)(cache_key_t, unsigned int,
> unsigned int);
> diff --git a/libxfs/cache.c b/libxfs/cache.c
> index 4753a1d..a48ebd9 100644
> --- a/libxfs/cache.c
> +++ b/libxfs/cache.c
> @@ -219,6 +219,12 @@ cache_shake(
> if (pthread_mutex_trylock(&node->cn_mutex) != 0)
> continue;
>
> + /* can't release dirty objects */
> + if (cache->flush(node)) {
> + pthread_mutex_unlock(&node->cn_mutex);
> + continue;
> + }
> +
> hash = cache->c_hash + node->cn_hashidx;
> if (pthread_mutex_trylock(&hash->ch_mutex) != 0) {
> pthread_mutex_unlock(&node->cn_mutex);
> @@ -311,6 +317,13 @@ __cache_node_purge(
> pthread_mutex_unlock(&node->cn_mutex);
> return count;
> }
> +
> + /* can't purge dirty objects */
> + if (cache->flush(node)) {
> + pthread_mutex_unlock(&node->cn_mutex);
> + return 1;
> + }
> +
> mru = &cache->c_mrus[node->cn_priority];
> pthread_mutex_lock(&mru->cm_mutex);
> list_del_init(&node->cn_mru);
> @@ -321,7 +334,7 @@ __cache_node_purge(
> pthread_mutex_destroy(&node->cn_mutex);
> list_del_init(&node->cn_hash);
> cache->relse(node);
> - return count;
> + return 0;
> }
>
> /*
> diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c
> index 7a04985..0337a21 100644
> --- a/libxfs/rdwr.c
> +++ b/libxfs/rdwr.c
> @@ -659,6 +659,8 @@ __libxfs_getbufr(int blen)
> bp = kmem_zone_zalloc(xfs_buf_zone, 0);
> pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex);
> bp->b_ops = NULL;
> + if (bp->b_flags & LIBXFS_B_DIRTY)
> + fprintf(stderr, "found dirty buffer (bulk) on free list!");
>
> return bp;
> }
> @@ -1223,23 +1225,26 @@ libxfs_iomove(xfs_buf_t *bp, uint boff, int len, void *data, int flags)
> }
>
> static void
> -libxfs_brelse(struct cache_node *node)
> +libxfs_brelse(
> + struct cache_node *node)
> {
> - xfs_buf_t *bp = (xfs_buf_t *)node;
> + struct xfs_buf *bp = (struct xfs_buf *)node;
>
> - if (bp != NULL) {
> - if (bp->b_flags & LIBXFS_B_DIRTY)
> - libxfs_writebufr(bp);
> - pthread_mutex_lock(&xfs_buf_freelist.cm_mutex);
> - list_add(&bp->b_node.cn_mru, &xfs_buf_freelist.cm_list);
> - pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex);
> - }
> + if (!bp)
> + return;
> + if (bp->b_flags & LIBXFS_B_DIRTY)
> + fprintf(stderr,
> + "releasing dirty buffer to free list!");
> +
> + pthread_mutex_lock(&xfs_buf_freelist.cm_mutex);
> + list_add(&bp->b_node.cn_mru, &xfs_buf_freelist.cm_list);
> + pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex);
> }
>
> static unsigned int
> libxfs_bulkrelse(
> - struct cache *cache,
> - struct list_head *list)
> + struct cache *cache,
> + struct list_head *list)
> {
> xfs_buf_t *bp;
> int count = 0;
> @@ -1249,7 +1254,8 @@ libxfs_bulkrelse(
>
> list_for_each_entry(bp, list, b_node.cn_mru) {
> if (bp->b_flags & LIBXFS_B_DIRTY)
> - libxfs_writebufr(bp);
> + fprintf(stderr,
> + "releasing dirty buffer (bulk) to free list!");
> count++;
> }
>
> @@ -1260,18 +1266,22 @@ libxfs_bulkrelse(
> return count;
> }
>
> -static void
> -libxfs_bflush(struct cache_node *node)
> +static int
> +libxfs_bflush(
> + struct cache_node *node)
> {
> - xfs_buf_t *bp = (xfs_buf_t *)node;
> + struct xfs_buf *bp = (struct xfs_buf *)node;
>
> - if ((bp != NULL) && (bp->b_flags & LIBXFS_B_DIRTY))
> - libxfs_writebufr(bp);
> + if (bp->b_flags & LIBXFS_B_DIRTY)
> + return libxfs_writebufr(bp);
> + return 0;
> }
>
> void
> libxfs_putbufr(xfs_buf_t *bp)
> {
> + if (bp->b_flags & LIBXFS_B_DIRTY)
> + libxfs_writebufr(bp);
> libxfs_brelse((struct cache_node *)bp);
> }
>
> --
> 2.5.0
>
> _______________________________________________
> xfs mailing list
> xfs@oss.sgi.com
> http://oss.sgi.com/mailman/listinfo/xfs
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
next prev parent reply other threads:[~2016-01-05 18:34 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-12-21 21:37 [PATCH 0/9] xfsprogs: big, broken filesystems cause pain Dave Chinner
2015-12-21 21:37 ` [PATCH 1/9] metadump: clean up btree block region zeroing Dave Chinner
2016-01-04 19:11 ` Brian Foster
2015-12-21 21:37 ` [PATCH 2/9] metadump: bounds check btree block regions being zeroed Dave Chinner
2016-01-04 19:11 ` Brian Foster
2015-12-21 21:37 ` [PATCH 3/9] xfs_mdrestore: correctly account bytes read Dave Chinner
2016-01-04 19:12 ` Brian Foster
2015-12-21 21:37 ` [PATCH 4/9] repair: parallelise phase 7 Dave Chinner
2016-01-04 19:12 ` Brian Foster
2015-12-21 21:37 ` [PATCH 5/9] repair: parallelise uncertin inode processing in phase 3 Dave Chinner
2016-01-04 19:12 ` Brian Foster
2015-12-21 21:37 ` [PATCH 6/9] libxfs: directory node splitting does not have an extra block Dave Chinner
2016-01-05 18:34 ` Brian Foster
2016-01-05 22:07 ` Dave Chinner
2015-12-21 21:37 ` [PATCH 7/9] libxfs: don't discard dirty buffers Dave Chinner
2016-01-05 18:34 ` Brian Foster [this message]
2015-12-21 21:37 ` [PATCH 8/9] libxfs: don't repeatedly shake unwritable buffers Dave Chinner
2016-01-05 18:34 ` Brian Foster
2015-12-21 21:37 ` [PATCH 9/9] libxfs: keep unflushable buffers off the cache MRUs Dave Chinner
2016-01-05 18:34 ` Brian Foster
2016-01-05 23:58 ` Dave Chinner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20160105183407.GC38749@bfoster.bfoster \
--to=bfoster@redhat.com \
--cc=david@fromorbit.com \
--cc=xfs@oss.sgi.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox