Re: [PATCH 03/14] xfs: repair free space btrees

Linux XFS filesystem development
 help / color / mirror / Atom feed

From: Dave Chinner <david@fromorbit.com>
To: "Darrick J. Wong" <darrick.wong@oracle.com>
Cc: linux-xfs@vger.kernel.org
Subject: Re: [PATCH 03/14] xfs: repair free space btrees
Date: Mon, 4 Jun 2018 12:12:34 +1000	[thread overview]
Message-ID: <20180604021234.GL10363@dastard> (raw)
In-Reply-To: <152770865831.11611.7628478596629227707.stgit@magnolia>

On Wed, May 30, 2018 at 12:30:58PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Rebuild the free space btrees from the gaps in the rmap btree.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/Makefile             |    1 
>  fs/xfs/scrub/alloc.c        |    1 
>  fs/xfs/scrub/alloc_repair.c |  430 +++++++++++++++++++++++++++++++++++++++++++
>  fs/xfs/scrub/common.c       |    8 +
>  fs/xfs/scrub/repair.h       |    2 
>  fs/xfs/scrub/scrub.c        |    4 
>  6 files changed, 442 insertions(+), 4 deletions(-)
>  create mode 100644 fs/xfs/scrub/alloc_repair.c
> 
> 
> diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
> index 29fe115f29d5..abe035ad0aa4 100644
> --- a/fs/xfs/Makefile
> +++ b/fs/xfs/Makefile
> @@ -175,6 +175,7 @@ xfs-$(CONFIG_XFS_QUOTA)		+= scrub/quota.o
>  ifeq ($(CONFIG_XFS_ONLINE_REPAIR),y)
>  xfs-y				+= $(addprefix scrub/, \
>  				   agheader_repair.o \
> +				   alloc_repair.o \
>  				   repair.o \
>  				   )
>  endif
> diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c
> index 941a0a55224e..fe7e8bdf4a52 100644
> --- a/fs/xfs/scrub/alloc.c
> +++ b/fs/xfs/scrub/alloc.c
> @@ -29,7 +29,6 @@
>  #include "xfs_log_format.h"
>  #include "xfs_trans.h"
>  #include "xfs_sb.h"
> -#include "xfs_alloc.h"
>  #include "xfs_rmap.h"
>  #include "xfs_alloc.h"
>  #include "scrub/xfs_scrub.h"
> diff --git a/fs/xfs/scrub/alloc_repair.c b/fs/xfs/scrub/alloc_repair.c
> new file mode 100644
> index 000000000000..5a81713a69cd
> --- /dev/null
> +++ b/fs/xfs/scrub/alloc_repair.c
> @@ -0,0 +1,430 @@
> +/*
> + * Copyright (C) 2017 Oracle.  All Rights Reserved.
> + *
> + * Author: Darrick J. Wong <darrick.wong@oracle.com>
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version 2
> + * of the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it would be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write the Free Software Foundation,
> + * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
> + */
> +#include "xfs.h"
> +#include "xfs_fs.h"
> +#include "xfs_shared.h"
> +#include "xfs_format.h"
> +#include "xfs_trans_resv.h"
> +#include "xfs_mount.h"
> +#include "xfs_defer.h"
> +#include "xfs_btree.h"
> +#include "xfs_bit.h"
> +#include "xfs_log_format.h"
> +#include "xfs_trans.h"
> +#include "xfs_sb.h"
> +#include "xfs_alloc.h"
> +#include "xfs_alloc_btree.h"
> +#include "xfs_rmap.h"
> +#include "xfs_rmap_btree.h"
> +#include "xfs_inode.h"
> +#include "xfs_refcount.h"
> +#include "scrub/xfs_scrub.h"
> +#include "scrub/scrub.h"
> +#include "scrub/common.h"
> +#include "scrub/btree.h"
> +#include "scrub/trace.h"
> +#include "scrub/repair.h"
> +
> +/* Free space btree repair. */

Can you add a description of the algorithm used here?

> +
> +struct xfs_repair_alloc_extent {
> +	struct list_head		list;
> +	xfs_agblock_t			bno;
> +	xfs_extlen_t			len;
> +};
> +
> +struct xfs_repair_alloc {
> +	struct list_head		extlist;
> +	struct xfs_repair_extent_list	btlist;	  /* OWN_AG blocks */
> +	struct xfs_repair_extent_list	nobtlist; /* rmapbt/agfl blocks */
> +	struct xfs_scrub_context	*sc;
> +	xfs_agblock_t			next_bno;
> +	uint64_t			nr_records;
> +};
> +
> +/* Record extents that aren't in use from gaps in the rmap records. */
> +STATIC int
> +xfs_repair_alloc_extent_fn(
> +	struct xfs_btree_cur		*cur,
> +	struct xfs_rmap_irec		*rec,
> +	void				*priv)
> +{
> +	struct xfs_repair_alloc		*ra = priv;
> +	struct xfs_repair_alloc_extent	*rae;
> +	struct xfs_buf			*bp;
> +	xfs_fsblock_t			fsb;
> +	int				i;
> +	int				error;
> +
> +	/* Record all the OWN_AG blocks... */
> +	if (rec->rm_owner == XFS_RMAP_OWN_AG) {
> +		fsb = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_private.a.agno,
> +				rec->rm_startblock);
> +		error = xfs_repair_collect_btree_extent(ra->sc,
> +				&ra->btlist, fsb, rec->rm_blockcount);
> +		if (error)
> +			return error;
> +	}
> +
> +	/* ...and all the rmapbt blocks... */
> +	for (i = 0; i < cur->bc_nlevels && cur->bc_ptrs[i] == 1; i++) {
> +		xfs_btree_get_block(cur, i, &bp);
> +		if (!bp)
> +			continue;
> +		fsb = XFS_DADDR_TO_FSB(cur->bc_mp, bp->b_bn);
> +		error = xfs_repair_collect_btree_extent(ra->sc,
> +				&ra->nobtlist, fsb, 1);
> +		if (error)
> +			return error;
> +	}

This looks familiar from previous patches, including the magic
bc_ptrs check. Factoring opportunity?

> +
> +	/* ...and all the free space. */
> +	if (rec->rm_startblock > ra->next_bno) {
> +		trace_xfs_repair_alloc_extent_fn(cur->bc_mp,
> +				cur->bc_private.a.agno,
> +				ra->next_bno, rec->rm_startblock - ra->next_bno,
> +				XFS_RMAP_OWN_NULL, 0, 0);
> +
> +		rae = kmem_alloc(sizeof(struct xfs_repair_alloc_extent),
> +				KM_MAYFAIL);
> +		if (!rae)
> +			return -ENOMEM;
> +		INIT_LIST_HEAD(&rae->list);
> +		rae->bno = ra->next_bno;
> +		rae->len = rec->rm_startblock - ra->next_bno;
> +		list_add_tail(&rae->list, &ra->extlist);
> +		ra->nr_records++;
> +	}
> +	ra->next_bno = max_t(xfs_agblock_t, ra->next_bno,
> +			rec->rm_startblock + rec->rm_blockcount);
> +	return 0;
> +}

[....]

> +/* Allocate a block from the (cached) longest extent in the AG. */
> +STATIC xfs_fsblock_t
> +xfs_repair_allocbt_alloc_from_longest(
> +	struct xfs_repair_alloc		*ra,
> +	struct xfs_repair_alloc_extent	**longest)
> +{
> +	xfs_fsblock_t			fsb;
> +
> +	if (*longest && (*longest)->len == 0) {
> +		list_del(&(*longest)->list);
> +		kmem_free(*longest);
> +		*longest = NULL;
> +	}
> +
> +	if (*longest == NULL) {
> +		*longest = xfs_repair_allocbt_get_longest(ra);
> +		if (*longest == NULL)
> +			return NULLFSBLOCK;
> +	}
> +
> +	fsb = XFS_AGB_TO_FSB(ra->sc->mp, ra->sc->sa.agno, (*longest)->bno);
> +	(*longest)->bno++;
> +	(*longest)->len--;

What if this makes the longest extent no longer the longest on the
extent list?

> +	return fsb;
> +}
> +
> +/* Repair the freespace btrees for some AG. */
> +int
> +xfs_repair_allocbt(
> +	struct xfs_scrub_context	*sc)
> +{
> +	struct xfs_repair_alloc		ra;
> +	struct xfs_owner_info		oinfo;
> +	struct xfs_mount		*mp = sc->mp;
> +	struct xfs_btree_cur		*cur = NULL;
> +	struct xfs_repair_alloc_extent	*longest = NULL;
> +	struct xfs_repair_alloc_extent	*rae;
> +	struct xfs_repair_alloc_extent	*n;
> +	struct xfs_perag		*pag;
> +	struct xfs_agf			*agf;
> +	struct xfs_buf			*bp;
> +	xfs_fsblock_t			bnofsb;
> +	xfs_fsblock_t			cntfsb;
> +	xfs_extlen_t			oldf;
> +	xfs_extlen_t			nr_blocks;
> +	xfs_agblock_t			agend;
> +	int				error;
> +
> +	/* We require the rmapbt to rebuild anything. */
> +	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
> +		return -EOPNOTSUPP;
> +
> +	xfs_scrub_perag_get(sc->mp, &sc->sa);
> +	pag = sc->sa.pag;

Probably should make xfs_scrub_perag_get() return the pag directly.

> +	/*
> +	 * Make sure the busy extent list is clear because we can't put
> +	 * extents on there twice.
> +	 */
> +	spin_lock(&pag->pagb_lock);
> +	if (pag->pagb_tree.rb_node) {
> +		spin_unlock(&pag->pagb_lock);
> +		return -EDEADLOCK;
> +	}
> +	spin_unlock(&pag->pagb_lock);

Can you wrap that up in a helper, say, xfs_extent_busy_list_empty()?

	if (!xfs_extent_busy_list_empty(pag))
		return -EDEADLOCK;

> +	/*
> +	 * Collect all reverse mappings for free extents, and the rmapbt
> +	 * blocks.  We can discover the rmapbt blocks completely from a
> +	 * query_all handler because there are always rmapbt entries.
> +	 * (One cannot use on query_all to visit all of a btree's blocks
> +	 * unless that btree is guaranteed to have at least one entry.)
> +	 */
> +	INIT_LIST_HEAD(&ra.extlist);
> +	xfs_repair_init_extent_list(&ra.btlist);
> +	xfs_repair_init_extent_list(&ra.nobtlist);
> +	ra.next_bno = 0;
> +	ra.nr_records = 0;
> +	ra.sc = sc;
> +
> +	cur = xfs_rmapbt_init_cursor(mp, sc->tp, sc->sa.agf_bp, sc->sa.agno);
> +	error = xfs_rmap_query_all(cur, xfs_repair_alloc_extent_fn, &ra);
> +	if (error)
> +		goto out;
> +	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
> +	cur = NULL;
> +
> +	/* Insert a record for space between the last rmap and EOAG. */
> +	agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
> +	agend = be32_to_cpu(agf->agf_length);
> +	if (ra.next_bno < agend) {
> +		rae = kmem_alloc(sizeof(struct xfs_repair_alloc_extent),
> +				KM_MAYFAIL);
> +		if (!rae) {
> +			error = -ENOMEM;
> +			goto out;
> +		}
> +		INIT_LIST_HEAD(&rae->list);
> +		rae->bno = ra.next_bno;
> +		rae->len = agend - ra.next_bno;
> +		list_add_tail(&rae->list, &ra.extlist);
> +		ra.nr_records++;
> +	}
> +
> +	/* Collect all the AGFL blocks. */
> +	error = xfs_agfl_walk(sc->mp, XFS_BUF_TO_AGF(sc->sa.agf_bp),
> +			sc->sa.agfl_bp, xfs_repair_collect_agfl_block, &ra);
> +	if (error)
> +		goto out;
> +
> +	/* Do we actually have enough space to do this? */
> +	nr_blocks = 2 * xfs_allocbt_calc_size(mp, ra.nr_records);
> +	if (!xfs_repair_ag_has_space(pag, nr_blocks, XFS_AG_RESV_NONE)) {
> +		error = -ENOSPC;
> +		goto out;
> +	}
> +
> +	/* Invalidate all the bnobt/cntbt blocks in btlist. */
> +	error = xfs_repair_subtract_extents(sc, &ra.btlist, &ra.nobtlist);
> +	if (error)
> +		goto out;
> +	xfs_repair_cancel_btree_extents(sc, &ra.nobtlist);
> +	error = xfs_repair_invalidate_blocks(sc, &ra.btlist);
> +	if (error)
> +		goto out;

So this could be factored out into xfs_repair_allocbt_get_free_extents().

> +
> +	/* Allocate new bnobt root. */
> +	bnofsb = xfs_repair_allocbt_alloc_from_longest(&ra, &longest);
> +	if (bnofsb == NULLFSBLOCK) {
> +		error = -ENOSPC;
> +		goto out;
> +	}
> +
> +	/* Allocate new cntbt root. */
> +	cntfsb = xfs_repair_allocbt_alloc_from_longest(&ra, &longest);
> +	if (cntfsb == NULLFSBLOCK) {
> +		error = -ENOSPC;
> +		goto out;
> +	}
> +
> +	agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
> +	/* Initialize new bnobt root. */
> +	error = xfs_repair_init_btblock(sc, bnofsb, &bp, XFS_BTNUM_BNO,
> +			&xfs_allocbt_buf_ops);
> +	if (error)
> +		goto out;
> +	agf->agf_roots[XFS_BTNUM_BNOi] =
> +			cpu_to_be32(XFS_FSB_TO_AGBNO(mp, bnofsb));
> +	agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1);
> +
> +	/* Initialize new cntbt root. */
> +	error = xfs_repair_init_btblock(sc, cntfsb, &bp, XFS_BTNUM_CNT,
> +			&xfs_allocbt_buf_ops);
> +	if (error)
> +		goto out;
> +	agf->agf_roots[XFS_BTNUM_CNTi] =
> +			cpu_to_be32(XFS_FSB_TO_AGBNO(mp, cntfsb));
> +	agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1);

xfs_repair_allocbt_new_btree_roots()

> +
> +	/*
> +	 * Since we're abandoning the old bnobt/cntbt, we have to
> +	 * decrease fdblocks by the # of blocks in those trees.
> +	 * btreeblks counts the non-root blocks of the free space
> +	 * and rmap btrees.  Do this before resetting the AGF counters.
> +	 */
> +	oldf = pag->pagf_btreeblks + 2;
> +	oldf -= (be32_to_cpu(agf->agf_rmap_blocks) - 1);
> +	error = xfs_mod_fdblocks(mp, -(int64_t)oldf, false);
> +	if (error)
> +		goto out;
> +
> +	/* Reset the perag info. */
> +	pag->pagf_btreeblks = be32_to_cpu(agf->agf_rmap_blocks) - 1;
> +	pag->pagf_freeblks = 0;
> +	pag->pagf_longest = 0;
> +	pag->pagf_levels[XFS_BTNUM_BNOi] =
> +			be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
> +	pag->pagf_levels[XFS_BTNUM_CNTi] =
> +			be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
> +
> +	/* Now reset the AGF counters. */
> +	agf->agf_btreeblks = cpu_to_be32(pag->pagf_btreeblks);
> +	agf->agf_freeblks = cpu_to_be32(pag->pagf_freeblks);
> +	agf->agf_longest = cpu_to_be32(pag->pagf_longest);
> +	xfs_alloc_log_agf(sc->tp, sc->sa.agf_bp,
> +			XFS_AGF_ROOTS | XFS_AGF_LEVELS | XFS_AGF_BTREEBLKS |
> +			XFS_AGF_LONGEST | XFS_AGF_FREEBLKS);
> +	error = xfs_repair_roll_ag_trans(sc);
> +	if (error)
> +		goto out;

xfs_repair_allocbt_reset_counters()?

> +	/*
> +	 * Insert the longest free extent in case it's necessary to
> +	 * refresh the AGFL with multiple blocks.
> +	 */
> +	xfs_rmap_skip_owner_update(&oinfo);
> +	if (longest && longest->len == 0) {
> +		error = xfs_repair_allocbt_free_extent(sc,
> +				XFS_AGB_TO_FSB(sc->mp, sc->sa.agno,
> +					longest->bno),
> +				longest->len, &oinfo);
> +		if (error)
> +			goto out;
> +		list_del(&longest->list);
> +		kmem_free(longest);
> +	}
> +
> +	/* Insert records into the new btrees. */
> +	list_sort(NULL, &ra.extlist, xfs_repair_allocbt_extent_cmp);
> +	list_for_each_entry_safe(rae, n, &ra.extlist, list) {
> +		error = xfs_repair_allocbt_free_extent(sc,
> +				XFS_AGB_TO_FSB(sc->mp, sc->sa.agno, rae->bno),
> +				rae->len, &oinfo);
> +		if (error)
> +			goto out;
> +		list_del(&rae->list);
> +		kmem_free(rae);
> +	}
> +
> +	/* Add rmap records for the btree roots */
> +	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
> +	error = xfs_rmap_alloc(sc->tp, sc->sa.agf_bp, sc->sa.agno,
> +			XFS_FSB_TO_AGBNO(mp, bnofsb), 1, &oinfo);
> +	if (error)
> +		goto out;
> +	error = xfs_rmap_alloc(sc->tp, sc->sa.agf_bp, sc->sa.agno,
> +			XFS_FSB_TO_AGBNO(mp, cntfsb), 1, &oinfo);
> +	if (error)
> +		goto out;

xfs_repair_allocbt_rebuild_tree()

> +
> +	/* Free all the OWN_AG blocks that are not in the rmapbt/agfl. */
> +	return xfs_repair_reap_btree_extents(sc, &ra.btlist, &oinfo,
> +			XFS_AG_RESV_NONE);
> +out:
> +	xfs_repair_cancel_btree_extents(sc, &ra.btlist);
> +	xfs_repair_cancel_btree_extents(sc, &ra.nobtlist);
> +	if (cur)
> +		xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
> +	list_for_each_entry_safe(rae, n, &ra.extlist, list) {
> +		list_del(&rae->list);
> +		kmem_free(rae);
> +	}
> +	return error;
> +}

-Dave.
-- 
Dave Chinner
david@fromorbit.com

next prev parent reply	other threads:[~2018-06-04  2:12 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-05-30 19:30 [PATCH v15.2 00/14] xfs-4.18: online repair support Darrick J. Wong
2018-05-30 19:30 ` [PATCH 01/14] xfs: repair the AGF and AGFL Darrick J. Wong
2018-06-04  1:52   ` Dave Chinner
2018-06-05 23:18     ` Darrick J. Wong
2018-06-06  4:06       ` Dave Chinner
2018-06-06  4:56         ` Darrick J. Wong
2018-06-07  0:31           ` Dave Chinner
2018-06-07  4:42             ` Darrick J. Wong
2018-06-08  0:55               ` Dave Chinner
2018-06-08  1:23                 ` Darrick J. Wong
2018-05-30 19:30 ` [PATCH 02/14] xfs: repair the AGI Darrick J. Wong
2018-06-04  1:56   ` Dave Chinner
2018-06-05 23:54     ` Darrick J. Wong
2018-05-30 19:30 ` [PATCH 03/14] xfs: repair free space btrees Darrick J. Wong
2018-06-04  2:12   ` Dave Chinner [this message]
2018-06-06  1:50     ` Darrick J. Wong
2018-06-06  3:34       ` Dave Chinner
2018-06-06  4:01         ` Darrick J. Wong
2018-05-30 19:31 ` [PATCH 04/14] xfs: repair inode btrees Darrick J. Wong
2018-06-04  3:41   ` Dave Chinner
2018-06-06  3:55     ` Darrick J. Wong
2018-06-06  4:32       ` Dave Chinner
2018-06-06  4:58         ` Darrick J. Wong
2018-05-30 19:31 ` [PATCH 05/14] xfs: repair the rmapbt Darrick J. Wong
2018-05-31  5:42   ` Amir Goldstein
2018-06-06 21:13     ` Darrick J. Wong
2018-05-30 19:31 ` [PATCH 06/14] xfs: repair refcount btrees Darrick J. Wong
2018-05-30 19:31 ` [PATCH 07/14] xfs: repair inode records Darrick J. Wong
2018-05-30 19:31 ` [PATCH 08/14] xfs: zap broken inode forks Darrick J. Wong
2018-05-30 19:31 ` [PATCH 09/14] xfs: repair inode block maps Darrick J. Wong
2018-05-30 19:31 ` [PATCH 10/14] xfs: repair damaged symlinks Darrick J. Wong
2018-05-30 19:31 ` [PATCH 11/14] xfs: repair extended attributes Darrick J. Wong
2018-05-30 19:31 ` [PATCH 12/14] xfs: scrub should set preen if attr leaf has holes Darrick J. Wong
2018-05-30 19:32 ` [PATCH 13/14] xfs: repair quotas Darrick J. Wong
2018-05-30 19:32 ` [PATCH 14/14] xfs: implement live quotacheck as part of quota repair Darrick J. Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180604021234.GL10363@dastard \
    --to=david@fromorbit.com \
    --cc=darrick.wong@oracle.com \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox