From: Dave Chinner <david@fromorbit.com>
To: "Darrick J. Wong" <darrick.wong@oracle.com>
Cc: linux-xfs@vger.kernel.org
Subject: Re: [PATCH 03/14] xfs: repair free space btrees
Date: Mon, 4 Jun 2018 12:12:34 +1000 [thread overview]
Message-ID: <20180604021234.GL10363@dastard> (raw)
In-Reply-To: <152770865831.11611.7628478596629227707.stgit@magnolia>
On Wed, May 30, 2018 at 12:30:58PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
>
> Rebuild the free space btrees from the gaps in the rmap btree.
>
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
> fs/xfs/Makefile | 1
> fs/xfs/scrub/alloc.c | 1
> fs/xfs/scrub/alloc_repair.c | 430 +++++++++++++++++++++++++++++++++++++++++++
> fs/xfs/scrub/common.c | 8 +
> fs/xfs/scrub/repair.h | 2
> fs/xfs/scrub/scrub.c | 4
> 6 files changed, 442 insertions(+), 4 deletions(-)
> create mode 100644 fs/xfs/scrub/alloc_repair.c
>
>
> diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
> index 29fe115f29d5..abe035ad0aa4 100644
> --- a/fs/xfs/Makefile
> +++ b/fs/xfs/Makefile
> @@ -175,6 +175,7 @@ xfs-$(CONFIG_XFS_QUOTA) += scrub/quota.o
> ifeq ($(CONFIG_XFS_ONLINE_REPAIR),y)
> xfs-y += $(addprefix scrub/, \
> agheader_repair.o \
> + alloc_repair.o \
> repair.o \
> )
> endif
> diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c
> index 941a0a55224e..fe7e8bdf4a52 100644
> --- a/fs/xfs/scrub/alloc.c
> +++ b/fs/xfs/scrub/alloc.c
> @@ -29,7 +29,6 @@
> #include "xfs_log_format.h"
> #include "xfs_trans.h"
> #include "xfs_sb.h"
> -#include "xfs_alloc.h"
> #include "xfs_rmap.h"
> #include "xfs_alloc.h"
> #include "scrub/xfs_scrub.h"
> diff --git a/fs/xfs/scrub/alloc_repair.c b/fs/xfs/scrub/alloc_repair.c
> new file mode 100644
> index 000000000000..5a81713a69cd
> --- /dev/null
> +++ b/fs/xfs/scrub/alloc_repair.c
> @@ -0,0 +1,430 @@
> +/*
> + * Copyright (C) 2017 Oracle. All Rights Reserved.
> + *
> + * Author: Darrick J. Wong <darrick.wong@oracle.com>
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version 2
> + * of the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it would be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write the Free Software Foundation,
> + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
> + */
> +#include "xfs.h"
> +#include "xfs_fs.h"
> +#include "xfs_shared.h"
> +#include "xfs_format.h"
> +#include "xfs_trans_resv.h"
> +#include "xfs_mount.h"
> +#include "xfs_defer.h"
> +#include "xfs_btree.h"
> +#include "xfs_bit.h"
> +#include "xfs_log_format.h"
> +#include "xfs_trans.h"
> +#include "xfs_sb.h"
> +#include "xfs_alloc.h"
> +#include "xfs_alloc_btree.h"
> +#include "xfs_rmap.h"
> +#include "xfs_rmap_btree.h"
> +#include "xfs_inode.h"
> +#include "xfs_refcount.h"
> +#include "scrub/xfs_scrub.h"
> +#include "scrub/scrub.h"
> +#include "scrub/common.h"
> +#include "scrub/btree.h"
> +#include "scrub/trace.h"
> +#include "scrub/repair.h"
> +
> +/* Free space btree repair. */
Can you add a description of the algorithm used here?
> +
> +struct xfs_repair_alloc_extent {
> + struct list_head list;
> + xfs_agblock_t bno;
> + xfs_extlen_t len;
> +};
> +
> +struct xfs_repair_alloc {
> + struct list_head extlist;
> + struct xfs_repair_extent_list btlist; /* OWN_AG blocks */
> + struct xfs_repair_extent_list nobtlist; /* rmapbt/agfl blocks */
> + struct xfs_scrub_context *sc;
> + xfs_agblock_t next_bno;
> + uint64_t nr_records;
> +};
> +
> +/* Record extents that aren't in use from gaps in the rmap records. */
> +STATIC int
> +xfs_repair_alloc_extent_fn(
> + struct xfs_btree_cur *cur,
> + struct xfs_rmap_irec *rec,
> + void *priv)
> +{
> + struct xfs_repair_alloc *ra = priv;
> + struct xfs_repair_alloc_extent *rae;
> + struct xfs_buf *bp;
> + xfs_fsblock_t fsb;
> + int i;
> + int error;
> +
> + /* Record all the OWN_AG blocks... */
> + if (rec->rm_owner == XFS_RMAP_OWN_AG) {
> + fsb = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_private.a.agno,
> + rec->rm_startblock);
> + error = xfs_repair_collect_btree_extent(ra->sc,
> + &ra->btlist, fsb, rec->rm_blockcount);
> + if (error)
> + return error;
> + }
> +
> + /* ...and all the rmapbt blocks... */
> + for (i = 0; i < cur->bc_nlevels && cur->bc_ptrs[i] == 1; i++) {
> + xfs_btree_get_block(cur, i, &bp);
> + if (!bp)
> + continue;
> + fsb = XFS_DADDR_TO_FSB(cur->bc_mp, bp->b_bn);
> + error = xfs_repair_collect_btree_extent(ra->sc,
> + &ra->nobtlist, fsb, 1);
> + if (error)
> + return error;
> + }
This looks familiar from previous patches, including the magic
bc_ptrs check. Factoring opportunity?
> +
> + /* ...and all the free space. */
> + if (rec->rm_startblock > ra->next_bno) {
> + trace_xfs_repair_alloc_extent_fn(cur->bc_mp,
> + cur->bc_private.a.agno,
> + ra->next_bno, rec->rm_startblock - ra->next_bno,
> + XFS_RMAP_OWN_NULL, 0, 0);
> +
> + rae = kmem_alloc(sizeof(struct xfs_repair_alloc_extent),
> + KM_MAYFAIL);
> + if (!rae)
> + return -ENOMEM;
> + INIT_LIST_HEAD(&rae->list);
> + rae->bno = ra->next_bno;
> + rae->len = rec->rm_startblock - ra->next_bno;
> + list_add_tail(&rae->list, &ra->extlist);
> + ra->nr_records++;
> + }
> + ra->next_bno = max_t(xfs_agblock_t, ra->next_bno,
> + rec->rm_startblock + rec->rm_blockcount);
> + return 0;
> +}
[....]
> +/* Allocate a block from the (cached) longest extent in the AG. */
> +STATIC xfs_fsblock_t
> +xfs_repair_allocbt_alloc_from_longest(
> + struct xfs_repair_alloc *ra,
> + struct xfs_repair_alloc_extent **longest)
> +{
> + xfs_fsblock_t fsb;
> +
> + if (*longest && (*longest)->len == 0) {
> + list_del(&(*longest)->list);
> + kmem_free(*longest);
> + *longest = NULL;
> + }
> +
> + if (*longest == NULL) {
> + *longest = xfs_repair_allocbt_get_longest(ra);
> + if (*longest == NULL)
> + return NULLFSBLOCK;
> + }
> +
> + fsb = XFS_AGB_TO_FSB(ra->sc->mp, ra->sc->sa.agno, (*longest)->bno);
> + (*longest)->bno++;
> + (*longest)->len--;
What if this makes the longest extent no longer the longest on the
extent list?
> + return fsb;
> +}
> +
> +/* Repair the freespace btrees for some AG. */
> +int
> +xfs_repair_allocbt(
> + struct xfs_scrub_context *sc)
> +{
> + struct xfs_repair_alloc ra;
> + struct xfs_owner_info oinfo;
> + struct xfs_mount *mp = sc->mp;
> + struct xfs_btree_cur *cur = NULL;
> + struct xfs_repair_alloc_extent *longest = NULL;
> + struct xfs_repair_alloc_extent *rae;
> + struct xfs_repair_alloc_extent *n;
> + struct xfs_perag *pag;
> + struct xfs_agf *agf;
> + struct xfs_buf *bp;
> + xfs_fsblock_t bnofsb;
> + xfs_fsblock_t cntfsb;
> + xfs_extlen_t oldf;
> + xfs_extlen_t nr_blocks;
> + xfs_agblock_t agend;
> + int error;
> +
> + /* We require the rmapbt to rebuild anything. */
> + if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
> + return -EOPNOTSUPP;
> +
> + xfs_scrub_perag_get(sc->mp, &sc->sa);
> + pag = sc->sa.pag;
Probably should make xfs_scrub_perag_get() return the pag directly.
> + /*
> + * Make sure the busy extent list is clear because we can't put
> + * extents on there twice.
> + */
> + spin_lock(&pag->pagb_lock);
> + if (pag->pagb_tree.rb_node) {
> + spin_unlock(&pag->pagb_lock);
> + return -EDEADLOCK;
> + }
> + spin_unlock(&pag->pagb_lock);
Can you wrap that up in a helper, say, xfs_extent_busy_list_empty()?
if (!xfs_extent_busy_list_empty(pag))
return -EDEADLOCK;
> + /*
> + * Collect all reverse mappings for free extents, and the rmapbt
> + * blocks. We can discover the rmapbt blocks completely from a
> + * query_all handler because there are always rmapbt entries.
> + * (One cannot use on query_all to visit all of a btree's blocks
> + * unless that btree is guaranteed to have at least one entry.)
> + */
> + INIT_LIST_HEAD(&ra.extlist);
> + xfs_repair_init_extent_list(&ra.btlist);
> + xfs_repair_init_extent_list(&ra.nobtlist);
> + ra.next_bno = 0;
> + ra.nr_records = 0;
> + ra.sc = sc;
> +
> + cur = xfs_rmapbt_init_cursor(mp, sc->tp, sc->sa.agf_bp, sc->sa.agno);
> + error = xfs_rmap_query_all(cur, xfs_repair_alloc_extent_fn, &ra);
> + if (error)
> + goto out;
> + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
> + cur = NULL;
> +
> + /* Insert a record for space between the last rmap and EOAG. */
> + agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
> + agend = be32_to_cpu(agf->agf_length);
> + if (ra.next_bno < agend) {
> + rae = kmem_alloc(sizeof(struct xfs_repair_alloc_extent),
> + KM_MAYFAIL);
> + if (!rae) {
> + error = -ENOMEM;
> + goto out;
> + }
> + INIT_LIST_HEAD(&rae->list);
> + rae->bno = ra.next_bno;
> + rae->len = agend - ra.next_bno;
> + list_add_tail(&rae->list, &ra.extlist);
> + ra.nr_records++;
> + }
> +
> + /* Collect all the AGFL blocks. */
> + error = xfs_agfl_walk(sc->mp, XFS_BUF_TO_AGF(sc->sa.agf_bp),
> + sc->sa.agfl_bp, xfs_repair_collect_agfl_block, &ra);
> + if (error)
> + goto out;
> +
> + /* Do we actually have enough space to do this? */
> + nr_blocks = 2 * xfs_allocbt_calc_size(mp, ra.nr_records);
> + if (!xfs_repair_ag_has_space(pag, nr_blocks, XFS_AG_RESV_NONE)) {
> + error = -ENOSPC;
> + goto out;
> + }
> +
> + /* Invalidate all the bnobt/cntbt blocks in btlist. */
> + error = xfs_repair_subtract_extents(sc, &ra.btlist, &ra.nobtlist);
> + if (error)
> + goto out;
> + xfs_repair_cancel_btree_extents(sc, &ra.nobtlist);
> + error = xfs_repair_invalidate_blocks(sc, &ra.btlist);
> + if (error)
> + goto out;
So this could be factored out into xfs_repair_allocbt_get_free_extents().
> +
> + /* Allocate new bnobt root. */
> + bnofsb = xfs_repair_allocbt_alloc_from_longest(&ra, &longest);
> + if (bnofsb == NULLFSBLOCK) {
> + error = -ENOSPC;
> + goto out;
> + }
> +
> + /* Allocate new cntbt root. */
> + cntfsb = xfs_repair_allocbt_alloc_from_longest(&ra, &longest);
> + if (cntfsb == NULLFSBLOCK) {
> + error = -ENOSPC;
> + goto out;
> + }
> +
> + agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
> + /* Initialize new bnobt root. */
> + error = xfs_repair_init_btblock(sc, bnofsb, &bp, XFS_BTNUM_BNO,
> + &xfs_allocbt_buf_ops);
> + if (error)
> + goto out;
> + agf->agf_roots[XFS_BTNUM_BNOi] =
> + cpu_to_be32(XFS_FSB_TO_AGBNO(mp, bnofsb));
> + agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1);
> +
> + /* Initialize new cntbt root. */
> + error = xfs_repair_init_btblock(sc, cntfsb, &bp, XFS_BTNUM_CNT,
> + &xfs_allocbt_buf_ops);
> + if (error)
> + goto out;
> + agf->agf_roots[XFS_BTNUM_CNTi] =
> + cpu_to_be32(XFS_FSB_TO_AGBNO(mp, cntfsb));
> + agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1);
xfs_repair_allocbt_new_btree_roots()
> +
> + /*
> + * Since we're abandoning the old bnobt/cntbt, we have to
> + * decrease fdblocks by the # of blocks in those trees.
> + * btreeblks counts the non-root blocks of the free space
> + * and rmap btrees. Do this before resetting the AGF counters.
> + */
> + oldf = pag->pagf_btreeblks + 2;
> + oldf -= (be32_to_cpu(agf->agf_rmap_blocks) - 1);
> + error = xfs_mod_fdblocks(mp, -(int64_t)oldf, false);
> + if (error)
> + goto out;
> +
> + /* Reset the perag info. */
> + pag->pagf_btreeblks = be32_to_cpu(agf->agf_rmap_blocks) - 1;
> + pag->pagf_freeblks = 0;
> + pag->pagf_longest = 0;
> + pag->pagf_levels[XFS_BTNUM_BNOi] =
> + be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
> + pag->pagf_levels[XFS_BTNUM_CNTi] =
> + be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
> +
> + /* Now reset the AGF counters. */
> + agf->agf_btreeblks = cpu_to_be32(pag->pagf_btreeblks);
> + agf->agf_freeblks = cpu_to_be32(pag->pagf_freeblks);
> + agf->agf_longest = cpu_to_be32(pag->pagf_longest);
> + xfs_alloc_log_agf(sc->tp, sc->sa.agf_bp,
> + XFS_AGF_ROOTS | XFS_AGF_LEVELS | XFS_AGF_BTREEBLKS |
> + XFS_AGF_LONGEST | XFS_AGF_FREEBLKS);
> + error = xfs_repair_roll_ag_trans(sc);
> + if (error)
> + goto out;
xfs_repair_allocbt_reset_counters()?
> + /*
> + * Insert the longest free extent in case it's necessary to
> + * refresh the AGFL with multiple blocks.
> + */
> + xfs_rmap_skip_owner_update(&oinfo);
> + if (longest && longest->len == 0) {
> + error = xfs_repair_allocbt_free_extent(sc,
> + XFS_AGB_TO_FSB(sc->mp, sc->sa.agno,
> + longest->bno),
> + longest->len, &oinfo);
> + if (error)
> + goto out;
> + list_del(&longest->list);
> + kmem_free(longest);
> + }
> +
> + /* Insert records into the new btrees. */
> + list_sort(NULL, &ra.extlist, xfs_repair_allocbt_extent_cmp);
> + list_for_each_entry_safe(rae, n, &ra.extlist, list) {
> + error = xfs_repair_allocbt_free_extent(sc,
> + XFS_AGB_TO_FSB(sc->mp, sc->sa.agno, rae->bno),
> + rae->len, &oinfo);
> + if (error)
> + goto out;
> + list_del(&rae->list);
> + kmem_free(rae);
> + }
> +
> + /* Add rmap records for the btree roots */
> + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
> + error = xfs_rmap_alloc(sc->tp, sc->sa.agf_bp, sc->sa.agno,
> + XFS_FSB_TO_AGBNO(mp, bnofsb), 1, &oinfo);
> + if (error)
> + goto out;
> + error = xfs_rmap_alloc(sc->tp, sc->sa.agf_bp, sc->sa.agno,
> + XFS_FSB_TO_AGBNO(mp, cntfsb), 1, &oinfo);
> + if (error)
> + goto out;
xfs_repair_allocbt_rebuild_tree()
> +
> + /* Free all the OWN_AG blocks that are not in the rmapbt/agfl. */
> + return xfs_repair_reap_btree_extents(sc, &ra.btlist, &oinfo,
> + XFS_AG_RESV_NONE);
> +out:
> + xfs_repair_cancel_btree_extents(sc, &ra.btlist);
> + xfs_repair_cancel_btree_extents(sc, &ra.nobtlist);
> + if (cur)
> + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
> + list_for_each_entry_safe(rae, n, &ra.extlist, list) {
> + list_del(&rae->list);
> + kmem_free(rae);
> + }
> + return error;
> +}
-Dave.
--
Dave Chinner
david@fromorbit.com
next prev parent reply other threads:[~2018-06-04 2:12 UTC|newest]
Thread overview: 35+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-05-30 19:30 [PATCH v15.2 00/14] xfs-4.18: online repair support Darrick J. Wong
2018-05-30 19:30 ` [PATCH 01/14] xfs: repair the AGF and AGFL Darrick J. Wong
2018-06-04 1:52 ` Dave Chinner
2018-06-05 23:18 ` Darrick J. Wong
2018-06-06 4:06 ` Dave Chinner
2018-06-06 4:56 ` Darrick J. Wong
2018-06-07 0:31 ` Dave Chinner
2018-06-07 4:42 ` Darrick J. Wong
2018-06-08 0:55 ` Dave Chinner
2018-06-08 1:23 ` Darrick J. Wong
2018-05-30 19:30 ` [PATCH 02/14] xfs: repair the AGI Darrick J. Wong
2018-06-04 1:56 ` Dave Chinner
2018-06-05 23:54 ` Darrick J. Wong
2018-05-30 19:30 ` [PATCH 03/14] xfs: repair free space btrees Darrick J. Wong
2018-06-04 2:12 ` Dave Chinner [this message]
2018-06-06 1:50 ` Darrick J. Wong
2018-06-06 3:34 ` Dave Chinner
2018-06-06 4:01 ` Darrick J. Wong
2018-05-30 19:31 ` [PATCH 04/14] xfs: repair inode btrees Darrick J. Wong
2018-06-04 3:41 ` Dave Chinner
2018-06-06 3:55 ` Darrick J. Wong
2018-06-06 4:32 ` Dave Chinner
2018-06-06 4:58 ` Darrick J. Wong
2018-05-30 19:31 ` [PATCH 05/14] xfs: repair the rmapbt Darrick J. Wong
2018-05-31 5:42 ` Amir Goldstein
2018-06-06 21:13 ` Darrick J. Wong
2018-05-30 19:31 ` [PATCH 06/14] xfs: repair refcount btrees Darrick J. Wong
2018-05-30 19:31 ` [PATCH 07/14] xfs: repair inode records Darrick J. Wong
2018-05-30 19:31 ` [PATCH 08/14] xfs: zap broken inode forks Darrick J. Wong
2018-05-30 19:31 ` [PATCH 09/14] xfs: repair inode block maps Darrick J. Wong
2018-05-30 19:31 ` [PATCH 10/14] xfs: repair damaged symlinks Darrick J. Wong
2018-05-30 19:31 ` [PATCH 11/14] xfs: repair extended attributes Darrick J. Wong
2018-05-30 19:31 ` [PATCH 12/14] xfs: scrub should set preen if attr leaf has holes Darrick J. Wong
2018-05-30 19:32 ` [PATCH 13/14] xfs: repair quotas Darrick J. Wong
2018-05-30 19:32 ` [PATCH 14/14] xfs: implement live quotacheck as part of quota repair Darrick J. Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180604021234.GL10363@dastard \
--to=david@fromorbit.com \
--cc=darrick.wong@oracle.com \
--cc=linux-xfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox