public inbox for linux-xfs@vger.kernel.org
 help / color / mirror / Atom feed
From: Christoph Hellwig <hch@lst.de>
To: sandeen@sandeen.net
Cc: linux-xfs@vger.kernel.org,
	"Darrick J. Wong" <darrick.wong@oracle.com>,
	Brian Foster <bfoster@redhat.com>
Subject: [PATCH 48/58] xfs: introduce fake roots for ag-rooted btrees
Date: Thu,  7 May 2020 14:18:41 +0200	[thread overview]
Message-ID: <20200507121851.304002-49-hch@lst.de> (raw)
In-Reply-To: <20200507121851.304002-1-hch@lst.de>

From: "Darrick J. Wong" <darrick.wong@oracle.com>

Source kernel commit: e06536a692e032470130af5b2136b519595809da

Create an in-core fake root for AG-rooted btree types so that callers
can generate a whole new btree using the upcoming btree bulk load
function without making the new tree accessible from the rest of the
filesystem.  It is up to the individual btree type to provide a function
to create a staged cursor (presumably with the appropriate callouts to
update the fakeroot) and then commit the staged root back into the
filesystem.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/xfs_trace.h        |   1 +
 libxfs/Makefile            |   2 +
 libxfs/xfs_btree.c         |   3 +
 libxfs/xfs_btree.h         |  11 ++-
 libxfs/xfs_btree_staging.c | 179 +++++++++++++++++++++++++++++++++++++
 libxfs/xfs_btree_staging.h |  27 ++++++
 6 files changed, 222 insertions(+), 1 deletion(-)
 create mode 100644 libxfs/xfs_btree_staging.c
 create mode 100644 libxfs/xfs_btree_staging.h

diff --git a/include/xfs_trace.h b/include/xfs_trace.h
index 35c92388..bb409444 100644
--- a/include/xfs_trace.h
+++ b/include/xfs_trace.h
@@ -46,6 +46,7 @@
 #define trace_xfs_btree_corrupt(a,b)		((void) 0)
 #define trace_xfs_btree_updkeys(a,b,c)		((void) 0)
 #define trace_xfs_btree_overlapped_query_range(a,b,c)	((void) 0)
+#define trace_xfs_btree_commit_afakeroot(cur)	((void) 0)
 
 #define trace_xfs_free_extent(a,b,c,d,e,f,g)	((void) 0)
 #define trace_xfs_agf(a,b,c,d)			((void) 0)
diff --git a/libxfs/Makefile b/libxfs/Makefile
index 8c19836f..44b23816 100644
--- a/libxfs/Makefile
+++ b/libxfs/Makefile
@@ -29,6 +29,7 @@ HFILES = \
 	xfs_bmap.h \
 	xfs_bmap_btree.h \
 	xfs_btree.h \
+	xfs_btree_staging.h \
 	xfs_attr_remote.h \
 	xfs_cksum.h \
 	xfs_da_btree.h \
@@ -72,6 +73,7 @@ CFILES = cache.c \
 	xfs_bmap.c \
 	xfs_bmap_btree.c \
 	xfs_btree.c \
+	xfs_btree_staging.c \
 	xfs_da_btree.c \
 	xfs_defer.c \
 	xfs_dir2.c \
diff --git a/libxfs/xfs_btree.c b/libxfs/xfs_btree.c
index 6771c981..a2ab7cf3 100644
--- a/libxfs/xfs_btree.c
+++ b/libxfs/xfs_btree.c
@@ -17,6 +17,7 @@
 #include "xfs_errortag.h"
 #include "xfs_trace.h"
 #include "xfs_alloc.h"
+#include "xfs_btree_staging.h"
 
 /*
  * Cursor allocation zone.
@@ -379,6 +380,8 @@ xfs_btree_del_cursor(
 	/*
 	 * Free the cursor.
 	 */
+	if (unlikely(cur->bc_flags & XFS_BTREE_STAGING))
+		kmem_free((void *)cur->bc_ops);
 	kmem_cache_free(xfs_btree_cur_zone, cur);
 }
 
diff --git a/libxfs/xfs_btree.h b/libxfs/xfs_btree.h
index 6faed214..82b906b5 100644
--- a/libxfs/xfs_btree.h
+++ b/libxfs/xfs_btree.h
@@ -179,7 +179,10 @@ union xfs_btree_irec {
 
 /* Per-AG btree information. */
 struct xfs_btree_cur_ag {
-	struct xfs_buf		*agbp;
+	union {
+		struct xfs_buf		*agbp;
+		struct xbtree_afakeroot	*afake;	/* for staging cursor */
+	};
 	xfs_agnumber_t		agno;
 	union {
 		struct {
@@ -238,6 +241,12 @@ typedef struct xfs_btree_cur
 #define XFS_BTREE_LASTREC_UPDATE	(1<<2)	/* track last rec externally */
 #define XFS_BTREE_CRC_BLOCKS		(1<<3)	/* uses extended btree blocks */
 #define XFS_BTREE_OVERLAPPING		(1<<4)	/* overlapping intervals */
+/*
+ * The root of this btree is a fakeroot structure so that we can stage a btree
+ * rebuild without leaving it accessible via primary metadata.  The ops struct
+ * is dynamically allocated and must be freed when the cursor is deleted.
+ */
+#define XFS_BTREE_STAGING		(1<<5)
 
 
 #define	XFS_BTREE_NOERROR	0
diff --git a/libxfs/xfs_btree_staging.c b/libxfs/xfs_btree_staging.c
new file mode 100644
index 00000000..247683c6
--- /dev/null
+++ b/libxfs/xfs_btree_staging.c
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2020 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#include "libxfs_priv.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
+#include "xfs_trans.h"
+#include "xfs_btree.h"
+#include "xfs_trace.h"
+#include "xfs_btree_staging.h"
+
+/*
+ * Staging Cursors and Fake Roots for Btrees
+ * =========================================
+ *
+ * A staging btree cursor is a special type of btree cursor that callers must
+ * use to construct a new btree index using the btree bulk loader code.  The
+ * bulk loading code uses the staging btree cursor to abstract the details of
+ * initializing new btree blocks and filling them with records or key/ptr
+ * pairs.  Regular btree operations (e.g. queries and modifications) are not
+ * supported with staging cursors, and callers must not invoke them.
+ *
+ * Fake root structures contain all the information about a btree that is under
+ * construction by the bulk loading code.  Staging btree cursors point to fake
+ * root structures instead of the usual AG header or inode structure.
+ *
+ * Callers are expected to initialize a fake root structure and pass it into
+ * the _stage_cursor function for a specific btree type.  When bulk loading is
+ * complete, callers should call the _commit_staged_btree function for that
+ * specific btree type to commit the new btree into the filesystem.
+ */
+
+/*
+ * Don't allow staging cursors to be duplicated because they're supposed to be
+ * kept private to a single thread.
+ */
+STATIC struct xfs_btree_cur *
+xfs_btree_fakeroot_dup_cursor(
+	struct xfs_btree_cur	*cur)
+{
+	ASSERT(0);
+	return NULL;
+}
+
+/*
+ * Don't allow block allocation for a staging cursor, because staging cursors
+ * do not support regular btree modifications.
+ *
+ * Bulk loading uses a separate callback to obtain new blocks from a
+ * preallocated list, which prevents ENOSPC failures during loading.
+ */
+STATIC int
+xfs_btree_fakeroot_alloc_block(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_ptr	*start_bno,
+	union xfs_btree_ptr	*new_bno,
+	int			*stat)
+{
+	ASSERT(0);
+	return -EFSCORRUPTED;
+}
+
+/*
+ * Don't allow block freeing for a staging cursor, because staging cursors
+ * do not support regular btree modifications.
+ */
+STATIC int
+xfs_btree_fakeroot_free_block(
+	struct xfs_btree_cur	*cur,
+	struct xfs_buf		*bp)
+{
+	ASSERT(0);
+	return -EFSCORRUPTED;
+}
+
+/* Initialize a pointer to the root block from the fakeroot. */
+STATIC void
+xfs_btree_fakeroot_init_ptr_from_cur(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_ptr	*ptr)
+{
+	struct xbtree_afakeroot	*afake;
+
+	ASSERT(cur->bc_flags & XFS_BTREE_STAGING);
+
+	afake = cur->bc_ag.afake;
+	ptr->s = cpu_to_be32(afake->af_root);
+}
+
+/*
+ * Bulk Loading for AG Btrees
+ * ==========================
+ *
+ * For a btree rooted in an AG header, pass a xbtree_afakeroot structure to the
+ * staging cursor.  Callers should initialize this to zero.
+ *
+ * The _stage_cursor() function for a specific btree type should call
+ * xfs_btree_stage_afakeroot to set up the in-memory cursor as a staging
+ * cursor.  The corresponding _commit_staged_btree() function should log the
+ * new root and call xfs_btree_commit_afakeroot() to transform the staging
+ * cursor into a regular btree cursor.
+ */
+
+/* Update the btree root information for a per-AG fake root. */
+STATIC void
+xfs_btree_afakeroot_set_root(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_ptr	*ptr,
+	int			inc)
+{
+	struct xbtree_afakeroot	*afake = cur->bc_ag.afake;
+
+	ASSERT(cur->bc_flags & XFS_BTREE_STAGING);
+	afake->af_root = be32_to_cpu(ptr->s);
+	afake->af_levels += inc;
+}
+
+/*
+ * Initialize a AG-rooted btree cursor with the given AG btree fake root.
+ * The btree cursor's bc_ops will be overridden as needed to make the staging
+ * functionality work.
+ */
+void
+xfs_btree_stage_afakeroot(
+	struct xfs_btree_cur		*cur,
+	struct xbtree_afakeroot		*afake)
+{
+	struct xfs_btree_ops		*nops;
+
+	ASSERT(!(cur->bc_flags & XFS_BTREE_STAGING));
+	ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE));
+	ASSERT(cur->bc_tp == NULL);
+
+	nops = kmem_alloc(sizeof(struct xfs_btree_ops), KM_NOFS);
+	memcpy(nops, cur->bc_ops, sizeof(struct xfs_btree_ops));
+	nops->alloc_block = xfs_btree_fakeroot_alloc_block;
+	nops->free_block = xfs_btree_fakeroot_free_block;
+	nops->init_ptr_from_cur = xfs_btree_fakeroot_init_ptr_from_cur;
+	nops->set_root = xfs_btree_afakeroot_set_root;
+	nops->dup_cursor = xfs_btree_fakeroot_dup_cursor;
+
+	cur->bc_ag.afake = afake;
+	cur->bc_nlevels = afake->af_levels;
+	cur->bc_ops = nops;
+	cur->bc_flags |= XFS_BTREE_STAGING;
+}
+
+/*
+ * Transform an AG-rooted staging btree cursor back into a regular cursor by
+ * substituting a real btree root for the fake one and restoring normal btree
+ * cursor ops.  The caller must log the btree root change prior to calling
+ * this.
+ */
+void
+xfs_btree_commit_afakeroot(
+	struct xfs_btree_cur		*cur,
+	struct xfs_trans		*tp,
+	struct xfs_buf			*agbp,
+	const struct xfs_btree_ops	*ops)
+{
+	ASSERT(cur->bc_flags & XFS_BTREE_STAGING);
+	ASSERT(cur->bc_tp == NULL);
+
+	trace_xfs_btree_commit_afakeroot(cur);
+
+	kmem_free((void *)cur->bc_ops);
+	cur->bc_ag.agbp = agbp;
+	cur->bc_ops = ops;
+	cur->bc_flags &= ~XFS_BTREE_STAGING;
+	cur->bc_tp = tp;
+}
diff --git a/libxfs/xfs_btree_staging.h b/libxfs/xfs_btree_staging.h
new file mode 100644
index 00000000..5d78e13d
--- /dev/null
+++ b/libxfs/xfs_btree_staging.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2020 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#ifndef __XFS_BTREE_STAGING_H__
+#define __XFS_BTREE_STAGING_H__
+
+/* Fake root for an AG-rooted btree. */
+struct xbtree_afakeroot {
+	/* AG block number of the new btree root. */
+	xfs_agblock_t		af_root;
+
+	/* Height of the new btree. */
+	unsigned int		af_levels;
+
+	/* Number of blocks used by the btree. */
+	unsigned int		af_blocks;
+};
+
+/* Cursor interactions with with fake roots for AG-rooted btrees. */
+void xfs_btree_stage_afakeroot(struct xfs_btree_cur *cur,
+		struct xbtree_afakeroot *afake);
+void xfs_btree_commit_afakeroot(struct xfs_btree_cur *cur, struct xfs_trans *tp,
+		struct xfs_buf *agbp, const struct xfs_btree_ops *ops);
+
+#endif	/* __XFS_BTREE_STAGING_H__ */
-- 
2.26.2


  parent reply	other threads:[~2020-05-07 12:20 UTC|newest]

Thread overview: 68+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-05-07 12:17 libxfs 5.7 resync Christoph Hellwig
2020-05-07 12:17 ` [PATCH 01/58] xfs: add agf freeblocks verify in xfs_agf_verify Christoph Hellwig
2020-05-07 12:17 ` [PATCH 02/58] xfs: ensure that the inode uid/gid match values match the icdinode ones Christoph Hellwig
2020-05-07 12:17 ` [PATCH 03/58] xfs: remove the icdinode di_uid/di_gid members Christoph Hellwig
2020-05-07 12:17 ` [PATCH 04/58] xfs: remove the kuid/kgid conversion wrappers Christoph Hellwig
2020-05-07 12:17 ` [PATCH 05/58] xfs: fix an undefined behaviour in _da3_path_shift Christoph Hellwig
2020-05-07 12:17 ` [PATCH 06/58] xfs: open code insert range extent split helper Christoph Hellwig
2020-05-07 12:18 ` [PATCH 07/58] xfs: remove the ATTR_INCOMPLETE flag Christoph Hellwig
2020-05-07 12:18 ` [PATCH 08/58] xfs: merge xfs_attr_remove into xfs_attr_set Christoph Hellwig
2020-05-07 12:18 ` [PATCH 09/58] xfs: remove the name == NULL check from xfs_attr_args_init Christoph Hellwig
2020-05-07 12:18 ` [PATCH 10/58] xfs: remove the MAXNAMELEN " Christoph Hellwig
2020-05-07 12:18 ` [PATCH 11/58] xfs: turn xfs_da_args.value into a void pointer Christoph Hellwig
2020-05-07 12:18 ` [PATCH 12/58] xfs: pass an initialized xfs_da_args structure to xfs_attr_set Christoph Hellwig
2020-05-07 12:18 ` [PATCH 13/58] xfs: pass an initialized xfs_da_args to xfs_attr_get Christoph Hellwig
2020-05-07 12:18 ` [PATCH 14/58] xfs: remove the xfs_inode argument to xfs_attr_get_ilocked Christoph Hellwig
2020-05-07 12:18 ` [PATCH 15/58] xfs: remove ATTR_KERNOVAL Christoph Hellwig
2020-05-07 12:18 ` [PATCH 16/58] xfs: remove ATTR_ALLOC and XFS_DA_OP_ALLOCVAL Christoph Hellwig
2020-05-07 12:18 ` [PATCH 17/58] xfs: replace ATTR_KERNOTIME with XFS_DA_OP_NOTIME Christoph Hellwig
2020-05-07 12:18 ` [PATCH 18/58] xfs: factor out a xfs_attr_match helper Christoph Hellwig
2020-05-07 12:18 ` [PATCH 19/58] xfs: cleanup struct xfs_attr_list_context Christoph Hellwig
2020-05-07 12:18 ` [PATCH 20/58] xfs: remove the unused ATTR_ENTRY macro Christoph Hellwig
2020-05-07 12:18 ` [PATCH 21/58] xfs: move the legacy xfs_attr_list to xfs_ioctl.c Christoph Hellwig
2020-05-07 12:18 ` [PATCH 22/58] xfs: rename xfs_attr_list_int to xfs_attr_list Christoph Hellwig
2020-05-07 12:18 ` [PATCH 23/58] xfs: clean up the ATTR_REPLACE checks Christoph Hellwig
2020-05-07 12:18 ` [PATCH 24/58] xfs: clean up the attr flag confusion Christoph Hellwig
2020-05-07 12:18 ` [PATCH 25/58] xfs: remove XFS_DA_OP_INCOMPLETE Christoph Hellwig
2020-05-07 12:18 ` [PATCH 26/58] xfs: embedded the attrlist cursor into struct xfs_attr_list_context Christoph Hellwig
2020-05-07 12:18 ` [PATCH 27/58] xfs: remove the agfl_bno member from struct xfs_agfl Christoph Hellwig
2020-05-07 12:18 ` [PATCH 28/58] xfs: remove the xfs_agfl_t typedef Christoph Hellwig
2020-05-07 12:18 ` [PATCH 29/58] xfs: remove XFS_BUF_TO_AGI Christoph Hellwig
2020-05-07 12:18 ` [PATCH 30/58] xfs: remove XFS_BUF_TO_AGF Christoph Hellwig
2020-05-07 12:18 ` [PATCH 31/58] xfs: remove XFS_BUF_TO_SBP Christoph Hellwig
2020-05-07 12:18 ` [PATCH 32/58] xfs: fix xfs_rmap_has_other_keys usage of ECANCELED Christoph Hellwig
2020-05-07 12:18 ` [PATCH 33/58] xfs: add a function to deal with corrupt buffers post-verifiers Christoph Hellwig
2020-05-07 12:18 ` [PATCH 34/58] xfs: fix buffer corruption reporting when xfs_dir3_free_header_check fails Christoph Hellwig
2020-05-07 12:18 ` [PATCH 35/58] xfs: don't ever return a stale pointer from __xfs_dir3_free_read Christoph Hellwig
2020-05-07 12:18 ` [PATCH 36/58] xfs: check owner of dir3 free blocks Christoph Hellwig
2020-05-07 12:18 ` [PATCH 37/58] xfs: check owner of dir3 data blocks Christoph Hellwig
2020-05-07 12:18 ` [PATCH 38/58] xfs: check owner of dir3 blocks Christoph Hellwig
2020-05-07 12:18 ` [PATCH 39/58] xfs: introduce new private btree cursor names Christoph Hellwig
2020-05-07 12:18 ` [PATCH 40/58] xfs: convert btree cursor ag-private member name Christoph Hellwig
2020-05-07 12:18 ` [PATCH 41/58] xfs: convert btree cursor inode-private member names Christoph Hellwig
2020-05-07 12:18 ` [PATCH 42/58] xfs: rename btree cursor private btree member flags Christoph Hellwig
2020-05-07 12:18 ` [PATCH 43/58] xfs: make btree cursor private union anonymous Christoph Hellwig
2020-05-07 12:18 ` [PATCH 44/58] xfs: make the btree cursor union members named structure Christoph Hellwig
2020-05-07 12:18 ` [PATCH 45/58] xfs: make the btree ag cursor private union anonymous Christoph Hellwig
2020-05-07 12:18 ` [PATCH 46/58] xfs: xfs_dabuf_map should return ENOMEM when map allocation fails Christoph Hellwig
2020-05-07 12:18 ` [PATCH 47/58] xfs: fix incorrect test in xfs_alloc_ag_vextent_lastblock Christoph Hellwig
2020-05-07 12:18 ` Christoph Hellwig [this message]
2020-05-07 12:18 ` [PATCH 49/58] xfs: introduce fake roots for inode-rooted btrees Christoph Hellwig
2020-05-07 12:18 ` [PATCH 50/58] xfs: support bulk loading of staged btrees Christoph Hellwig
2020-05-07 12:18 ` [PATCH 51/58] xfs: add support for free space btree staging cursors Christoph Hellwig
2020-05-07 12:18 ` [PATCH 52/58] xfs: add support for inode " Christoph Hellwig
2020-05-07 12:18 ` [PATCH 53/58] xfs: add support for refcount " Christoph Hellwig
2020-05-07 12:18 ` [PATCH 54/58] xfs: add support for rmap " Christoph Hellwig
2020-05-07 12:18 ` [PATCH 55/58] xfs: add a new xfs_sb_version_has_v3inode helper Christoph Hellwig
2020-05-07 12:18 ` [PATCH 56/58] xfs: only check the superblock version for dinode size calculation Christoph Hellwig
2020-05-07 12:18 ` [PATCH 57/58] xfs: remove the di_version field from struct icdinode Christoph Hellwig
2020-05-07 12:18 ` [PATCH 58/58] xfs: validate the realtime geometry in xfs_validate_sb_common Christoph Hellwig
2020-05-07 12:47 ` libxfs 5.7 resync Eric Sandeen
2020-05-07 15:48 ` Darrick J. Wong
2020-05-07 15:54   ` Darrick J. Wong
2020-05-07 15:54   ` Christoph Hellwig
2020-05-07 16:07     ` Darrick J. Wong
2020-05-07 16:11     ` Christoph Hellwig
2020-05-07 16:12       ` Eric Sandeen
2020-05-07 16:14         ` Christoph Hellwig
2020-05-07 16:22     ` Eric Sandeen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200507121851.304002-49-hch@lst.de \
    --to=hch@lst.de \
    --cc=bfoster@redhat.com \
    --cc=darrick.wong@oracle.com \
    --cc=linux-xfs@vger.kernel.org \
    --cc=sandeen@sandeen.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox