All of lore.kernel.org
 help / color / mirror / Atom feed
From: Brian Foster <bfoster@redhat.com>
To: xfs@oss.sgi.com
Subject: [PATCH v4 06/20] xfs: use and update the finobt on inode allocation
Date: Wed,  7 May 2014 08:21:45 -0400	[thread overview]
Message-ID: <1399465319-65066-7-git-send-email-bfoster@redhat.com> (raw)
In-Reply-To: <1399465319-65066-1-git-send-email-bfoster@redhat.com>

Replace xfs_dialloc_ag() with an implementation that looks for a
record in the finobt. The finobt only tracks records with at least
one free inode. This eliminates the need for the intra-ag scan in
the original algorithm. Once the inode is allocated, update the
finobt appropriately (possibly removing the record) as well as the
inobt.

Move the original xfs_dialloc_ag() algorithm to
xfs_dialloc_ag_slow() and fall back to it if finobt support is
not enabled.

Signed-off-by: Brian Foster <bfoster@redhat.com>
---
 libxfs/xfs_ialloc.c | 211 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 210 insertions(+), 1 deletion(-)

diff --git a/libxfs/xfs_ialloc.c b/libxfs/xfs_ialloc.c
index c5759aa..462340b 100644
--- a/libxfs/xfs_ialloc.c
+++ b/libxfs/xfs_ialloc.c
@@ -699,7 +699,7 @@ xfs_ialloc_get_rec(
  * available.
  */
 STATIC int
-xfs_dialloc_ag(
+xfs_dialloc_ag_slow(
 	struct xfs_trans	*tp,
 	struct xfs_buf		*agbp,
 	xfs_ino_t		parent,
@@ -957,6 +957,242 @@ error0:
 	return error;
 }
 
+/*
+ * Allocate an inode in the given AG using the free inode btree (finobt).
+ *
+ * The finobt only holds records with at least one free inode, so a lookup
+ * lands directly on a usable chunk and no intra-AG scan is needed. Once an
+ * inode is chosen, the finobt record is updated (or deleted when its last
+ * free inode is consumed) and the matching inobt record is updated to agree.
+ *
+ * If the filesystem does not have the finobt feature, the work is handed to
+ * xfs_dialloc_ag_slow().
+ *
+ * On success, returns 0 and stores the allocated inode number in *inop. On
+ * failure in the finobt path, the btree cursors are torn down, the perag
+ * reference is dropped and the error is returned without writing *inop.
+ */
+STATIC int
+xfs_dialloc_ag(
+	struct xfs_trans	*tp,
+	struct xfs_buf		*agbp,
+	xfs_ino_t		parent,
+	xfs_ino_t		*inop)
+{
+	struct xfs_mount		*mp = tp->t_mountp;
+	struct xfs_agi			*agi = XFS_BUF_TO_AGI(agbp);
+	xfs_agnumber_t			agno = be32_to_cpu(agi->agi_seqno);
+	xfs_agnumber_t			pagno = XFS_INO_TO_AGNO(mp, parent);
+	xfs_agino_t			pagino = XFS_INO_TO_AGINO(mp, parent);
+	struct xfs_perag		*pag;
+	struct xfs_btree_cur		*cur;
+	struct xfs_btree_cur		*tcur;
+	struct xfs_inobt_rec_incore	rec;
+	struct xfs_inobt_rec_incore	trec;
+	xfs_ino_t			ino;
+	int				error;
+	int				offset;
+	int				i, j;
+
+	if (!xfs_sb_version_hasfinobt(&mp->m_sb))
+		return xfs_dialloc_ag_slow(tp, agbp, parent, inop);
+
+	pag = xfs_perag_get(mp, agno);
+
+	/*
+	 * If pagino is 0 (this is the root inode allocation) use newino.
+	 * This must work because we've just allocated some.
+	 */
+	if (!pagino)
+		pagino = be32_to_cpu(agi->agi_newino);
+
+	cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_FINO);
+
+	error = xfs_check_agi_freecount(cur, agi);
+	if (error)
+		goto error_cur;
+
+	if (agno == pagno) {
+		/*
+		 * We're in the same AG as the parent inode so allocate the
+		 * closest inode to the parent.
+		 */
+		error = xfs_inobt_lookup(cur, pagino, XFS_LOOKUP_LE, &i);
+		if (error)
+			goto error_cur;
+		if (i == 1) {
+			error = xfs_inobt_get_rec(cur, &rec, &i);
+			if (error)
+				goto error_cur;
+			XFS_WANT_CORRUPTED_GOTO(i == 1, error_cur);
+
+			/*
+			 * See if we've landed in the parent inode record. The
+			 * finobt only tracks chunks with at least one free
+			 * inode, so record existence is enough.
+			 */
+			if (pagino >= rec.ir_startino &&
+			    pagino < (rec.ir_startino + XFS_INODES_PER_CHUNK))
+				goto alloc_inode;
+		}
+
+		/*
+		 * No record covers the parent. Use a second cursor to find the
+		 * nearest record at or above pagino and compare the two.
+		 */
+		error = xfs_btree_dup_cursor(cur, &tcur);
+		if (error)
+			goto error_cur;
+
+		error = xfs_inobt_lookup(tcur, pagino, XFS_LOOKUP_GE, &j);
+		if (error)
+			goto error_tcur;
+		if (j == 1) {
+			error = xfs_inobt_get_rec(tcur, &trec, &j);
+			if (error)
+				goto error_tcur;
+			XFS_WANT_CORRUPTED_GOTO(j == 1, error_tcur);
+		}
+
+		/*
+		 * The finobt must hold at least one record here; otherwise
+		 * rec would be used uninitialized below.
+		 */
+		XFS_WANT_CORRUPTED_GOTO(i == 1 || j == 1, error_tcur);
+
+		if (i == 1 && j == 1) {
+			/* Both neighbours exist: keep the closer chunk. */
+			if ((pagino - rec.ir_startino + XFS_INODES_PER_CHUNK - 1) >
+			    (trec.ir_startino - pagino)) {
+				rec = trec;
+				xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+				cur = tcur;
+			} else {
+				xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
+			}
+		} else if (j == 1) {
+			rec = trec;
+			xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+			cur = tcur;
+		} else {
+			xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
+		}
+	} else {
+		/*
+		 * Different AG from the parent inode. Check the record for the
+		 * most recently allocated inode.
+		 */
+		if (agi->agi_newino != cpu_to_be32(NULLAGINO)) {
+			error = xfs_inobt_lookup(cur,
+						 be32_to_cpu(agi->agi_newino),
+						 XFS_LOOKUP_EQ, &i);
+			if (error)
+				goto error_cur;
+			if (i == 1) {
+				error = xfs_inobt_get_rec(cur, &rec, &i);
+				if (error)
+					goto error_cur;
+				XFS_WANT_CORRUPTED_GOTO(i == 1, error_cur);
+				goto alloc_inode;
+			}
+		}
+
+		/*
+		 * Allocate the first inode available in the AG.
+		 */
+		error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
+		if (error)
+			goto error_cur;
+		XFS_WANT_CORRUPTED_GOTO(i == 1, error_cur);
+
+		error = xfs_inobt_get_rec(cur, &rec, &i);
+		if (error)
+			goto error_cur;
+		XFS_WANT_CORRUPTED_GOTO(i == 1, error_cur);
+	}
+
+alloc_inode:
+	offset = xfs_lowbit64(rec.ir_free);
+	ASSERT(offset >= 0);
+	ASSERT(offset < XFS_INODES_PER_CHUNK);
+	ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
+				   XFS_INODES_PER_CHUNK) == 0);
+	ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset);
+
+	/*
+	 * Modify or remove the finobt record.
+	 */
+	rec.ir_free &= ~XFS_INOBT_MASK(offset);
+	rec.ir_freecount--;
+	if (rec.ir_freecount)
+		error = xfs_inobt_update(cur, &rec);
+	else
+		error = xfs_btree_delete(cur, &i);
+	if (error)
+		goto error_cur;
+
+	/*
+	 * Lookup and modify the equivalent record in the inobt.
+	 */
+	tcur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);
+
+	error = xfs_check_agi_freecount(tcur, agi);
+	if (error)
+		goto error_tcur;
+
+	error = xfs_inobt_lookup(tcur, rec.ir_startino, XFS_LOOKUP_EQ, &i);
+	if (error)
+		goto error_tcur;
+	XFS_WANT_CORRUPTED_GOTO(i == 1, error_tcur);
+
+	error = xfs_inobt_get_rec(tcur, &trec, &i);
+	if (error)
+		goto error_tcur;
+	XFS_WANT_CORRUPTED_GOTO(i == 1, error_tcur);
+	ASSERT((XFS_AGINO_TO_OFFSET(mp, trec.ir_startino) %
+				   XFS_INODES_PER_CHUNK) == 0);
+
+	trec.ir_free &= ~XFS_INOBT_MASK(offset);
+	trec.ir_freecount--;
+
+	XFS_WANT_CORRUPTED_GOTO((rec.ir_free == trec.ir_free) &&
+				(rec.ir_freecount == trec.ir_freecount),
+				error_tcur);
+
+	error = xfs_inobt_update(tcur, &trec);
+	if (error)
+		goto error_tcur;
+
+	/*
+	 * Update the perag and superblock.
+	 */
+	be32_add_cpu(&agi->agi_freecount, -1);
+	xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
+	pag->pagi_freecount--;
+
+	xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);
+
+	error = xfs_check_agi_freecount(tcur, agi);
+	if (error)
+		goto error_tcur;
+	error = xfs_check_agi_freecount(cur, agi);
+	if (error)
+		goto error_tcur;
+
+	xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
+	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+	xfs_perag_put(pag);
+	*inop = ino;
+	return 0;
+
+error_tcur:
+	xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
+error_cur:
+	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+	xfs_perag_put(pag);
+	return error;
+}
+
 /*
  * Allocate an inode on disk.
  *
-- 
1.8.3.1

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

  parent reply	other threads:[~2014-05-07 12:22 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-05-07 12:21 [PATCH v4 00/20] xfsprogs: introduce the free inode btree Brian Foster
2014-05-07 12:21 ` [PATCH v4 01/20] xfs: refactor xfs_ialloc_btree.c to support multiple inobt numbers Brian Foster
2014-05-07 12:21 ` [PATCH v4 02/20] xfs: reserve v5 superblock read-only compat. feature bit for finobt Brian Foster
2014-05-07 12:21 ` [PATCH v4 03/20] xfs: support the XFS_BTNUM_FINOBT free inode btree type Brian Foster
2014-05-07 12:21 ` [PATCH v4 04/20] xfs: update inode allocation/free transaction reservations for finobt Brian Foster
2014-05-07 12:21 ` [PATCH v4 05/20] xfs: insert newly allocated inode chunks into the finobt Brian Foster
2014-05-07 12:21 ` Brian Foster [this message]
2014-05-07 12:21 ` [PATCH v4 07/20] xfs: refactor xfs_difree() inobt bits into xfs_difree_inobt() helper Brian Foster
2014-05-07 12:21 ` [PATCH v4 08/20] xfs: update the finobt on inode free Brian Foster
2014-05-07 12:21 ` [PATCH v4 09/20] xfs: report finobt status in fs geometry Brian Foster
2014-05-07 12:21 ` [PATCH v4 10/20] xfs: enable the finobt feature on v5 superblocks Brian Foster
2014-05-07 12:21 ` [PATCH v4 11/20] xfsprogs/mkfs: finobt mkfs support Brian Foster
2014-05-07 12:21 ` [PATCH v4 12/20] xfsprogs/db: finobt support Brian Foster
2014-05-07 12:21 ` [PATCH v4 13/20] xfsprogs/repair: account for finobt in ag 0 geometry pre-calculation Brian Foster
2014-05-07 12:21 ` [PATCH v4 14/20] xfsprogs/repair: phase 2 finobt scan Brian Foster
2014-05-07 12:21 ` [PATCH v4 15/20] xfsprogs/repair: pass btree block magic as param to build_ino_tree() Brian Foster
2014-05-07 12:21 ` [PATCH v4 16/20] xfsprogs/repair: pull the build_agi() call up out of the inode tree build Brian Foster
2014-05-07 12:21 ` [PATCH v4 17/20] xfsprogs/repair: helpers for finding in-core inode records w/ free inodes Brian Foster
2014-05-07 12:21 ` [PATCH v4 18/20] xfsprogs/repair: reconstruct the finobt in phase 5 Brian Foster
2014-05-07 12:21 ` [PATCH v4 19/20] xfsprogs/growfs: report finobt status in fs geometry (xfs_info) Brian Foster
2014-05-07 12:21 ` [PATCH v4 20/20] xfsprogs/db: add finobt support to metadump Brian Foster
2014-05-26 22:40 ` [PATCH v4 00/20] xfsprogs: introduce the free inode btree Dave Chinner
2014-05-27 12:06   ` Brian Foster

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1399465319-65066-7-git-send-email-bfoster@redhat.com \
    --to=bfoster@redhat.com \
    --cc=xfs@oss.sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.