From: Dave Chinner <david@fromorbit.com>
To: Brian Foster <bfoster@redhat.com>
Cc: xfs@oss.sgi.com
Subject: Re: [PATCH v3 06/11] xfs: use and update the finobt on inode allocation
Date: Tue, 11 Feb 2014 18:17:03 +1100 [thread overview]
Message-ID: <20140211071703.GG13647@dastard> (raw)
In-Reply-To: <1391536182-9048-7-git-send-email-bfoster@redhat.com>
On Tue, Feb 04, 2014 at 12:49:37PM -0500, Brian Foster wrote:
> Replace xfs_dialloc_ag() with an implementation that looks for a
> record in the finobt. The finobt only tracks records with at least
> one free inode. This eliminates the need for the intra-ag scan in
> the original algorithm. Once the inode is allocated, update the
> finobt appropriately (possibly removing the record) as well as the
> inobt.
>
> Move the original xfs_dialloc_ag() algorithm to
> xfs_dialloc_ag_slow() and fall back as such if finobt support is
> not enabled.
>
> Signed-off-by: Brian Foster <bfoster@redhat.com>
> ---
> fs/xfs/xfs_ialloc.c | 211 +++++++++++++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 210 insertions(+), 1 deletion(-)
>
> diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
> index bdaab76..afc9840 100644
> --- a/fs/xfs/xfs_ialloc.c
> +++ b/fs/xfs/xfs_ialloc.c
> @@ -723,7 +723,7 @@ xfs_ialloc_get_rec(
> * available.
> */
> STATIC int
> -xfs_dialloc_ag(
> +xfs_dialloc_ag_slow(
> struct xfs_trans *tp,
> struct xfs_buf *agbp,
> xfs_ino_t parent,
OK, though I would have called it xfs_dialloc_ag_from_inobt() or
something similar to indicate what tree it is walking....
> +STATIC int
> +xfs_dialloc_ag(
> + struct xfs_trans *tp,
> + struct xfs_buf *agbp,
> + xfs_ino_t parent,
> + xfs_ino_t *inop)
> +{
Initial thought: Wow, long, long function. How can we split this up?
> + struct xfs_mount *mp = tp->t_mountp;
> + struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
> + xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno);
> + xfs_agnumber_t pagno = XFS_INO_TO_AGNO(mp, parent);
> + xfs_agino_t pagino = XFS_INO_TO_AGINO(mp, parent);
> + struct xfs_perag *pag;
> + struct xfs_btree_cur *cur;
> + struct xfs_btree_cur *tcur;
> + struct xfs_inobt_rec_incore rec;
> + struct xfs_inobt_rec_incore trec;
> + xfs_ino_t ino;
> + int error;
> + int offset;
> + int i, j;
> +
> + if (!xfs_sb_version_hasfinobt(&mp->m_sb))
> + return xfs_dialloc_ag_slow(tp, agbp, parent, inop);
> +
> + pag = xfs_perag_get(mp, agno);
> +
> + /*
> + * If pagino is 0 (this is the root inode allocation) use newino.
> + * This must work because we've just allocated some.
> + */
> + if (!pagino)
> + pagino = be32_to_cpu(agi->agi_newino);
> +
> + cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_FINO);
> +
> + error = xfs_check_agi_freecount(cur, agi);
> + if (error)
> + goto error_cur;
> +
> + if (agno == pagno) {
> + /*
> + * We're in the same AG as the parent inode so allocate the
> + * closest inode to the parent.
> + */
> + error = xfs_inobt_lookup(cur, pagino, XFS_LOOKUP_LE, &i);
> + if (error)
> + goto error_cur;
> + if (i == 1) {
> + error = xfs_inobt_get_rec(cur, &rec, &i);
> + if (error)
> + goto error_cur;
> + XFS_WANT_CORRUPTED_GOTO(i == 1, error_cur);
> +
> + /*
> + * See if we've landed in the parent inode record. The
> + * finobt only tracks chunks with at least one free
> + * inode, so record existence is enough.
> + */
> + if (pagino >= rec.ir_startino &&
> + pagino < (rec.ir_startino + XFS_INODES_PER_CHUNK))
> + goto alloc_inode;
> + }
> +
> + error = xfs_btree_dup_cursor(cur, &tcur);
> + if (error)
> + goto error_cur;
> +
> + error = xfs_inobt_lookup(tcur, pagino, XFS_LOOKUP_GE, &j);
> + if (error)
> + goto error_tcur;
> + if (j == 1) {
> + error = xfs_inobt_get_rec(tcur, &trec, &j);
> + if (error)
> + goto error_tcur;
> + XFS_WANT_CORRUPTED_GOTO(j == 1, error_tcur);
> + }
> +
> + if (i == 1 && j == 1) {
> + if ((pagino - rec.ir_startino + XFS_INODES_PER_CHUNK - 1) >
> + (trec.ir_startino - pagino)) {
> + rec = trec;
> + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
> + cur = tcur;
> + } else {
> + xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
> + }
> + } else if (j == 1) {
> + rec = trec;
> + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
> + cur = tcur;
> + } else {
> + xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
> + }
That entire branch can be easily factored into:
error = xfs_dialloc_ag_finobt_near(cur, pagino, &rec);
/*
 * Allocate as close to the target agino as possible.
 *
 * Search the btree behind @cur for the record nearest to @agino: first the
 * record at or to the left of the target (XFS_LOOKUP_LE), then, if the target
 * does not fall inside that record, the record at or to the right of it
 * (XFS_LOOKUP_GE).
 *
 * On success (return 0) *rec holds the chosen record and @cur is positioned
 * on it. The cursor is passed by value, so it cannot be swapped for another
 * cursor on behalf of the caller; when the right-hand record wins, @cur
 * itself is re-seeked onto that record instead. The caller keeps ownership
 * of @cur on every return path, including errors.
 *
 * Returns a non-zero error from the btree helpers, or the corruption error
 * raised by the XFS_WANT_CORRUPTED_* macros, on failure.
 */
static int
xfs_dialloc_ag_finobt_near(
	struct xfs_btree_cur		*cur,
	xfs_agino_t			agino,
	struct xfs_inobt_rec_incore	*rec)
{
	struct xfs_btree_cur		*rcur;	/* cursor for right search */
	struct xfs_inobt_rec_incore	rrec;	/* and the record used */
	int				error;
	int				i;	/* left (LE) lookup result */
	int				j;	/* right (GE) lookup result */

	error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
	if (error)
		return error;
	if (i == 1) {
		error = xfs_inobt_get_rec(cur, rec, &i);
		if (error)
			return error;
		XFS_WANT_CORRUPTED_RETURN(i == 1);

		/*
		 * See if we've landed in the target inode record. The
		 * finobt only tracks chunks with at least one free
		 * inode, so record existence is enough.
		 */
		if (agino >= rec->ir_startino &&
		    agino < (rec->ir_startino + XFS_INODES_PER_CHUNK))
			return 0;
	}

	error = xfs_btree_dup_cursor(cur, &rcur);
	if (error)
		return error;

	error = xfs_inobt_lookup(rcur, agino, XFS_LOOKUP_GE, &j);
	if (error)
		goto error_rcur;
	if (j == 1) {
		error = xfs_inobt_get_rec(rcur, &rrec, &j);
		if (error)
			goto error_rcur;
		XFS_WANT_CORRUPTED_GOTO(j == 1, error_rcur);
	}

	/*
	 * The duplicate cursor was only needed to read rrec. Release it on
	 * every path so it can never leak, and so the caller's cursor is
	 * never freed behind its back.
	 */
	xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR);

	/*
	 * Found neither a left nor a right record. *rec has not been written
	 * in that case, so returning success would hand the caller an unset
	 * record; report it as corruption instead.
	 */
	XFS_WANT_CORRUPTED_RETURN(i == 1 || j == 1);

	if (i == 1 && j == 1) {
		/*
		 * Both left and right records are valid, so choose
		 * the closer inode chunk to the target.
		 */
		if ((agino - rec->ir_startino + XFS_INODES_PER_CHUNK - 1) <=
		    (rrec.ir_startino - agino))
			return 0;	/* left wins; cur is already on it */
	} else if (i == 1) {
		/* only the left record is valid; cur is already on it */
		return 0;
	}

	/*
	 * The right record was chosen (it is closer, or it is the only valid
	 * one). Move the caller's cursor onto it.
	 */
	error = xfs_inobt_lookup(cur, rrec.ir_startino, XFS_LOOKUP_EQ, &i);
	if (error)
		return error;
	XFS_WANT_CORRUPTED_RETURN(i == 1);
	*rec = rrec;
	return 0;

error_rcur:
	xfs_btree_del_cursor(rcur, XFS_BTREE_ERROR);
	return error;
}
> + } else {
> + /*
> + * Different AG from the parent inode. Check the record for the
> + * most recently allocated inode.
> + */
> + if (agi->agi_newino != cpu_to_be32(NULLAGINO)) {
> + error = xfs_inobt_lookup(cur, agi->agi_newino,
> + XFS_LOOKUP_EQ, &i);
> + if (error)
> + goto error_cur;
> + if (i == 1) {
> + error = xfs_inobt_get_rec(cur, &rec, &i);
> + if (error)
> + goto error_cur;
> + XFS_WANT_CORRUPTED_GOTO(i == 1, error_cur);
> + goto alloc_inode;
> + }
> + }
> +
> + /*
> + * Allocate the first inode available in the AG.
> + */
> + error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
> + if (error)
> + goto error_cur;
> + XFS_WANT_CORRUPTED_GOTO(i == 1, error_cur);
> +
> + error = xfs_inobt_get_rec(cur, &rec, &i);
> + if (error)
> + goto error_cur;
> + XFS_WANT_CORRUPTED_GOTO(i == 1, error_cur);
And that can be factored in a similar manner in
xfs_dialloc_ag_newino()....
> + }
> +
> +alloc_inode:
> + offset = xfs_lowbit64(rec.ir_free);
> + ASSERT(offset >= 0);
> + ASSERT(offset < XFS_INODES_PER_CHUNK);
> + ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
> + XFS_INODES_PER_CHUNK) == 0);
> + ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset);
> +
> + /*
> + * Modify or remove the finobt record.
> + */
> + rec.ir_free &= ~XFS_INOBT_MASK(offset);
> + rec.ir_freecount--;
> + if (rec.ir_freecount)
> + error = xfs_inobt_update(cur, &rec);
> + else
> + error = xfs_btree_delete(cur, &i);
> + if (error)
> + goto error_cur;
> +
> + /*
> + * Lookup and modify the equivalent record in the inobt.
> + */
> + tcur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);
In case you hadn't guessed, I don't like the "tcur/trec" variables
because they make me think "temporary", which they aren't. In this
case it is the inobt cursor and record....
In fact, this whole segment could be factored into a function like
xfs_dialloc_ag_inobt_update(), hence removing the second cursor from
xfs_dialloc_ag() function altogether and that would clean a lot of
the logic up....
> +
> + error = xfs_check_agi_freecount(tcur, agi);
> + if (error)
> + goto error_tcur;
> +
> + error = xfs_inobt_lookup(tcur, rec.ir_startino, XFS_LOOKUP_EQ, &i);
> + if (error)
> + goto error_tcur;
> + XFS_WANT_CORRUPTED_GOTO(i == 1, error_tcur);
> +
> + error = xfs_inobt_get_rec(tcur, &trec, &i);
> + if (error)
> + goto error_tcur;
> + XFS_WANT_CORRUPTED_GOTO(i == 1, error_tcur);
> + ASSERT((XFS_AGINO_TO_OFFSET(mp, trec.ir_startino) %
> + XFS_INODES_PER_CHUNK) == 0);
> +
> + trec.ir_free &= ~XFS_INOBT_MASK(offset);
> + trec.ir_freecount--;
> +
> + XFS_WANT_CORRUPTED_GOTO((rec.ir_free == trec.ir_free) &&
> + (rec.ir_freecount == trec.ir_freecount),
> + error_tcur);
> +
> + error = xfs_inobt_update(tcur, &trec);
> + if (error)
> + goto error_tcur;
> +
> + /*
> + * Update the perag and superblock.
> + */
> + be32_add_cpu(&agi->agi_freecount, -1);
> + xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
> + pag->pagi_freecount--;
> +
> + xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);
This will need to be done before you update the inobt, though, so
that you can run xfs_check_agi_freecount() on it and the count will
come out correct....
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
next prev parent reply other threads:[~2014-02-11 7:17 UTC|newest]
Thread overview: 34+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-02-04 17:49 [PATCH v3 00/11] xfs: introduce the free inode btree Brian Foster
2014-02-04 17:49 ` [PATCH v3 01/11] xfs: refactor xfs_ialloc_btree.c to support multiple inobt numbers Brian Foster
2014-02-04 17:49 ` [PATCH v3 02/11] xfs: reserve v5 superblock read-only compat. feature bit for finobt Brian Foster
2014-02-11 6:07 ` Dave Chinner
2014-02-04 17:49 ` [PATCH v3 03/11] xfs: support the XFS_BTNUM_FINOBT free inode btree type Brian Foster
2014-02-11 6:22 ` Dave Chinner
2014-02-04 17:49 ` [PATCH v3 04/11] xfs: update inode allocation/free transaction reservations for finobt Brian Foster
2014-02-11 6:46 ` Dave Chinner
2014-02-11 16:22 ` Brian Foster
2014-02-20 1:00 ` Dave Chinner
2014-02-20 16:04 ` Brian Foster
2014-02-18 17:10 ` Brian Foster
2014-02-18 20:34 ` Brian Foster
2014-02-20 2:01 ` Dave Chinner
2014-02-20 18:49 ` Brian Foster
2014-02-20 20:50 ` Dave Chinner
2014-02-20 21:14 ` Christoph Hellwig
2014-02-20 23:13 ` Dave Chinner
2014-02-04 17:49 ` [PATCH v3 05/11] xfs: insert newly allocated inode chunks into the finobt Brian Foster
2014-02-11 6:48 ` Dave Chinner
2014-02-04 17:49 ` [PATCH v3 06/11] xfs: use and update the finobt on inode allocation Brian Foster
2014-02-11 7:17 ` Dave Chinner [this message]
2014-02-11 16:32 ` Brian Foster
2014-02-14 20:01 ` Brian Foster
2014-02-20 0:38 ` Dave Chinner
2014-02-04 17:49 ` [PATCH v3 07/11] xfs: refactor xfs_difree() inobt bits into xfs_difree_inobt() helper Brian Foster
2014-02-11 7:19 ` Dave Chinner
2014-02-04 17:49 ` [PATCH v3 08/11] xfs: update the finobt on inode free Brian Foster
2014-02-11 7:31 ` Dave Chinner
2014-02-04 17:49 ` [PATCH v3 09/11] xfs: add finobt support to growfs Brian Foster
2014-02-04 17:49 ` [PATCH v3 10/11] xfs: report finobt status in fs geometry Brian Foster
2014-02-11 7:34 ` Dave Chinner
2014-02-04 17:49 ` [PATCH v3 11/11] xfs: enable the finobt feature on v5 superblocks Brian Foster
2014-02-11 7:34 ` Dave Chinner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20140211071703.GG13647@dastard \
--to=david@fromorbit.com \
--cc=bfoster@redhat.com \
--cc=xfs@oss.sgi.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.