public inbox for linux-xfs@vger.kernel.org
 help / color / mirror / Atom feed
From: Dave Chinner <david@fromorbit.com>
To: Christoph Hellwig <hch@infradead.org>
Cc: xfs@oss.sgi.com
Subject: Re: [PATCH 7/7] xfs: merge xfs_ialloc_ag_select into xfs_dialloc
Date: Mon, 18 Jun 2012 13:10:02 +1000	[thread overview]
Message-ID: <20120618031002.GN19223@dastard> (raw)
In-Reply-To: <20120605144836.693188178@bombadil.infradead.org>

On Tue, Jun 05, 2012 at 10:46:54AM -0400, Christoph Hellwig wrote:
> Both function contain the same basic loop over all AGs.  Merge the two
> by creating three passes in the loop instead of duplicating the code.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> 
> ---
>  fs/xfs/xfs_ialloc.c |  179 +++++++++++++++-------------------------------------
>  1 file changed, 55 insertions(+), 124 deletions(-)
> 
> Index: xfs/fs/xfs/xfs_ialloc.c
> ===================================================================
> --- xfs.orig/fs/xfs/xfs_ialloc.c	2012-06-04 12:58:07.104263361 -0400
> +++ xfs/fs/xfs/xfs_ialloc.c	2012-06-04 13:11:52.512284497 -0400
> @@ -439,114 +439,6 @@ xfs_ialloc_next_ag(
>  }
>  
>  /*
> - * Select an allocation group to look for a free inode in, based on the parent
> - * inode and then mode.  Return the allocation group buffer.
> - */
> -STATIC xfs_agnumber_t
> -xfs_ialloc_ag_select(
> -	xfs_trans_t	*tp,		/* transaction pointer */
> -	xfs_ino_t	parent,		/* parent directory inode number */
> -	umode_t		mode,		/* bits set to indicate file type */
> -	int		okalloc)	/* ok to allocate more space */
> -{
> -	xfs_agnumber_t	agcount;	/* number of ag's in the filesystem */
> -	xfs_agnumber_t	agno;		/* current ag number */
> -	int		flags;		/* alloc buffer locking flags */
> -	xfs_extlen_t	ineed;		/* blocks needed for inode allocation */
> -	xfs_extlen_t	longest = 0;	/* longest extent available */
> -	xfs_mount_t	*mp;		/* mount point structure */
> -	int		needspace;	/* file mode implies space allocated */
> -	xfs_perag_t	*pag;		/* per allocation group data */
> -	xfs_agnumber_t	pagno;		/* parent (starting) ag number */
> -	int		error;
> -
> -	/*
> -	 * Files of these types need at least one block if length > 0
> -	 * (and they won't fit in the inode, but that's hard to figure out).
> -	 */
> -	needspace = S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode);
> -	mp = tp->t_mountp;
> -	agcount = mp->m_maxagi;
> -	if (S_ISDIR(mode))
> -		pagno = xfs_ialloc_next_ag(mp);
> -	else {
> -		pagno = XFS_INO_TO_AGNO(mp, parent);
> -		if (pagno >= agcount)
> -			pagno = 0;
> -	}
> -
> -	ASSERT(pagno < agcount);
> -
> -	/*
> -	 * Loop through allocation groups, looking for one with a little
> -	 * free space in it.  Note we don't look for free inodes, exactly.
> -	 * Instead, we include whether there is a need to allocate inodes
> -	 * to mean that blocks must be allocated for them,
> -	 * if none are currently free.
> -	 */
> -	agno = pagno;
> -	flags = XFS_ALLOC_FLAG_TRYLOCK;
> -	for (;;) {
> -		pag = xfs_perag_get(mp, agno);
> -		if (!pag->pagi_inodeok) {
> -			xfs_ialloc_next_ag(mp);
> -			goto nextag;
> -		}
> -
> -		if (!pag->pagi_init) {
> -			error = xfs_ialloc_pagi_init(mp, tp, agno);
> -			if (error)
> -				goto nextag;
> -		}
> -
> -		if (pag->pagi_freecount) {
> -			xfs_perag_put(pag);
> -			return agno;
> -		}
> -
> -		if (!okalloc)
> -			goto nextag;
> -
> -		if (!pag->pagf_init) {
> -			error = xfs_alloc_pagf_init(mp, tp, agno, flags);
> -			if (error)
> -				goto nextag;
> -		}
> -
> -		/*
> -		 * Is there enough free space for the file plus a block of
> -		 * inodes? (if we need to allocate some)?
> -		 */
> -		ineed = XFS_IALLOC_BLOCKS(mp);
> -		longest = pag->pagf_longest;
> -		if (!longest)
> -			longest = pag->pagf_flcount > 0;
> -
> -		if (pag->pagf_freeblks >= needspace + ineed &&
> -		    longest >= ineed) {
> -			xfs_perag_put(pag);
> -			return agno;
> -		}
> -nextag:
> -		xfs_perag_put(pag);
> -		/*
> -		 * No point in iterating over the rest, if we're shutting
> -		 * down.
> -		 */
> -		if (XFS_FORCED_SHUTDOWN(mp))
> -			return NULLAGNUMBER;
> -		agno++;
> -		if (agno >= agcount)
> -			agno = 0;
> -		if (agno == pagno) {
> -			if (flags == 0)
> -				return NULLAGNUMBER;
> -			flags = 0;
> -		}
> -	}
> -}
> -
> -/*
>   * Try to retrieve the next record to the left/right from the current one.
>   */
>  STATIC int
> @@ -901,9 +793,9 @@ xfs_dialloc(
>  	xfs_buf_t	*agbp;		/* allocation group header's buffer */
>  	xfs_agnumber_t	agno;		/* allocation group number */
>  	int		error;		/* error return value */
> -	int		ialloced;	/* inode allocation status */
>  	int		noroom = 0;	/* no space for inode blk allocation */
>  	xfs_agnumber_t	start_agno;	/* starting allocation group number */
> +	int		pass;
>  	struct xfs_perag *pag;
>  
>  	if (*IO_agbp) {
> @@ -917,16 +809,6 @@ xfs_dialloc(
>  	}
>  
>  	/*
> -	 * We do not have an agbp, so select an initial allocation
> -	 * group for inode allocation.
> -	 */
> -	start_agno = xfs_ialloc_ag_select(tp, parent, mode, okalloc);
> -	if (start_agno == NULLAGNUMBER) {
> -		*inop = NULLFSINO;
> -		return 0;
> -	}
> -
> -	/*
>  	 * If we have already hit the ceiling of inode blocks then clear
>  	 * okalloc so we scan all available agi structures for a free
>  	 * inode.
> @@ -938,12 +820,31 @@ xfs_dialloc(
>  	}
>  
>  	/*
> -	 * Loop until we find an allocation group that either has free inodes
> -	 * or in which we can allocate some inodes.  Iterate through the
> -	 * allocation groups upward, wrapping at the end.
> +	 * For directories start with a new allocation groups, for other file
> +	 * types aim to find an inode close to the parent.
>  	 */
> +	if (S_ISDIR(mode)) {
> +		start_agno = xfs_ialloc_next_ag(mp);
> +		ASSERT(start_agno < mp->m_maxagi);
> +	} else {
> +		start_agno = XFS_INO_TO_AGNO(mp, parent);
> +		if (start_agno >= mp->m_maxagi)
> +			start_agno = 0;
> +	}
> +
> +	/*
> +	 * Loop through allocation groups, looking for one with a little
> +	 * free space in it.  Note we don't look for free inodes, exactly.
> +	 * Instead, we include whether there is a need to allocate inodes
> +	 * to mean that blocks must be allocated for them, if none are
> +	 * currently free.
> +	 */
> +	*inop = NULLFSINO;
>  	agno = start_agno;
> +	pass = 0;

Do we even need multiple passes here? The first and second
passes appear to be identical except for the XFS_ALLOC_FLAG_TRYLOCK
flag on the xfs_alloc_pagf_init() call. After the first time we've
allocated in an AG, pag->pagf_init will always be set, so this means
pass = 0 and pass = 1 are identical for anything other than a
freshly mounted filesystem. Hence I think you can just drop the
trylock pass here.

And further, I can't see why we need a second pass at all.

I.e. what we used to do was:

select ag:

	pass 1:
		nonblocking scan over all AGI/AGF buffers
	pass 2 on fail:
		blocking scan over all AGI/AGF buffers

dialloc:
	if okalloc
		allocate inode chunk
	else
		pass 3:
			blocking scan over AGIs to find free inodes

So the 3 passes were used to work around the blocking nature of
AGI/AGF locks and minimising the allocation latency caused by
waiting on busy AGI/AGF buffers. By moving to using the per-ag data,
we avoid that latency problem altogether except for the
initialisation cases, which onyl occur just after mount.

Your logic now is:

dialloc:
	pass 1
		nonblocking scan over pagi/pagf
		if free inodes found, allocate
		else if pagf cannot be read, goto pass 2
		else if no contiguous free space could be found goto pass 2
		else allocate inode chunk and return

	pass 2
		nonblocking scan over pagi/pagf
		if free inodes found, allocate
		else if no contiguous free space could be found goto pass 3
		else allocate inode chunk and return

	pass 3:
		nonblocking scan over pagi/pagf
		if free inodes found, allocate
		else allocate inode chunk and return

AFAICS, pass 3 will always fail if pass 2 failed - if there isn't
enough contiguous free space in the AGF, we won't be able to
allocate a new inode chunk and avoiding that check won't change
anything. And given that pass 2 is completely redundant for a
filesytem that has been active for a few minutes, I really can't see
a need for multiple passes here at all...

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

  reply	other threads:[~2012-06-18  3:10 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-06-05 14:46 [PATCH 0/7] inode allocator refactoring Christoph Hellwig
2012-06-05 14:46 ` [PATCH 1/7] xfs: remove xfs_ialloc_find_free Christoph Hellwig
2012-06-12  8:19   ` Dave Chinner
2012-06-05 14:46 ` [PATCH 2/7] xfs: split xfs_dialloc Christoph Hellwig
2012-06-18  1:46   ` Dave Chinner
2012-06-05 14:46 ` [PATCH 3/7] xfs: remove the alloc_done argument to xfs_dialloc Christoph Hellwig
2012-06-18  2:07   ` Dave Chinner
2012-06-05 14:46 ` [PATCH 4/7] xfs: add a short cut to xfs_dialloc for the non-NULL agbp case Christoph Hellwig
2012-06-18  2:11   ` Dave Chinner
2012-06-05 14:46 ` [PATCH 5/7] xfs: refactor xfs_ialloc_ag_select Christoph Hellwig
2012-06-18  2:23   ` Dave Chinner
2012-06-05 14:46 ` [PATCH 6/7] xfs: do not read the AGI buffer in xfs_dialloc until nessecary Christoph Hellwig
2012-06-18  2:30   ` Dave Chinner
2012-06-05 14:46 ` [PATCH 7/7] xfs: merge xfs_ialloc_ag_select into xfs_dialloc Christoph Hellwig
2012-06-18  3:10   ` Dave Chinner [this message]
  -- strict thread matches above, loose matches on Subject: below --
2012-07-04 14:54 [PATCH 0/7] inode allocator refactoring V2 Christoph Hellwig
2012-07-04 14:54 ` [PATCH 7/7] xfs: merge xfs_ialloc_ag_select into xfs_dialloc Christoph Hellwig
2012-07-26 17:47   ` Mark Tinguely
2012-07-30 17:21     ` Mark Tinguely

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20120618031002.GN19223@dastard \
    --to=david@fromorbit.com \
    --cc=hch@infradead.org \
    --cc=xfs@oss.sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox