All of lore.kernel.org
 help / color / mirror / Atom feed
From: Niv Sardi <xaiki@cxhome.ath.cx>
To: David Chinner <dgc@sgi.com>
Cc: xfs-dev <xfs-dev@sgi.com>, xfs-oss <xfs@oss.sgi.com>
Subject: Re: [Patch] unique per-AG inode generation number initialisation
Date: Wed, 02 Apr 2008 15:02:42 +1100	[thread overview]
Message-ID: <nccbq4slx99.fsf@sgi.com> (raw)
In-Reply-To: <20080401231815.GW103491721@sgi.com> (David Chinner's message of "Wed, 2 Apr 2008 09:18:15 +1000")


<HAT type=DMAPI>

David Chinner <dgc@sgi.com> writes:
> Don't initialise new inode generation numbers to zero
>
> When we allocation new inode chunks, we initialise the generation
> numbers to zero. This works fine until we delete a chunk and then
> reallocate it, resulting in the same inode numbers but with a
> reset generation count. This can result in inode/generation
> pairs of different inodes occurring relatively close together.
>
> Given that the inode/gen pair makes up the "unique" portion of
> an NFS filehandle on XFS, this can result in file handles cached
> on clients being seen on the wire from the server but refer to
> a different file. This causes .... issues for NFS clients.
>
> Hence we need a unique generation number initialisation for
> each inode to prevent reuse of a small portion of the generation
> number space. Make this initialiser per-allocation group so
> that it is not a single point of contention in the filesystem,
> and increment it on every allocation within an AG to reduce the
> chance that a generation number is reused for a given inode number
> if the inode chunk is deleted and reallocated immediately
> afterwards.
>
> It is safe to add the agi_newinogen field to the AGI without
> using a feature bit. If an older kernel is used, it simply
> will not update the field on allocation. If the kernel is
> updated and the field has garbage in it, then it's like having a
> random seed to the generation number....
>
> Signed-off-by: Dave Chinner <dgc@sgi.com>
> ---
>  fs/xfs/xfs_ag.h     |    4 +++-
>  fs/xfs/xfs_ialloc.c |   30 ++++++++++++++++++++++--------
>  2 files changed, 25 insertions(+), 9 deletions(-)

Appart from the bit of overhead all seems good.

> Index: 2.6.x-xfs-new/fs/xfs/xfs_ag.h
> ===================================================================
> --- 2.6.x-xfs-new.orig/fs/xfs/xfs_ag.h	2008-01-18 18:30:06.000000000 +1100
> +++ 2.6.x-xfs-new/fs/xfs/xfs_ag.h	2008-03-26 13:03:41.122918236 +1100
> @@ -121,6 +121,7 @@ typedef struct xfs_agi {
>  	 * still being referenced.
>  	 */
>  	__be32		agi_unlinked[XFS_AGI_UNLINKED_BUCKETS];
> +	__be32		agi_newinogen;	/* inode cluster generation */
>  } xfs_agi_t;
>  
>  #define	XFS_AGI_MAGICNUM	0x00000001
> @@ -134,7 +135,8 @@ typedef struct xfs_agi {
>  #define	XFS_AGI_NEWINO		0x00000100
>  #define	XFS_AGI_DIRINO		0x00000200
>  #define	XFS_AGI_UNLINKED	0x00000400
> -#define	XFS_AGI_NUM_BITS	11
> +#define	XFS_AGI_NEWINOGEN	0x00000800
> +#define	XFS_AGI_NUM_BITS	12
>  #define	XFS_AGI_ALL_BITS	((1 << XFS_AGI_NUM_BITS) - 1)
>  
>  /* disk block (xfs_daddr_t) in the AG */
> Index: 2.6.x-xfs-new/fs/xfs/xfs_ialloc.c
> ===================================================================
> --- 2.6.x-xfs-new.orig/fs/xfs/xfs_ialloc.c	2008-03-25 15:41:27.000000000 +1100
> +++ 2.6.x-xfs-new/fs/xfs/xfs_ialloc.c	2008-03-26 14:29:47.998554368 +1100
> @@ -309,6 +309,8 @@ xfs_ialloc_ag_alloc(
>  			free = XFS_MAKE_IPTR(args.mp, fbuf, i);
>  			free->di_core.di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
>  			free->di_core.di_version = version;
> +			free->di_core.di_gen = agi->agi_newinogen;
> +			be32_add_cpu(&agi->agi_newinogen, 1);
>  			free->di_next_unlinked = cpu_to_be32(NULLAGINO);
>  			xfs_ialloc_log_di(tp, fbuf, i,
>  				XFS_DI_CORE_BITS | XFS_DI_NEXT_UNLINKED);
> @@ -347,7 +349,8 @@ xfs_ialloc_ag_alloc(
>  	 * Log allocation group header fields
>  	 */
>  	xfs_ialloc_log_agi(tp, agbp,
> -		XFS_AGI_COUNT | XFS_AGI_FREECOUNT | XFS_AGI_NEWINO);
> +		XFS_AGI_COUNT | XFS_AGI_FREECOUNT |
> +		XFS_AGI_NEWINO | XFS_AGI_NEWINOGEN);
>  	/*
>  	 * Modify/log superblock values for inode count and inode free count.
>  	 */
> @@ -896,11 +899,12 @@ nextag:
>  	ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset);
>  	XFS_INOBT_CLR_FREE(&rec, offset);
>  	rec.ir_freecount--;
> +	be32_add_cpu(&agi->agi_newinogen, 1);
>  	if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount,
>  			rec.ir_free)))
>  		goto error0;
>  	be32_add(&agi->agi_freecount, -1);
> -	xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
> +	xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT | XFS_AGI_NEWINOGEN);
>  	down_read(&mp->m_peraglock);
>  	mp->m_perag[tagno].pagi_freecount--;
>  	up_read(&mp->m_peraglock);
> @@ -1320,6 +1324,11 @@ xfs_ialloc_compute_maxlevels(
>  
>  /*
>   * Log specified fields for the ag hdr (inode section)
> + *
> + * We don't log the unlinked inode fields through here; they
> + * get logged directly to the buffer. Hence we have a discontinuity
> + * in the fields we are logging and we need two calls to map all
> + * the dirtied parts of the agi....
>   */
>  void
>  xfs_ialloc_log_agi(
> @@ -1342,22 +1351,27 @@ xfs_ialloc_log_agi(
>  		offsetof(xfs_agi_t, agi_newino),
>  		offsetof(xfs_agi_t, agi_dirino),
>  		offsetof(xfs_agi_t, agi_unlinked),
> +		offsetof(xfs_agi_t, agi_newinogen),
>  		sizeof(xfs_agi_t)
>  	};
> +	int			log_newino = fields & XFS_AGI_NEWINOGEN;
> +
>  #ifdef DEBUG
>  	xfs_agi_t		*agi;	/* allocation group header */
>  
>  	agi = XFS_BUF_TO_AGI(bp);
>  	ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
>  #endif
> -	/*
> -	 * Compute byte offsets for the first and last fields.
> -	 */
> +	fields &= ~XFS_AGI_NEWINOGEN;
> +
> +	/* Compute byte offsets for the first and last fields.  */
>  	xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS, &first, &last);
> -	/*
> -	 * Log the allocation group inode header buffer.
> -	 */
>  	xfs_trans_log_buf(tp, bp, first, last);
> +	if (log_newino) {
> +		xfs_btree_offsets(XFS_AGI_NEWINOGEN, offsets, XFS_AGI_NUM_BITS,
> +					&first, &last);
> +		xfs_trans_log_buf(tp, bp, first, last);
> +	}
>  }
>  
>  /*

-- 
Niv Sardi

  reply	other threads:[~2008-04-02  4:02 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-04-01 23:18 [Patch] unique per-AG inode generation number initialisation David Chinner
2008-04-02  4:02 ` Niv Sardi [this message]
2008-04-04  9:08 ` Hans-Peter Jansen
2008-04-07 12:57 ` Christoph Hellwig
2008-04-07 21:52   ` David Chinner
2008-04-10  4:34     ` David Chinner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=nccbq4slx99.fsf@sgi.com \
    --to=xaiki@cxhome.ath.cx \
    --cc=dgc@sgi.com \
    --cc=xfs-dev@sgi.com \
    --cc=xfs@oss.sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.