public inbox for linux-xfs@vger.kernel.org
 help / color / mirror / Atom feed
From: Lachlan McIlroy <lachlan@sgi.com>
To: David Chinner <dgc@sgi.com>
Cc: xfs-dev <xfs-dev@sgi.com>, xfs-oss <xfs@oss.sgi.com>
Subject: Re: [Patch] Per iclog callback chain lock
Date: Wed, 02 Apr 2008 15:09:31 +1000	[thread overview]
Message-ID: <47F3150B.6000106@sgi.com> (raw)
In-Reply-To: <20080401231348.GT103491721@sgi.com>

Looks fine to me - just one small comment.

David Chinner wrote:
> Introduce an iclog callback chain lock.
> 
> Rather than use the icloglock for protecting the iclog completion
> callback chain, use a new per-iclog lock so that walking the
> callback chain doesn't require holding a global lock.
> 
> This reduces contention on the icloglock during log buffer I/O
> completion as the callback chain lock is take for every callback
> that is issued. On large log buffers, this can number in the
> hundreds to thousands per iclog so isolating the lock to the
> iclog makes a lot of sense.
> 
> Signed-off-by: Dave Chinner <dgc@sgi.com>
> ---
>  fs/xfs/xfs_log.c      |   35 +++++++++++++++++++----------------
>  fs/xfs/xfs_log_priv.h |   33 ++++++++++++++++++++++++++-------
>  2 files changed, 45 insertions(+), 23 deletions(-)
> 
> Index: 2.6.x-xfs-new/fs/xfs/xfs_log.c
> ===================================================================
> --- 2.6.x-xfs-new.orig/fs/xfs/xfs_log.c	2008-03-13 13:10:23.000000000 +1100
> +++ 2.6.x-xfs-new/fs/xfs/xfs_log.c	2008-03-13 19:35:51.251913648 +1100
> @@ -397,12 +397,10 @@ xfs_log_notify(xfs_mount_t	  *mp,		/* mo
>  	       void		  *iclog_hndl,	/* iclog to hang callback off */
>  	       xfs_log_callback_t *cb)
>  {
> -	xlog_t *log = mp->m_log;
>  	xlog_in_core_t	  *iclog = (xlog_in_core_t *)iclog_hndl;
>  	int	abortflg;
>  
> -	cb->cb_next = NULL;
> -	spin_lock(&log->l_icloglock);
> +	spin_lock(&iclog->ic_callback_lock);
>  	abortflg = (iclog->ic_state & XLOG_STATE_IOERROR);
>  	if (!abortflg) {
>  		ASSERT_ALWAYS((iclog->ic_state == XLOG_STATE_ACTIVE) ||
> @@ -411,7 +409,7 @@ xfs_log_notify(xfs_mount_t	  *mp,		/* mo
>  		*(iclog->ic_callback_tail) = cb;
>  		iclog->ic_callback_tail = &(cb->cb_next);
>  	}
> -	spin_unlock(&log->l_icloglock);
> +	spin_unlock(&iclog->ic_callback_lock);
>  	return abortflg;
>  }	/* xfs_log_notify */
>  
> @@ -1257,6 +1255,8 @@ xlog_alloc_log(xfs_mount_t	*mp,
>  		iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize;
>  		iclog->ic_state = XLOG_STATE_ACTIVE;
>  		iclog->ic_log = log;
> +		atomic_set(&iclog->ic_refcnt, 0);
Not related to this change but looks like we need it anyway.
Did you mean to include it in this change?

> +		spin_lock_init(&iclog->ic_callback_lock);
>  		iclog->ic_callback_tail = &(iclog->ic_callback);
>  		iclog->ic_datap = (char *)iclog->hic_data + log->l_iclog_hsize;
>  
> @@ -1990,7 +1990,7 @@ xlog_state_clean_log(xlog_t *log)
>  		if (iclog->ic_state == XLOG_STATE_DIRTY) {
>  			iclog->ic_state	= XLOG_STATE_ACTIVE;
>  			iclog->ic_offset       = 0;
> -			iclog->ic_callback	= NULL;   /* don't need to free */
> +			ASSERT(iclog->ic_callback == NULL);
>  			/*
>  			 * If the number of ops in this iclog indicate it just
>  			 * contains the dummy transaction, we can
> @@ -2193,37 +2193,40 @@ xlog_state_do_callback(
>  					be64_to_cpu(iclog->ic_header.h_lsn);
>  				spin_unlock(&log->l_grant_lock);
>  
> -				/*
> -				 * Keep processing entries in the callback list
> -				 * until we come around and it is empty.  We
> -				 * need to atomically see that the list is
> -				 * empty and change the state to DIRTY so that
> -				 * we don't miss any more callbacks being added.
> -				 */
> -				spin_lock(&log->l_icloglock);
>  			} else {
> +				spin_unlock(&log->l_icloglock);
>  				ioerrors++;
>  			}
> -			cb = iclog->ic_callback;
>  
> +			/*
> +			 * Keep processing entries in the callback list until
> +			 * we come around and it is empty.  We need to
> +			 * atomically see that the list is empty and change the
> +			 * state to DIRTY so that we don't miss any more
> +			 * callbacks being added.
> +			 */
> +			spin_lock(&iclog->ic_callback_lock);
> +			cb = iclog->ic_callback;
>  			while (cb) {
>  				iclog->ic_callback_tail = &(iclog->ic_callback);
>  				iclog->ic_callback = NULL;
> -				spin_unlock(&log->l_icloglock);
> +				spin_unlock(&iclog->ic_callback_lock);
>  
>  				/* perform callbacks in the order given */
>  				for (; cb; cb = cb_next) {
>  					cb_next = cb->cb_next;
>  					cb->cb_func(cb->cb_arg, aborted);
>  				}
> -				spin_lock(&log->l_icloglock);
> +				spin_lock(&iclog->ic_callback_lock);
>  				cb = iclog->ic_callback;
>  			}
>  
>  			loopdidcallbacks++;
>  			funcdidcallbacks++;
>  
> +			spin_lock(&log->l_icloglock);
>  			ASSERT(iclog->ic_callback == NULL);
> +			spin_unlock(&iclog->ic_callback_lock);
Okay so we can acquire the l_icloglock while holding ic_callback_lock.

>  			if (!(iclog->ic_state & XLOG_STATE_IOERROR))
>  				iclog->ic_state = XLOG_STATE_DIRTY;
>  
> Index: 2.6.x-xfs-new/fs/xfs/xfs_log_priv.h
> ===================================================================
> --- 2.6.x-xfs-new.orig/fs/xfs/xfs_log_priv.h	2008-02-22 13:48:25.000000000 +1100
> +++ 2.6.x-xfs-new/fs/xfs/xfs_log_priv.h	2008-03-13 19:34:57.430809151 +1100
> @@ -324,6 +324,19 @@ typedef struct xlog_rec_ext_header {
>   * - ic_offset is the current number of bytes written to in this iclog.
>   * - ic_refcnt is bumped when someone is writing to the log.
>   * - ic_state is the state of the iclog.
> + *
> + * Because of cacheline contention on large machines, we need to separate
> + * various resources onto different cachelines. To start with, make the
> + * structure cacheline aligned. The following fields can be contended on
> + * by independent processes:
> + *
> + *	- ic_callback_*
> + *	- ic_refcnt
> + *	- fields protected by the global l_icloglock
> + *
> + * so we need to ensure that these fields are located in separate cachelines.
> + * We'll put all the read-only and l_icloglock fields in the first cacheline,
> + * and move everything else out to subsequent cachelines.
>   */
>  typedef struct xlog_iclog_fields {
>  	sv_t			ic_forcesema;
> @@ -332,18 +345,23 @@ typedef struct xlog_iclog_fields {
>  	struct xlog_in_core	*ic_prev;
>  	struct xfs_buf		*ic_bp;
>  	struct log		*ic_log;
> -	xfs_log_callback_t	*ic_callback;
> -	xfs_log_callback_t	**ic_callback_tail;
> -#ifdef XFS_LOG_TRACE
> -	struct ktrace		*ic_trace;
> -#endif
>  	int			ic_size;
>  	int			ic_offset;
> -	atomic_t		ic_refcnt;
>  	int			ic_bwritecnt;
>  	ushort_t		ic_state;
>  	char			*ic_datap;	/* pointer to iclog data */
> -} xlog_iclog_fields_t;
> +#ifdef XFS_LOG_TRACE
> +	struct ktrace		*ic_trace;
> +#endif
> +
> +	/* Callback structures need their own cacheline */
> +	spinlock_t		ic_callback_lock ____cacheline_aligned_in_smp;
> +	xfs_log_callback_t	*ic_callback;
> +	xfs_log_callback_t	**ic_callback_tail;
> +
> +	/* reference counts need their own cacheline */
> +	atomic_t		ic_refcnt ____cacheline_aligned_in_smp;
> +} xlog_iclog_fields_t ____cacheline_aligned_in_smp;
>  
>  typedef union xlog_in_core2 {
>  	xlog_rec_header_t	hic_header;
> @@ -366,6 +384,7 @@ typedef struct xlog_in_core {
>  #define	ic_bp		hic_fields.ic_bp
>  #define	ic_log		hic_fields.ic_log
>  #define	ic_callback	hic_fields.ic_callback
> +#define	ic_callback_lock hic_fields.ic_callback_lock
>  #define	ic_callback_tail hic_fields.ic_callback_tail
>  #define	ic_trace	hic_fields.ic_trace
>  #define	ic_size		hic_fields.ic_size
> 

  parent reply	other threads:[~2008-04-02  4:26 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-04-01 23:13 [Patch] Per iclog callback chain lock David Chinner
2008-04-01 23:24 ` David Chinner
2008-04-02  5:09 ` Lachlan McIlroy [this message]
2008-04-02  4:39   ` David Chinner
2008-04-02  6:19     ` Lachlan McIlroy
2008-04-07 12:51 ` Christoph Hellwig
2008-04-07 22:17   ` David Chinner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=47F3150B.6000106@sgi.com \
    --to=lachlan@sgi.com \
    --cc=dgc@sgi.com \
    --cc=xfs-dev@sgi.com \
    --cc=xfs@oss.sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox