All of lore.kernel.org
 help / color / mirror / Atom feed
From: Lachlan McIlroy <lachlan@sgi.com>
To: David Chinner <dgc@sgi.com>
Cc: xfs-dev <xfs-dev@sgi.com>, xfs-oss <xfs@oss.sgi.com>
Subject: Re: [Patch] Per iclog callback chain lock
Date: Wed, 02 Apr 2008 15:09:31 +1000	[thread overview]
Message-ID: <47F3150B.6000106@sgi.com> (raw)
In-Reply-To: <20080401231348.GT103491721@sgi.com>

Looks fine to me - just one small comment.

David Chinner wrote:
> Introduce an iclog callback chain lock.
> 
> Rather than use the icloglock for protecting the iclog completion
> callback chain, use a new per-iclog lock so that walking the
> callback chain doesn't require holding a global lock.
> 
> This reduces contention on the icloglock during log buffer I/O
> completion as the callback chain lock is take for every callback
> that is issued. On large log buffers, this can number in the
> hundreds to thousands per iclog so isolating the lock to the
> iclog makes a lot of sense.
> 
> Signed-off-by: Dave Chinner <dgc@sgi.com>
> ---
>  fs/xfs/xfs_log.c      |   35 +++++++++++++++++++----------------
>  fs/xfs/xfs_log_priv.h |   33 ++++++++++++++++++++++++++-------
>  2 files changed, 45 insertions(+), 23 deletions(-)
> 
> Index: 2.6.x-xfs-new/fs/xfs/xfs_log.c
> ===================================================================
> --- 2.6.x-xfs-new.orig/fs/xfs/xfs_log.c	2008-03-13 13:10:23.000000000 +1100
> +++ 2.6.x-xfs-new/fs/xfs/xfs_log.c	2008-03-13 19:35:51.251913648 +1100
> @@ -397,12 +397,10 @@ xfs_log_notify(xfs_mount_t	  *mp,		/* mo
>  	       void		  *iclog_hndl,	/* iclog to hang callback off */
>  	       xfs_log_callback_t *cb)
>  {
> -	xlog_t *log = mp->m_log;
>  	xlog_in_core_t	  *iclog = (xlog_in_core_t *)iclog_hndl;
>  	int	abortflg;
>  
> -	cb->cb_next = NULL;
> -	spin_lock(&log->l_icloglock);
> +	spin_lock(&iclog->ic_callback_lock);
>  	abortflg = (iclog->ic_state & XLOG_STATE_IOERROR);
>  	if (!abortflg) {
>  		ASSERT_ALWAYS((iclog->ic_state == XLOG_STATE_ACTIVE) ||
> @@ -411,7 +409,7 @@ xfs_log_notify(xfs_mount_t	  *mp,		/* mo
>  		*(iclog->ic_callback_tail) = cb;
>  		iclog->ic_callback_tail = &(cb->cb_next);
>  	}
> -	spin_unlock(&log->l_icloglock);
> +	spin_unlock(&iclog->ic_callback_lock);
>  	return abortflg;
>  }	/* xfs_log_notify */
>  
> @@ -1257,6 +1255,8 @@ xlog_alloc_log(xfs_mount_t	*mp,
>  		iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize;
>  		iclog->ic_state = XLOG_STATE_ACTIVE;
>  		iclog->ic_log = log;
> +		atomic_set(&iclog->ic_refcnt, 0);
Not related to this change but looks like we need it anyway.
Did you mean to include it in this change?

> +		spin_lock_init(&iclog->ic_callback_lock);
>  		iclog->ic_callback_tail = &(iclog->ic_callback);
>  		iclog->ic_datap = (char *)iclog->hic_data + log->l_iclog_hsize;
>  
> @@ -1990,7 +1990,7 @@ xlog_state_clean_log(xlog_t *log)
>  		if (iclog->ic_state == XLOG_STATE_DIRTY) {
>  			iclog->ic_state	= XLOG_STATE_ACTIVE;
>  			iclog->ic_offset       = 0;
> -			iclog->ic_callback	= NULL;   /* don't need to free */
> +			ASSERT(iclog->ic_callback == NULL);
>  			/*
>  			 * If the number of ops in this iclog indicate it just
>  			 * contains the dummy transaction, we can
> @@ -2193,37 +2193,40 @@ xlog_state_do_callback(
>  					be64_to_cpu(iclog->ic_header.h_lsn);
>  				spin_unlock(&log->l_grant_lock);
>  
> -				/*
> -				 * Keep processing entries in the callback list
> -				 * until we come around and it is empty.  We
> -				 * need to atomically see that the list is
> -				 * empty and change the state to DIRTY so that
> -				 * we don't miss any more callbacks being added.
> -				 */
> -				spin_lock(&log->l_icloglock);
>  			} else {
> +				spin_unlock(&log->l_icloglock);
>  				ioerrors++;
>  			}
> -			cb = iclog->ic_callback;
>  
> +			/*
> +			 * Keep processing entries in the callback list until
> +			 * we come around and it is empty.  We need to
> +			 * atomically see that the list is empty and change the
> +			 * state to DIRTY so that we don't miss any more
> +			 * callbacks being added.
> +			 */
> +			spin_lock(&iclog->ic_callback_lock);
> +			cb = iclog->ic_callback;
>  			while (cb) {
>  				iclog->ic_callback_tail = &(iclog->ic_callback);
>  				iclog->ic_callback = NULL;
> -				spin_unlock(&log->l_icloglock);
> +				spin_unlock(&iclog->ic_callback_lock);
>  
>  				/* perform callbacks in the order given */
>  				for (; cb; cb = cb_next) {
>  					cb_next = cb->cb_next;
>  					cb->cb_func(cb->cb_arg, aborted);
>  				}
> -				spin_lock(&log->l_icloglock);
> +				spin_lock(&iclog->ic_callback_lock);
>  				cb = iclog->ic_callback;
>  			}
>  
>  			loopdidcallbacks++;
>  			funcdidcallbacks++;
>  
> +			spin_lock(&log->l_icloglock);
>  			ASSERT(iclog->ic_callback == NULL);
> +			spin_unlock(&iclog->ic_callback_lock);
Okay so we can acquire the l_icloglock while holding ic_callback_lock.

>  			if (!(iclog->ic_state & XLOG_STATE_IOERROR))
>  				iclog->ic_state = XLOG_STATE_DIRTY;
>  
> Index: 2.6.x-xfs-new/fs/xfs/xfs_log_priv.h
> ===================================================================
> --- 2.6.x-xfs-new.orig/fs/xfs/xfs_log_priv.h	2008-02-22 13:48:25.000000000 +1100
> +++ 2.6.x-xfs-new/fs/xfs/xfs_log_priv.h	2008-03-13 19:34:57.430809151 +1100
> @@ -324,6 +324,19 @@ typedef struct xlog_rec_ext_header {
>   * - ic_offset is the current number of bytes written to in this iclog.
>   * - ic_refcnt is bumped when someone is writing to the log.
>   * - ic_state is the state of the iclog.
> + *
> + * Because of cacheline contention on large machines, we need to separate
> + * various resources onto different cachelines. To start with, make the
> + * structure cacheline aligned. The following fields can be contended on
> + * by independent processes:
> + *
> + *	- ic_callback_*
> + *	- ic_refcnt
> + *	- fields protected by the global l_icloglock
> + *
> + * so we need to ensure that these fields are located in separate cachelines.
> + * We'll put all the read-only and l_icloglock fields in the first cacheline,
> + * and move everything else out to subsequent cachelines.
>   */
>  typedef struct xlog_iclog_fields {
>  	sv_t			ic_forcesema;
> @@ -332,18 +345,23 @@ typedef struct xlog_iclog_fields {
>  	struct xlog_in_core	*ic_prev;
>  	struct xfs_buf		*ic_bp;
>  	struct log		*ic_log;
> -	xfs_log_callback_t	*ic_callback;
> -	xfs_log_callback_t	**ic_callback_tail;
> -#ifdef XFS_LOG_TRACE
> -	struct ktrace		*ic_trace;
> -#endif
>  	int			ic_size;
>  	int			ic_offset;
> -	atomic_t		ic_refcnt;
>  	int			ic_bwritecnt;
>  	ushort_t		ic_state;
>  	char			*ic_datap;	/* pointer to iclog data */
> -} xlog_iclog_fields_t;
> +#ifdef XFS_LOG_TRACE
> +	struct ktrace		*ic_trace;
> +#endif
> +
> +	/* Callback structures need their own cacheline */
> +	spinlock_t		ic_callback_lock ____cacheline_aligned_in_smp;
> +	xfs_log_callback_t	*ic_callback;
> +	xfs_log_callback_t	**ic_callback_tail;
> +
> +	/* reference counts need their own cacheline */
> +	atomic_t		ic_refcnt ____cacheline_aligned_in_smp;
> +} xlog_iclog_fields_t ____cacheline_aligned_in_smp;
>  
>  typedef union xlog_in_core2 {
>  	xlog_rec_header_t	hic_header;
> @@ -366,6 +384,7 @@ typedef struct xlog_in_core {
>  #define	ic_bp		hic_fields.ic_bp
>  #define	ic_log		hic_fields.ic_log
>  #define	ic_callback	hic_fields.ic_callback
> +#define	ic_callback_lock hic_fields.ic_callback_lock
>  #define	ic_callback_tail hic_fields.ic_callback_tail
>  #define	ic_trace	hic_fields.ic_trace
>  #define	ic_size		hic_fields.ic_size
> 

  parent reply	other threads:[~2008-04-02  4:26 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-04-01 23:13 [Patch] Per iclog callback chain lock David Chinner
2008-04-01 23:24 ` David Chinner
2008-04-02  5:09 ` Lachlan McIlroy [this message]
2008-04-02  4:39   ` David Chinner
2008-04-02  6:19     ` Lachlan McIlroy
2008-04-07 12:51 ` Christoph Hellwig
2008-04-07 22:17   ` David Chinner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=47F3150B.6000106@sgi.com \
    --to=lachlan@sgi.com \
    --cc=dgc@sgi.com \
    --cc=xfs-dev@sgi.com \
    --cc=xfs@oss.sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.