public inbox for linux-xfs@vger.kernel.org
 help / color / mirror / Atom feed
* [patch] Use atomics for iclog reference counting
@ 2008-01-21  5:30 David Chinner
  2008-01-23  5:13 ` Timothy Shimmin
  0 siblings, 1 reply; 7+ messages in thread
From: David Chinner @ 2008-01-21  5:30 UTC (permalink / raw)
  To: xfs-dev; +Cc: xfs-oss

Now that we update the log tail LSN less frequently on
transaction completion, we pass the contention straight to
the global block stat lock (l_iclog_lock) during transaction
completion.

We currently have to take this lock to decrement the iclog
reference count. There is a reference count on each iclog,
so we need to take the global lock for all refcount changes.

When large numbers of processes are all doing small transactions,
the iclog reference counts will be quite high, and the state change
that absolutely requires the l_iclog_lock is the exception rather than
the norm.

Change the reference counting on the iclogs to use atomic_inc/dec
so that we can use atomic_dec_and_lock during transaction completion
and avoid the need for grabbing the l_iclog_lock for every reference
count decrement except the one that matters - the last.

Signed-off-by: Dave Chinner <dgc@sgi.com>
---
 fs/xfs/xfs_log.c      |   36 +++++++++++++++++++++---------------
 fs/xfs/xfs_log_priv.h |    2 +-
 fs/xfs/xfsidbg.c      |    2 +-
 3 files changed, 23 insertions(+), 17 deletions(-)

Index: 2.6.x-xfs-new/fs/xfs/xfs_log.c
===================================================================
--- 2.6.x-xfs-new.orig/fs/xfs/xfs_log.c	2008-01-21 16:16:51.804146394 +1100
+++ 2.6.x-xfs-new/fs/xfs/xfs_log.c	2008-01-21 16:23:35.369691221 +1100
@@ -675,7 +675,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
 
 		spin_lock(&log->l_icloglock);
 		iclog = log->l_iclog;
-		iclog->ic_refcnt++;
+		atomic_inc(&iclog->ic_refcnt);
 		spin_unlock(&log->l_icloglock);
 		xlog_state_want_sync(log, iclog);
 		(void) xlog_state_release_iclog(log, iclog);
@@ -713,7 +713,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
 		 */
 		spin_lock(&log->l_icloglock);
 		iclog = log->l_iclog;
-		iclog->ic_refcnt++;
+		atomic_inc(&iclog->ic_refcnt);
 		spin_unlock(&log->l_icloglock);
 
 		xlog_state_want_sync(log, iclog);
@@ -1405,7 +1405,7 @@ xlog_sync(xlog_t		*log,
 	int		v2 = XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb);
 
 	XFS_STATS_INC(xs_log_writes);
-	ASSERT(iclog->ic_refcnt == 0);
+	ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
 
 	/* Add for LR header */
 	count_init = log->l_iclog_hsize + iclog->ic_offset;
@@ -2311,7 +2311,7 @@ xlog_state_done_syncing(
 
 	ASSERT(iclog->ic_state == XLOG_STATE_SYNCING ||
 	       iclog->ic_state == XLOG_STATE_IOERROR);
-	ASSERT(iclog->ic_refcnt == 0);
+	ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
 	ASSERT(iclog->ic_bwritecnt == 1 || iclog->ic_bwritecnt == 2);
 
 
@@ -2393,7 +2393,7 @@ restart:
 	ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
 	head = &iclog->ic_header;
 
-	iclog->ic_refcnt++;			/* prevents sync */
+	atomic_inc(&iclog->ic_refcnt);	/* prevents sync */
 	log_offset = iclog->ic_offset;
 
 	/* On the 1st write to an iclog, figure out lsn.  This works
@@ -2425,12 +2425,12 @@ restart:
 		xlog_state_switch_iclogs(log, iclog, iclog->ic_size);
 
 		/* If I'm the only one writing to this iclog, sync it to disk */
-		if (iclog->ic_refcnt == 1) {
+		if (atomic_read(&iclog->ic_refcnt) == 1) {
 			spin_unlock(&log->l_icloglock);
 			if ((error = xlog_state_release_iclog(log, iclog)))
 				return error;
 		} else {
-			iclog->ic_refcnt--;
+			atomic_dec(&iclog->ic_refcnt);
 			spin_unlock(&log->l_icloglock);
 		}
 		goto restart;
@@ -2821,18 +2821,23 @@ xlog_state_release_iclog(
 {
 	int		sync = 0;	/* do we sync? */
 
-	spin_lock(&log->l_icloglock);
 	if (iclog->ic_state & XLOG_STATE_IOERROR) {
 		spin_unlock(&log->l_icloglock);
 		return XFS_ERROR(EIO);
 	}
 
-	ASSERT(iclog->ic_refcnt > 0);
+	ASSERT(atomic_read(&iclog->ic_refcnt) > 0);
+	if (!atomic_dec_and_lock(&iclog->ic_refcnt, &log->l_icloglock))
+		return 0;
+
+	if (iclog->ic_state & XLOG_STATE_IOERROR) {
+		spin_unlock(&log->l_icloglock);
+		return XFS_ERROR(EIO);
+	}
 	ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE ||
 	       iclog->ic_state == XLOG_STATE_WANT_SYNC);
 
-	if (--iclog->ic_refcnt == 0 &&
-	    iclog->ic_state == XLOG_STATE_WANT_SYNC) {
+	if (iclog->ic_state == XLOG_STATE_WANT_SYNC) {
 		/* update tail before writing to iclog */
 		xlog_assign_tail_lsn(log->l_mp);
 		sync++;
@@ -2952,7 +2957,8 @@ xlog_state_sync_all(xlog_t *log, uint fl
 		 * previous iclog and go to sleep.
 		 */
 		if (iclog->ic_state == XLOG_STATE_DIRTY ||
-		    (iclog->ic_refcnt == 0 && iclog->ic_offset == 0)) {
+		    (atomic_read(&iclog->ic_refcnt) == 0
+		     && iclog->ic_offset == 0)) {
 			iclog = iclog->ic_prev;
 			if (iclog->ic_state == XLOG_STATE_ACTIVE ||
 			    iclog->ic_state == XLOG_STATE_DIRTY)
@@ -2960,14 +2966,14 @@ xlog_state_sync_all(xlog_t *log, uint fl
 			else
 				goto maybe_sleep;
 		} else {
-			if (iclog->ic_refcnt == 0) {
+			if (atomic_read(&iclog->ic_refcnt) == 0) {
 				/* We are the only one with access to this
 				 * iclog.  Flush it out now.  There should
 				 * be a roundoff of zero to show that someone
 				 * has already taken care of the roundoff from
 				 * the previous sync.
 				 */
-				iclog->ic_refcnt++;
+				atomic_inc(&iclog->ic_refcnt);
 				lsn = be64_to_cpu(iclog->ic_header.h_lsn);
 				xlog_state_switch_iclogs(log, iclog, 0);
 				spin_unlock(&log->l_icloglock);
@@ -3099,7 +3105,7 @@ try_again:
 			already_slept = 1;
 			goto try_again;
 		} else {
-			iclog->ic_refcnt++;
+			atomic_inc(&iclog->ic_refcnt);
 			xlog_state_switch_iclogs(log, iclog, 0);
 			spin_unlock(&log->l_icloglock);
 			if (xlog_state_release_iclog(log, iclog))
Index: 2.6.x-xfs-new/fs/xfs/xfs_log_priv.h
===================================================================
--- 2.6.x-xfs-new.orig/fs/xfs/xfs_log_priv.h	2008-01-21 16:06:27.127557437 +1100
+++ 2.6.x-xfs-new/fs/xfs/xfs_log_priv.h	2008-01-21 16:23:35.369691221 +1100
@@ -339,7 +339,7 @@ typedef struct xlog_iclog_fields {
 #endif
 	int			ic_size;
 	int			ic_offset;
-	int			ic_refcnt;
+	atomic_t		ic_refcnt;
 	int			ic_bwritecnt;
 	ushort_t		ic_state;
 	char			*ic_datap;	/* pointer to iclog data */
Index: 2.6.x-xfs-new/fs/xfs/xfsidbg.c
===================================================================
--- 2.6.x-xfs-new.orig/fs/xfs/xfsidbg.c	2008-01-21 16:06:27.127557437 +1100
+++ 2.6.x-xfs-new/fs/xfs/xfsidbg.c	2008-01-21 16:23:35.385689220 +1100
@@ -5633,7 +5633,7 @@ xfsidbg_xiclog(xlog_in_core_t *iclog)
 #else
 		NULL,
 #endif
-		iclog->ic_refcnt, iclog->ic_bwritecnt);
+		atomic_read(&iclog->ic_refcnt), iclog->ic_bwritecnt);
 	if (iclog->ic_state & XLOG_STATE_ALL)
 		printflags(iclog->ic_state, ic_flags, " state:");
 	else

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [patch] Use atomics for iclog reference counting
  2008-01-21  5:30 [patch] Use atomics for iclog reference counting David Chinner
@ 2008-01-23  5:13 ` Timothy Shimmin
  2008-02-14 23:47   ` David Chinner
  0 siblings, 1 reply; 7+ messages in thread
From: Timothy Shimmin @ 2008-01-23  5:13 UTC (permalink / raw)
  To: David Chinner; +Cc: xfs-dev, xfs-oss

I'll have a look...

--Tim

David Chinner wrote:
> Now that we update the log tail LSN less frequently on
> transaction completion, we pass the contention straight to
> the global block stat lock (l_iclog_lock) during transaction
> completion.
> 
> We currently have to take this lock to decrement the iclog
> reference count. There is a reference count on each iclog,
> so we need to take the global lock for all refcount changes.
> 
> When large numbers of processes are all doing small transactions,
> the iclog reference counts will be quite high, and the state change
> that absolutely requires the l_iclog_lock is the exception rather than
> the norm.
> 
> Change the reference counting on the iclogs to use atomic_inc/dec
> so that we can use atomic_dec_and_lock during transaction completion
> and avoid the need for grabbing the l_iclog_lock for every reference
> count decrement except the one that matters - the last.
> 
> Signed-off-by: Dave Chinner <dgc@sgi.com>
> ---
>  fs/xfs/xfs_log.c      |   36 +++++++++++++++++++++---------------
>  fs/xfs/xfs_log_priv.h |    2 +-
>  fs/xfs/xfsidbg.c      |    2 +-
>  3 files changed, 23 insertions(+), 17 deletions(-)
> 
> Index: 2.6.x-xfs-new/fs/xfs/xfs_log.c
> ===================================================================
> --- 2.6.x-xfs-new.orig/fs/xfs/xfs_log.c	2008-01-21 16:16:51.804146394 +1100
> +++ 2.6.x-xfs-new/fs/xfs/xfs_log.c	2008-01-21 16:23:35.369691221 +1100
> @@ -675,7 +675,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
>  
>  		spin_lock(&log->l_icloglock);
>  		iclog = log->l_iclog;
> -		iclog->ic_refcnt++;
> +		atomic_inc(&iclog->ic_refcnt);
>  		spin_unlock(&log->l_icloglock);
>  		xlog_state_want_sync(log, iclog);
>  		(void) xlog_state_release_iclog(log, iclog);
> @@ -713,7 +713,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
>  		 */
>  		spin_lock(&log->l_icloglock);
>  		iclog = log->l_iclog;
> -		iclog->ic_refcnt++;
> +		atomic_inc(&iclog->ic_refcnt);
>  		spin_unlock(&log->l_icloglock);
>  
>  		xlog_state_want_sync(log, iclog);
> @@ -1405,7 +1405,7 @@ xlog_sync(xlog_t		*log,
>  	int		v2 = XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb);
>  
>  	XFS_STATS_INC(xs_log_writes);
> -	ASSERT(iclog->ic_refcnt == 0);
> +	ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
>  
>  	/* Add for LR header */
>  	count_init = log->l_iclog_hsize + iclog->ic_offset;
> @@ -2311,7 +2311,7 @@ xlog_state_done_syncing(
>  
>  	ASSERT(iclog->ic_state == XLOG_STATE_SYNCING ||
>  	       iclog->ic_state == XLOG_STATE_IOERROR);
> -	ASSERT(iclog->ic_refcnt == 0);
> +	ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
>  	ASSERT(iclog->ic_bwritecnt == 1 || iclog->ic_bwritecnt == 2);
>  
>  
> @@ -2393,7 +2393,7 @@ restart:
>  	ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
>  	head = &iclog->ic_header;
>  
> -	iclog->ic_refcnt++;			/* prevents sync */
> +	atomic_inc(&iclog->ic_refcnt);	/* prevents sync */
>  	log_offset = iclog->ic_offset;
>  
>  	/* On the 1st write to an iclog, figure out lsn.  This works
> @@ -2425,12 +2425,12 @@ restart:
>  		xlog_state_switch_iclogs(log, iclog, iclog->ic_size);
>  
>  		/* If I'm the only one writing to this iclog, sync it to disk */
> -		if (iclog->ic_refcnt == 1) {
> +		if (atomic_read(&iclog->ic_refcnt) == 1) {
>  			spin_unlock(&log->l_icloglock);
>  			if ((error = xlog_state_release_iclog(log, iclog)))
>  				return error;
>  		} else {
> -			iclog->ic_refcnt--;
> +			atomic_dec(&iclog->ic_refcnt);
>  			spin_unlock(&log->l_icloglock);
>  		}
>  		goto restart;
> @@ -2821,18 +2821,23 @@ xlog_state_release_iclog(
>  {
>  	int		sync = 0;	/* do we sync? */
>  
> -	spin_lock(&log->l_icloglock);
>  	if (iclog->ic_state & XLOG_STATE_IOERROR) {
>  		spin_unlock(&log->l_icloglock);
>  		return XFS_ERROR(EIO);
>  	}
>  
> -	ASSERT(iclog->ic_refcnt > 0);
> +	ASSERT(atomic_read(&iclog->ic_refcnt) > 0);
> +	if (!atomic_dec_and_lock(&iclog->ic_refcnt, &log->l_icloglock))
> +		return 0;
> +
> +	if (iclog->ic_state & XLOG_STATE_IOERROR) {
> +		spin_unlock(&log->l_icloglock);
> +		return XFS_ERROR(EIO);
> +	}
>  	ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE ||
>  	       iclog->ic_state == XLOG_STATE_WANT_SYNC);
>  
> -	if (--iclog->ic_refcnt == 0 &&
> -	    iclog->ic_state == XLOG_STATE_WANT_SYNC) {
> +	if (iclog->ic_state == XLOG_STATE_WANT_SYNC) {
>  		/* update tail before writing to iclog */
>  		xlog_assign_tail_lsn(log->l_mp);
>  		sync++;
> @@ -2952,7 +2957,8 @@ xlog_state_sync_all(xlog_t *log, uint fl
>  		 * previous iclog and go to sleep.
>  		 */
>  		if (iclog->ic_state == XLOG_STATE_DIRTY ||
> -		    (iclog->ic_refcnt == 0 && iclog->ic_offset == 0)) {
> +		    (atomic_read(&iclog->ic_refcnt) == 0
> +		     && iclog->ic_offset == 0)) {
>  			iclog = iclog->ic_prev;
>  			if (iclog->ic_state == XLOG_STATE_ACTIVE ||
>  			    iclog->ic_state == XLOG_STATE_DIRTY)
> @@ -2960,14 +2966,14 @@ xlog_state_sync_all(xlog_t *log, uint fl
>  			else
>  				goto maybe_sleep;
>  		} else {
> -			if (iclog->ic_refcnt == 0) {
> +			if (atomic_read(&iclog->ic_refcnt) == 0) {
>  				/* We are the only one with access to this
>  				 * iclog.  Flush it out now.  There should
>  				 * be a roundoff of zero to show that someone
>  				 * has already taken care of the roundoff from
>  				 * the previous sync.
>  				 */
> -				iclog->ic_refcnt++;
> +				atomic_inc(&iclog->ic_refcnt);
>  				lsn = be64_to_cpu(iclog->ic_header.h_lsn);
>  				xlog_state_switch_iclogs(log, iclog, 0);
>  				spin_unlock(&log->l_icloglock);
> @@ -3099,7 +3105,7 @@ try_again:
>  			already_slept = 1;
>  			goto try_again;
>  		} else {
> -			iclog->ic_refcnt++;
> +			atomic_inc(&iclog->ic_refcnt);
>  			xlog_state_switch_iclogs(log, iclog, 0);
>  			spin_unlock(&log->l_icloglock);
>  			if (xlog_state_release_iclog(log, iclog))
> Index: 2.6.x-xfs-new/fs/xfs/xfs_log_priv.h
> ===================================================================
> --- 2.6.x-xfs-new.orig/fs/xfs/xfs_log_priv.h	2008-01-21 16:06:27.127557437 +1100
> +++ 2.6.x-xfs-new/fs/xfs/xfs_log_priv.h	2008-01-21 16:23:35.369691221 +1100
> @@ -339,7 +339,7 @@ typedef struct xlog_iclog_fields {
>  #endif
>  	int			ic_size;
>  	int			ic_offset;
> -	int			ic_refcnt;
> +	atomic_t		ic_refcnt;
>  	int			ic_bwritecnt;
>  	ushort_t		ic_state;
>  	char			*ic_datap;	/* pointer to iclog data */
> Index: 2.6.x-xfs-new/fs/xfs/xfsidbg.c
> ===================================================================
> --- 2.6.x-xfs-new.orig/fs/xfs/xfsidbg.c	2008-01-21 16:06:27.127557437 +1100
> +++ 2.6.x-xfs-new/fs/xfs/xfsidbg.c	2008-01-21 16:23:35.385689220 +1100
> @@ -5633,7 +5633,7 @@ xfsidbg_xiclog(xlog_in_core_t *iclog)
>  #else
>  		NULL,
>  #endif
> -		iclog->ic_refcnt, iclog->ic_bwritecnt);
> +		atomic_read(&iclog->ic_refcnt), iclog->ic_bwritecnt);
>  	if (iclog->ic_state & XLOG_STATE_ALL)
>  		printflags(iclog->ic_state, ic_flags, " state:");
>  	else

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [patch] Use atomics for iclog reference counting
  2008-01-23  5:13 ` Timothy Shimmin
@ 2008-02-14 23:47   ` David Chinner
  2008-02-15  0:24     ` David Chinner
  0 siblings, 1 reply; 7+ messages in thread
From: David Chinner @ 2008-02-14 23:47 UTC (permalink / raw)
  To: Timothy Shimmin; +Cc: David Chinner, xfs-dev, xfs-oss

On Wed, Jan 23, 2008 at 04:13:25PM +1100, Timothy Shimmin wrote:
> I'll have a look...

Tim, have you had a chance to look at this one yet? I'd like to
push this too, but I understand you are kinda busy right now :/

Cheers,

Dave.

> David Chinner wrote:
> >Now that we update the log tail LSN less frequently on
> >transaction completion, we pass the contention straight to
> >the global block stat lock (l_iclog_lock) during transaction
> >completion.
> >
> >We currently have to take this lock to decrement the iclog
> >reference count. There is a reference count on each iclog,
> >so we need to take the global lock for all refcount changes.
> >
> >When large numbers of processes are all doing small transactions,
> >the iclog reference counts will be quite high, and the state change
> >that absolutely requires the l_iclog_lock is the exception rather than
> >the norm.
> >
> >Change the reference counting on the iclogs to use atomic_inc/dec
> >so that we can use atomic_dec_and_lock during transaction completion
> >and avoid the need for grabbing the l_iclog_lock for every reference
> >count decrement except the one that matters - the last.
> >
> >Signed-off-by: Dave Chinner <dgc@sgi.com>
> >---
> > fs/xfs/xfs_log.c      |   36 +++++++++++++++++++++---------------
> > fs/xfs/xfs_log_priv.h |    2 +-
> > fs/xfs/xfsidbg.c      |    2 +-
> > 3 files changed, 23 insertions(+), 17 deletions(-)
> >
> >Index: 2.6.x-xfs-new/fs/xfs/xfs_log.c
> >===================================================================
> >--- 2.6.x-xfs-new.orig/fs/xfs/xfs_log.c	2008-01-21 
> >16:16:51.804146394 +1100
> >+++ 2.6.x-xfs-new/fs/xfs/xfs_log.c	2008-01-21 16:23:35.369691221 +1100
> >@@ -675,7 +675,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
> > 
> > 		spin_lock(&log->l_icloglock);
> > 		iclog = log->l_iclog;
> >-		iclog->ic_refcnt++;
> >+		atomic_inc(&iclog->ic_refcnt);
> > 		spin_unlock(&log->l_icloglock);
> > 		xlog_state_want_sync(log, iclog);
> > 		(void) xlog_state_release_iclog(log, iclog);
> >@@ -713,7 +713,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
> > 		 */
> > 		spin_lock(&log->l_icloglock);
> > 		iclog = log->l_iclog;
> >-		iclog->ic_refcnt++;
> >+		atomic_inc(&iclog->ic_refcnt);
> > 		spin_unlock(&log->l_icloglock);
> > 
> > 		xlog_state_want_sync(log, iclog);
> >@@ -1405,7 +1405,7 @@ xlog_sync(xlog_t		*log,
> > 	int		v2 = XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb);
> > 
> > 	XFS_STATS_INC(xs_log_writes);
> >-	ASSERT(iclog->ic_refcnt == 0);
> >+	ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
> > 
> > 	/* Add for LR header */
> > 	count_init = log->l_iclog_hsize + iclog->ic_offset;
> >@@ -2311,7 +2311,7 @@ xlog_state_done_syncing(
> > 
> > 	ASSERT(iclog->ic_state == XLOG_STATE_SYNCING ||
> > 	       iclog->ic_state == XLOG_STATE_IOERROR);
> >-	ASSERT(iclog->ic_refcnt == 0);
> >+	ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
> > 	ASSERT(iclog->ic_bwritecnt == 1 || iclog->ic_bwritecnt == 2);
> > 
> > 
> >@@ -2393,7 +2393,7 @@ restart:
> > 	ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
> > 	head = &iclog->ic_header;
> > 
> >-	iclog->ic_refcnt++;			/* prevents sync */
> >+	atomic_inc(&iclog->ic_refcnt);	/* prevents sync */
> > 	log_offset = iclog->ic_offset;
> > 
> > 	/* On the 1st write to an iclog, figure out lsn.  This works
> >@@ -2425,12 +2425,12 @@ restart:
> > 		xlog_state_switch_iclogs(log, iclog, iclog->ic_size);
> > 
> > 		/* If I'm the only one writing to this iclog, sync it to 
> > 		disk */
> >-		if (iclog->ic_refcnt == 1) {
> >+		if (atomic_read(&iclog->ic_refcnt) == 1) {
> > 			spin_unlock(&log->l_icloglock);
> > 			if ((error = xlog_state_release_iclog(log, iclog)))
> > 				return error;
> > 		} else {
> >-			iclog->ic_refcnt--;
> >+			atomic_dec(&iclog->ic_refcnt);
> > 			spin_unlock(&log->l_icloglock);
> > 		}
> > 		goto restart;
> >@@ -2821,18 +2821,23 @@ xlog_state_release_iclog(
> > {
> > 	int		sync = 0;	/* do we sync? */
> > 
> >-	spin_lock(&log->l_icloglock);
> > 	if (iclog->ic_state & XLOG_STATE_IOERROR) {
> > 		spin_unlock(&log->l_icloglock);
> > 		return XFS_ERROR(EIO);
> > 	}
> > 
> >-	ASSERT(iclog->ic_refcnt > 0);
> >+	ASSERT(atomic_read(&iclog->ic_refcnt) > 0);
> >+	if (!atomic_dec_and_lock(&iclog->ic_refcnt, &log->l_icloglock))
> >+		return 0;
> >+
> >+	if (iclog->ic_state & XLOG_STATE_IOERROR) {
> >+		spin_unlock(&log->l_icloglock);
> >+		return XFS_ERROR(EIO);
> >+	}
> > 	ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE ||
> > 	       iclog->ic_state == XLOG_STATE_WANT_SYNC);
> > 
> >-	if (--iclog->ic_refcnt == 0 &&
> >-	    iclog->ic_state == XLOG_STATE_WANT_SYNC) {
> >+	if (iclog->ic_state == XLOG_STATE_WANT_SYNC) {
> > 		/* update tail before writing to iclog */
> > 		xlog_assign_tail_lsn(log->l_mp);
> > 		sync++;
> >@@ -2952,7 +2957,8 @@ xlog_state_sync_all(xlog_t *log, uint fl
> > 		 * previous iclog and go to sleep.
> > 		 */
> > 		if (iclog->ic_state == XLOG_STATE_DIRTY ||
> >-		    (iclog->ic_refcnt == 0 && iclog->ic_offset == 0)) {
> >+		    (atomic_read(&iclog->ic_refcnt) == 0
> >+		     && iclog->ic_offset == 0)) {
> > 			iclog = iclog->ic_prev;
> > 			if (iclog->ic_state == XLOG_STATE_ACTIVE ||
> > 			    iclog->ic_state == XLOG_STATE_DIRTY)
> >@@ -2960,14 +2966,14 @@ xlog_state_sync_all(xlog_t *log, uint fl
> > 			else
> > 				goto maybe_sleep;
> > 		} else {
> >-			if (iclog->ic_refcnt == 0) {
> >+			if (atomic_read(&iclog->ic_refcnt) == 0) {
> > 				/* We are the only one with access to this
> > 				 * iclog.  Flush it out now.  There should
> > 				 * be a roundoff of zero to show that someone
> > 				 * has already taken care of the roundoff 
> > 				 from
> > 				 * the previous sync.
> > 				 */
> >-				iclog->ic_refcnt++;
> >+				atomic_inc(&iclog->ic_refcnt);
> > 				lsn = be64_to_cpu(iclog->ic_header.h_lsn);
> > 				xlog_state_switch_iclogs(log, iclog, 0);
> > 				spin_unlock(&log->l_icloglock);
> >@@ -3099,7 +3105,7 @@ try_again:
> > 			already_slept = 1;
> > 			goto try_again;
> > 		} else {
> >-			iclog->ic_refcnt++;
> >+			atomic_inc(&iclog->ic_refcnt);
> > 			xlog_state_switch_iclogs(log, iclog, 0);
> > 			spin_unlock(&log->l_icloglock);
> > 			if (xlog_state_release_iclog(log, iclog))
> >Index: 2.6.x-xfs-new/fs/xfs/xfs_log_priv.h
> >===================================================================
> >--- 2.6.x-xfs-new.orig/fs/xfs/xfs_log_priv.h	2008-01-21 
> >16:06:27.127557437 +1100
> >+++ 2.6.x-xfs-new/fs/xfs/xfs_log_priv.h	2008-01-21 
> >16:23:35.369691221 +1100
> >@@ -339,7 +339,7 @@ typedef struct xlog_iclog_fields {
> > #endif
> > 	int			ic_size;
> > 	int			ic_offset;
> >-	int			ic_refcnt;
> >+	atomic_t		ic_refcnt;
> > 	int			ic_bwritecnt;
> > 	ushort_t		ic_state;
> > 	char			*ic_datap;	/* pointer to iclog data */
> >Index: 2.6.x-xfs-new/fs/xfs/xfsidbg.c
> >===================================================================
> >--- 2.6.x-xfs-new.orig/fs/xfs/xfsidbg.c	2008-01-21 
> >16:06:27.127557437 +1100
> >+++ 2.6.x-xfs-new/fs/xfs/xfsidbg.c	2008-01-21 16:23:35.385689220 +1100
> >@@ -5633,7 +5633,7 @@ xfsidbg_xiclog(xlog_in_core_t *iclog)
> > #else
> > 		NULL,
> > #endif
> >-		iclog->ic_refcnt, iclog->ic_bwritecnt);
> >+		atomic_read(&iclog->ic_refcnt), iclog->ic_bwritecnt);
> > 	if (iclog->ic_state & XLOG_STATE_ALL)
> > 		printflags(iclog->ic_state, ic_flags, " state:");
> > 	else
> 

-- 
Dave Chinner
Principal Engineer
SGI Australian Software Group

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [patch] Use atomics for iclog reference counting
  2008-02-14 23:47   ` David Chinner
@ 2008-02-15  0:24     ` David Chinner
  2008-02-15  3:34       ` Timothy Shimmin
  0 siblings, 1 reply; 7+ messages in thread
From: David Chinner @ 2008-02-15  0:24 UTC (permalink / raw)
  To: David Chinner; +Cc: Timothy Shimmin, xfs-dev, xfs-oss

On Fri, Feb 15, 2008 at 10:47:58AM +1100, David Chinner wrote:
> On Wed, Jan 23, 2008 at 04:13:25PM +1100, Timothy Shimmin wrote:
> > I'll have a look...
> 
> Tim, have you had a chance to look at this one yet? I'd like to
> push this too, but I understand you are kinda busy right now :/

FWIW, you might want to review this version ;)

----

Now that we update the log tail LSN less frequently on
transaction completion, we pass the contention straight to
the global log state lock (l_iclog_lock) during transaction
completion.

We currently have to take this lock to decrement the iclog
reference count. There is a reference count on each iclog,
so we need to take the global lock for all refcount changes.

When large numbers of processes are all doing small transactions,
the iclog reference counts will be quite high, and the state change
that absolutely requires the l_iclog_lock is the exception rather than
the norm.

Change the reference counting on the iclogs to use atomic_inc/dec
so that we can use atomic_dec_and_lock during transaction completion
and avoid the need for grabbing the l_iclog_lock for every reference
count decrement except the one that matters - the last.

Version 2:
o remove spurious unlock in shutdown path in xlog_state_release_iclog()

Signed-off-by: Dave Chinner <dgc@sgi.com>
---
 fs/xfs/xfs_log.c      |   36 ++++++++++++++++++++----------------
 fs/xfs/xfs_log_priv.h |    2 +-
 fs/xfs/xfsidbg.c      |    2 +-
 3 files changed, 22 insertions(+), 18 deletions(-)

Index: 2.6.x-xfs-new/fs/xfs/xfs_log.c
===================================================================
--- 2.6.x-xfs-new.orig/fs/xfs/xfs_log.c	2008-02-15 11:19:08.076544539 +1100
+++ 2.6.x-xfs-new/fs/xfs/xfs_log.c	2008-02-15 11:20:22.558911855 +1100
@@ -675,7 +675,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
 
 		spin_lock(&log->l_icloglock);
 		iclog = log->l_iclog;
-		iclog->ic_refcnt++;
+		atomic_inc(&iclog->ic_refcnt);
 		spin_unlock(&log->l_icloglock);
 		xlog_state_want_sync(log, iclog);
 		(void) xlog_state_release_iclog(log, iclog);
@@ -713,7 +713,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
 		 */
 		spin_lock(&log->l_icloglock);
 		iclog = log->l_iclog;
-		iclog->ic_refcnt++;
+		atomic_inc(&iclog->ic_refcnt);
 		spin_unlock(&log->l_icloglock);
 
 		xlog_state_want_sync(log, iclog);
@@ -1405,7 +1405,7 @@ xlog_sync(xlog_t		*log,
 	int		v2 = XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb);
 
 	XFS_STATS_INC(xs_log_writes);
-	ASSERT(iclog->ic_refcnt == 0);
+	ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
 
 	/* Add for LR header */
 	count_init = log->l_iclog_hsize + iclog->ic_offset;
@@ -2312,7 +2312,7 @@ xlog_state_done_syncing(
 
 	ASSERT(iclog->ic_state == XLOG_STATE_SYNCING ||
 	       iclog->ic_state == XLOG_STATE_IOERROR);
-	ASSERT(iclog->ic_refcnt == 0);
+	ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
 	ASSERT(iclog->ic_bwritecnt == 1 || iclog->ic_bwritecnt == 2);
 
 
@@ -2394,7 +2394,7 @@ restart:
 	ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
 	head = &iclog->ic_header;
 
-	iclog->ic_refcnt++;			/* prevents sync */
+	atomic_inc(&iclog->ic_refcnt);	/* prevents sync */
 	log_offset = iclog->ic_offset;
 
 	/* On the 1st write to an iclog, figure out lsn.  This works
@@ -2426,12 +2426,12 @@ restart:
 		xlog_state_switch_iclogs(log, iclog, iclog->ic_size);
 
 		/* If I'm the only one writing to this iclog, sync it to disk */
-		if (iclog->ic_refcnt == 1) {
+		if (atomic_read(&iclog->ic_refcnt) == 1) {
 			spin_unlock(&log->l_icloglock);
 			if ((error = xlog_state_release_iclog(log, iclog)))
 				return error;
 		} else {
-			iclog->ic_refcnt--;
+			atomic_dec(&iclog->ic_refcnt);
 			spin_unlock(&log->l_icloglock);
 		}
 		goto restart;
@@ -2822,18 +2822,21 @@ xlog_state_release_iclog(
 {
 	int		sync = 0;	/* do we sync? */
 
-	spin_lock(&log->l_icloglock);
+	if (iclog->ic_state & XLOG_STATE_IOERROR)
+		return XFS_ERROR(EIO);
+
+	ASSERT(atomic_read(&iclog->ic_refcnt) > 0);
+	if (!atomic_dec_and_lock(&iclog->ic_refcnt, &log->l_icloglock))
+		return 0;
+
 	if (iclog->ic_state & XLOG_STATE_IOERROR) {
 		spin_unlock(&log->l_icloglock);
 		return XFS_ERROR(EIO);
 	}
-
-	ASSERT(iclog->ic_refcnt > 0);
 	ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE ||
 	       iclog->ic_state == XLOG_STATE_WANT_SYNC);
 
-	if (--iclog->ic_refcnt == 0 &&
-	    iclog->ic_state == XLOG_STATE_WANT_SYNC) {
+	if (iclog->ic_state == XLOG_STATE_WANT_SYNC) {
 		/* update tail before writing to iclog */
 		xlog_assign_tail_lsn(log->l_mp);
 		sync++;
@@ -2953,7 +2956,8 @@ xlog_state_sync_all(xlog_t *log, uint fl
 		 * previous iclog and go to sleep.
 		 */
 		if (iclog->ic_state == XLOG_STATE_DIRTY ||
-		    (iclog->ic_refcnt == 0 && iclog->ic_offset == 0)) {
+		    (atomic_read(&iclog->ic_refcnt) == 0
+		     && iclog->ic_offset == 0)) {
 			iclog = iclog->ic_prev;
 			if (iclog->ic_state == XLOG_STATE_ACTIVE ||
 			    iclog->ic_state == XLOG_STATE_DIRTY)
@@ -2961,14 +2965,14 @@ xlog_state_sync_all(xlog_t *log, uint fl
 			else
 				goto maybe_sleep;
 		} else {
-			if (iclog->ic_refcnt == 0) {
+			if (atomic_read(&iclog->ic_refcnt) == 0) {
 				/* We are the only one with access to this
 				 * iclog.  Flush it out now.  There should
 				 * be a roundoff of zero to show that someone
 				 * has already taken care of the roundoff from
 				 * the previous sync.
 				 */
-				iclog->ic_refcnt++;
+				atomic_inc(&iclog->ic_refcnt);
 				lsn = be64_to_cpu(iclog->ic_header.h_lsn);
 				xlog_state_switch_iclogs(log, iclog, 0);
 				spin_unlock(&log->l_icloglock);
@@ -3100,7 +3104,7 @@ try_again:
 			already_slept = 1;
 			goto try_again;
 		} else {
-			iclog->ic_refcnt++;
+			atomic_inc(&iclog->ic_refcnt);
 			xlog_state_switch_iclogs(log, iclog, 0);
 			spin_unlock(&log->l_icloglock);
 			if (xlog_state_release_iclog(log, iclog))
Index: 2.6.x-xfs-new/fs/xfs/xfs_log_priv.h
===================================================================
--- 2.6.x-xfs-new.orig/fs/xfs/xfs_log_priv.h	2008-02-15 11:19:08.080544022 +1100
+++ 2.6.x-xfs-new/fs/xfs/xfs_log_priv.h	2008-02-15 11:19:14.403726218 +1100
@@ -339,7 +339,7 @@ typedef struct xlog_iclog_fields {
 #endif
 	int			ic_size;
 	int			ic_offset;
-	int			ic_refcnt;
+	atomic_t		ic_refcnt;
 	int			ic_bwritecnt;
 	ushort_t		ic_state;
 	char			*ic_datap;	/* pointer to iclog data */
Index: 2.6.x-xfs-new/fs/xfs/xfsidbg.c
===================================================================
--- 2.6.x-xfs-new.orig/fs/xfs/xfsidbg.c	2008-02-15 11:19:08.096541953 +1100
+++ 2.6.x-xfs-new/fs/xfs/xfsidbg.c	2008-02-15 11:19:14.407725701 +1100
@@ -5633,7 +5633,7 @@ xfsidbg_xiclog(xlog_in_core_t *iclog)
 #else
 		NULL,
 #endif
-		iclog->ic_refcnt, iclog->ic_bwritecnt);
+		atomic_read(&iclog->ic_refcnt), iclog->ic_bwritecnt);
 	if (iclog->ic_state & XLOG_STATE_ALL)
 		printflags(iclog->ic_state, ic_flags, " state:");
 	else

-- 
Dave Chinner
Principal Engineer
SGI Australian Software Group

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [patch] Use atomics for iclog reference counting
  2008-02-15  0:24     ` David Chinner
@ 2008-02-15  3:34       ` Timothy Shimmin
  2008-02-15  4:27         ` David Chinner
  0 siblings, 1 reply; 7+ messages in thread
From: Timothy Shimmin @ 2008-02-15  3:34 UTC (permalink / raw)
  To: David Chinner; +Cc: xfs-dev, xfs-oss

Dave,

So a bunch of incs/decs/tests converted to the atomic versions.
And the interesting stuff appears to be in xlog_state_release_iclog().
Okay that looks reasonable.
If the decrement of the cnt doesn't go down to zero then we just
return straight away - because we won't be going to sync anything.
And if we do go to zero then we take the lock and continue.
Why do we test for the error/EIO beforehand now too?
Because we don't want to return 0 if we have an error to return?
Seems good.

In the 1st 2 cases of the patch:
 > @@ -675,7 +675,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
 >
 >  		spin_lock(&log->l_icloglock);
 >  		iclog = log->l_iclog;
 > -		iclog->ic_refcnt++;
 > +		atomic_inc(&iclog->ic_refcnt);
 >  		spin_unlock(&log->l_icloglock);
 >  		xlog_state_want_sync(log, iclog);
 >  		(void) xlog_state_release_iclog(log, iclog);
 > @@ -713,7 +713,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
 >  		 */
 >  		spin_lock(&log->l_icloglock);
 >  		iclog = log->l_iclog;
 > -		iclog->ic_refcnt++;
 > +		atomic_inc(&iclog->ic_refcnt);
 >  		spin_unlock(&log->l_icloglock);

Do we still really need to take the lock etc?

--Tim

David Chinner wrote:
> On Fri, Feb 15, 2008 at 10:47:58AM +1100, David Chinner wrote:
>> On Wed, Jan 23, 2008 at 04:13:25PM +1100, Timothy Shimmin wrote:
>>> I'll have a look...
>> Tim, have you had a chance to look at this one yet? I'd like to
>> push this too, but I understand you are kinda busy right now :/
> 
> FWIW, you might want to review this version ;)
> 
> ----
> 
> Now that we update the log tail LSN less frequently on
> transaction completion, we pass the contention straight to
> the global log state lock (l_iclog_lock) during transaction
> completion.
> 
> We currently have to take this lock to decrement the iclog
> reference count. There is a reference count on each iclog,
> so we need to take the global lock for all refcount changes.
> 
> When large numbers of processes are all doing small transactions,
> the iclog reference counts will be quite high, and the state change
> that absolutely requires the l_iclog_lock is the exception rather than
> the norm.
> 
> Change the reference counting on the iclogs to use atomic_inc/dec
> so that we can use atomic_dec_and_lock during transaction completion
> and avoid the need for grabbing the l_iclog_lock for every reference
> count decrement except the one that matters - the last.
> 
> Version 2:
> o remove spurious unlock in shutdown path in xlog_state_release_iclog()
> 
> Signed-off-by: Dave Chinner <dgc@sgi.com>
> ---
>  fs/xfs/xfs_log.c      |   36 ++++++++++++++++++++----------------
>  fs/xfs/xfs_log_priv.h |    2 +-
>  fs/xfs/xfsidbg.c      |    2 +-
>  3 files changed, 22 insertions(+), 18 deletions(-)
> 
> Index: 2.6.x-xfs-new/fs/xfs/xfs_log.c
> ===================================================================
> --- 2.6.x-xfs-new.orig/fs/xfs/xfs_log.c	2008-02-15 11:19:08.076544539 +1100
> +++ 2.6.x-xfs-new/fs/xfs/xfs_log.c	2008-02-15 11:20:22.558911855 +1100
> @@ -675,7 +675,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
>  
>  		spin_lock(&log->l_icloglock);
>  		iclog = log->l_iclog;
> -		iclog->ic_refcnt++;
> +		atomic_inc(&iclog->ic_refcnt);
>  		spin_unlock(&log->l_icloglock);
>  		xlog_state_want_sync(log, iclog);
>  		(void) xlog_state_release_iclog(log, iclog);
> @@ -713,7 +713,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
>  		 */
>  		spin_lock(&log->l_icloglock);
>  		iclog = log->l_iclog;
> -		iclog->ic_refcnt++;
> +		atomic_inc(&iclog->ic_refcnt);
>  		spin_unlock(&log->l_icloglock);
>  
>  		xlog_state_want_sync(log, iclog);
> @@ -1405,7 +1405,7 @@ xlog_sync(xlog_t		*log,
>  	int		v2 = XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb);
>  
>  	XFS_STATS_INC(xs_log_writes);
> -	ASSERT(iclog->ic_refcnt == 0);
> +	ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
>  
>  	/* Add for LR header */
>  	count_init = log->l_iclog_hsize + iclog->ic_offset;
> @@ -2312,7 +2312,7 @@ xlog_state_done_syncing(
>  
>  	ASSERT(iclog->ic_state == XLOG_STATE_SYNCING ||
>  	       iclog->ic_state == XLOG_STATE_IOERROR);
> -	ASSERT(iclog->ic_refcnt == 0);
> +	ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
>  	ASSERT(iclog->ic_bwritecnt == 1 || iclog->ic_bwritecnt == 2);
>  
>  
> @@ -2394,7 +2394,7 @@ restart:
>  	ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
>  	head = &iclog->ic_header;
>  
> -	iclog->ic_refcnt++;			/* prevents sync */
> +	atomic_inc(&iclog->ic_refcnt);	/* prevents sync */
>  	log_offset = iclog->ic_offset;
>  
>  	/* On the 1st write to an iclog, figure out lsn.  This works
> @@ -2426,12 +2426,12 @@ restart:
>  		xlog_state_switch_iclogs(log, iclog, iclog->ic_size);
>  
>  		/* If I'm the only one writing to this iclog, sync it to disk */
> -		if (iclog->ic_refcnt == 1) {
> +		if (atomic_read(&iclog->ic_refcnt) == 1) {
>  			spin_unlock(&log->l_icloglock);
>  			if ((error = xlog_state_release_iclog(log, iclog)))
>  				return error;
>  		} else {
> -			iclog->ic_refcnt--;
> +			atomic_dec(&iclog->ic_refcnt);
>  			spin_unlock(&log->l_icloglock);
>  		}
>  		goto restart;
> @@ -2822,18 +2822,21 @@ xlog_state_release_iclog(
>  {
>  	int		sync = 0;	/* do we sync? */
>  
> -	spin_lock(&log->l_icloglock);
> +	if (iclog->ic_state & XLOG_STATE_IOERROR)
> +		return XFS_ERROR(EIO);
> +
> +	ASSERT(atomic_read(&iclog->ic_refcnt) > 0);
> +	if (!atomic_dec_and_lock(&iclog->ic_refcnt, &log->l_icloglock))
> +		return 0;
> +
>  	if (iclog->ic_state & XLOG_STATE_IOERROR) {
>  		spin_unlock(&log->l_icloglock);
>  		return XFS_ERROR(EIO);
>  	}
> -
> -	ASSERT(iclog->ic_refcnt > 0);
>  	ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE ||
>  	       iclog->ic_state == XLOG_STATE_WANT_SYNC);
>  
> -	if (--iclog->ic_refcnt == 0 &&
> -	    iclog->ic_state == XLOG_STATE_WANT_SYNC) {
> +	if (iclog->ic_state == XLOG_STATE_WANT_SYNC) {
>  		/* update tail before writing to iclog */
>  		xlog_assign_tail_lsn(log->l_mp);
>  		sync++;
> @@ -2953,7 +2956,8 @@ xlog_state_sync_all(xlog_t *log, uint fl
>  		 * previous iclog and go to sleep.
>  		 */
>  		if (iclog->ic_state == XLOG_STATE_DIRTY ||
> -		    (iclog->ic_refcnt == 0 && iclog->ic_offset == 0)) {
> +		    (atomic_read(&iclog->ic_refcnt) == 0
> +		     && iclog->ic_offset == 0)) {
>  			iclog = iclog->ic_prev;
>  			if (iclog->ic_state == XLOG_STATE_ACTIVE ||
>  			    iclog->ic_state == XLOG_STATE_DIRTY)
> @@ -2961,14 +2965,14 @@ xlog_state_sync_all(xlog_t *log, uint fl
>  			else
>  				goto maybe_sleep;
>  		} else {
> -			if (iclog->ic_refcnt == 0) {
> +			if (atomic_read(&iclog->ic_refcnt) == 0) {
>  				/* We are the only one with access to this
>  				 * iclog.  Flush it out now.  There should
>  				 * be a roundoff of zero to show that someone
>  				 * has already taken care of the roundoff from
>  				 * the previous sync.
>  				 */
> -				iclog->ic_refcnt++;
> +				atomic_inc(&iclog->ic_refcnt);
>  				lsn = be64_to_cpu(iclog->ic_header.h_lsn);
>  				xlog_state_switch_iclogs(log, iclog, 0);
>  				spin_unlock(&log->l_icloglock);
> @@ -3100,7 +3104,7 @@ try_again:
>  			already_slept = 1;
>  			goto try_again;
>  		} else {
> -			iclog->ic_refcnt++;
> +			atomic_inc(&iclog->ic_refcnt);
>  			xlog_state_switch_iclogs(log, iclog, 0);
>  			spin_unlock(&log->l_icloglock);
>  			if (xlog_state_release_iclog(log, iclog))
> Index: 2.6.x-xfs-new/fs/xfs/xfs_log_priv.h
> ===================================================================
> --- 2.6.x-xfs-new.orig/fs/xfs/xfs_log_priv.h	2008-02-15 11:19:08.080544022 +1100
> +++ 2.6.x-xfs-new/fs/xfs/xfs_log_priv.h	2008-02-15 11:19:14.403726218 +1100
> @@ -339,7 +339,7 @@ typedef struct xlog_iclog_fields {
>  #endif
>  	int			ic_size;
>  	int			ic_offset;
> -	int			ic_refcnt;
> +	atomic_t		ic_refcnt;
>  	int			ic_bwritecnt;
>  	ushort_t		ic_state;
>  	char			*ic_datap;	/* pointer to iclog data */
> Index: 2.6.x-xfs-new/fs/xfs/xfsidbg.c
> ===================================================================
> --- 2.6.x-xfs-new.orig/fs/xfs/xfsidbg.c	2008-02-15 11:19:08.096541953 +1100
> +++ 2.6.x-xfs-new/fs/xfs/xfsidbg.c	2008-02-15 11:19:14.407725701 +1100
> @@ -5633,7 +5633,7 @@ xfsidbg_xiclog(xlog_in_core_t *iclog)
>  #else
>  		NULL,
>  #endif
> -		iclog->ic_refcnt, iclog->ic_bwritecnt);
> +		atomic_read(&iclog->ic_refcnt), iclog->ic_bwritecnt);
>  	if (iclog->ic_state & XLOG_STATE_ALL)
>  		printflags(iclog->ic_state, ic_flags, " state:");
>  	else
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [patch] Use atomics for iclog reference counting
  2008-02-15  3:34       ` Timothy Shimmin
@ 2008-02-15  4:27         ` David Chinner
  2008-02-15  5:05           ` Timothy Shimmin
  0 siblings, 1 reply; 7+ messages in thread
From: David Chinner @ 2008-02-15  4:27 UTC (permalink / raw)
  To: Timothy Shimmin; +Cc: David Chinner, xfs-dev, xfs-oss

On Fri, Feb 15, 2008 at 02:34:53PM +1100, Timothy Shimmin wrote:
> Dave,
> 
> So a bunch of incs/decs/tests converted to the atomic versions.
> And the interesting stuff appears to be in xlog_state_release_iclog().
> Okay that looks reasonable.
> If the decrement of the cnt doesn't go down to zero then we just
> return straight away - because we won't be going to sync anything.
> And if we do go to zero then we take the lock and continue.
> Why do we test for the error/EIO beforehand now too?
> Because we don't want to return 0 if we have an error to return?

Right. Effectively it retains the same behaviour as the old code.
i.e.  A call to xlog_state_release_iclog() with an elevated refcount
used to return EIO if the log had been shutdown and we need the
initial (unlocked) check to retain that behaviour.

However, this check is racy and so in the case where the last
ref goes away and we get the lock we need to check again when
we can't possibly race with a shutdown state change.

> Seems good.
> 
> In the 1st 2 cases of the patch:
> > @@ -675,7 +675,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
> >
> >  		spin_lock(&log->l_icloglock);
> >  		iclog = log->l_iclog;
> > -		iclog->ic_refcnt++;
> > +		atomic_inc(&iclog->ic_refcnt);
> >  		spin_unlock(&log->l_icloglock);
> >  		xlog_state_want_sync(log, iclog);
> >  		(void) xlog_state_release_iclog(log, iclog);
> > @@ -713,7 +713,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
> >  		 */
> >  		spin_lock(&log->l_icloglock);
> >  		iclog = log->l_iclog;
> > -		iclog->ic_refcnt++;
> > +		atomic_inc(&iclog->ic_refcnt);
> >  		spin_unlock(&log->l_icloglock);
> 
> Do we still really need to take the lock etc?

log->iclog is protected by the l_icloglock as well, so the lock
needs to be retained to prevent races when reading and taking a
reference to it. IOWs, the l_icloglock still synchronises increments
and the final decrement on an iclog; we only need the atomic counter
to enable unlocked refcount decrements when the refcount is > 1.
 
Cheers,

Dave.
-- 
Dave Chinner
Principal Engineer
SGI Australian Software Group

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [patch] Use atomics for iclog reference counting
  2008-02-15  4:27         ` David Chinner
@ 2008-02-15  5:05           ` Timothy Shimmin
  0 siblings, 0 replies; 7+ messages in thread
From: Timothy Shimmin @ 2008-02-15  5:05 UTC (permalink / raw)
  To: David Chinner; +Cc: xfs-dev, xfs-oss

David Chinner wrote:
> On Fri, Feb 15, 2008 at 02:34:53PM +1100, Timothy Shimmin wrote:
>> In the 1st 2 cases of the patch:
>>> @@ -675,7 +675,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
>>>
>>>  		spin_lock(&log->l_icloglock);
>>>  		iclog = log->l_iclog;
>>> -		iclog->ic_refcnt++;
>>> +		atomic_inc(&iclog->ic_refcnt);
>>>  		spin_unlock(&log->l_icloglock);
>>>  		xlog_state_want_sync(log, iclog);
>>>  		(void) xlog_state_release_iclog(log, iclog);
>>> @@ -713,7 +713,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
>>>  		 */
>>>  		spin_lock(&log->l_icloglock);
>>>  		iclog = log->l_iclog;
>>> -		iclog->ic_refcnt++;
>>> +		atomic_inc(&iclog->ic_refcnt);
>>>  		spin_unlock(&log->l_icloglock);
>> Do we still really need to take the lock etc?
> 
> log->iclog is protected by the l_icloglock as well, 

Ah, yep :)

> so the lock
> needs to be retained to prevent races when reading and taking a
> reference to it. IOWs, the l_icloglock still synchronises increments
> and the final decrement on an iclog; we only need the atomic counter
> to enable unlocked refcount decrements when the refcount is > 1.
>  
Yep.

--Tim

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2008-02-15  5:04 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-01-21  5:30 [patch] Use atomics for iclog reference counting David Chinner
2008-01-23  5:13 ` Timothy Shimmin
2008-02-14 23:47   ` David Chinner
2008-02-15  0:24     ` David Chinner
2008-02-15  3:34       ` Timothy Shimmin
2008-02-15  4:27         ` David Chinner
2008-02-15  5:05           ` Timothy Shimmin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox