public inbox for linux-xfs@vger.kernel.org
 help / color / mirror / Atom feed
From: David Chinner <dgc@sgi.com>
To: Andi Kleen <andi@firstfloor.org>
Cc: David Chinner <dgc@sgi.com>, Lachlan McIlroy <lachlan@sgi.com>,
	xfs-dev <xfs-dev@sgi.com>, xfs-oss <xfs@oss.sgi.com>
Subject: Re: [Patch] Cacheline align xlog_t
Date: Fri, 4 Apr 2008 11:18:31 +1000	[thread overview]
Message-ID: <20080404011831.GZ103491721@sgi.com> (raw)
In-Reply-To: <20080403064608.GS29105@one.firstfloor.org>

On Thu, Apr 03, 2008 at 08:46:08AM +0200, Andi Kleen wrote:
> On Thu, Apr 03, 2008 at 08:23:47AM +1000, David Chinner wrote:
> > > For the dynamic allocation you would rather need to make sure it
> > > starts at a cache line boundary explicitely because the allocator doesn't
> > > know the alignment of the target type, otherwise your careful
> > > padding might be useless.
> > 
> > Yup. Is there an allocator function gives us cacheline aligned
> > allocation 
> 
> __get_free_pages() @) [ok not serious]
> 
> > (apart from a slab initialised with SLAB_HWCACHE_ALIGN)?
> 
> That too yes.
> 
> > There isn't one, right?
> 
> You can always align yourself with kmalloc (or any other arbitary 
> size allocator) with the standard technique:
> get L1_CACHE_BYTES-1 or possibly better cache_line_size() - 1 bytes
> more and then align the pointer manually with ALIGN. Only tricky part
> is that you have to undo the alignment before freeing.

Great. I guess that means a wrapper function is in order. I'm not
going to deal with this as part of this patch series, though. I'll
just remove the superfluous notations.

Patch below.

Cheers,

Dave.
-- 
Dave Chinner
Principal Engineer
SGI Australian Software Group

---
Reorganise xlog_t for better cacheline isolation of contention

To reduce contention on the log in large CPU count, separate
out different parts of the xlog_t structure onto different cachelines.
Move each lock onto a different cacheline along with all the members
that are accessed/modified while that lock is held.

Also, move the debugging code into debug code.

Version 2:
o kill the structure alignment hint as these are dynamically
  allocated structures.

Signed-off-by: Dave Chinner <dgc@sgi.com>
---
 fs/xfs/xfs_log.c      |    5 +---
 fs/xfs/xfs_log_priv.h |   55 +++++++++++++++++++++++++++-----------------------
 2 files changed, 32 insertions(+), 28 deletions(-)

Index: 2.6.x-xfs-new/fs/xfs/xfs_log.c
===================================================================
--- 2.6.x-xfs-new.orig/fs/xfs/xfs_log.c	2008-04-02 21:57:09.664237248 +1000
+++ 2.6.x-xfs-new/fs/xfs/xfs_log.c	2008-04-02 21:57:12.591863526 +1000
@@ -1237,9 +1237,9 @@ xlog_alloc_log(xfs_mount_t	*mp,
 		XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
 		iclog->ic_bp = bp;
 		iclog->hic_data = bp->b_addr;
-
+#ifdef DEBUG
 		log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header);
-
+#endif
 		head = &iclog->ic_header;
 		memset(head, 0, sizeof(xlog_rec_header_t));
 		head->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM);
@@ -1250,7 +1250,6 @@ xlog_alloc_log(xfs_mount_t	*mp,
 		head->h_fmt = cpu_to_be32(XLOG_FMT);
 		memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t));
 
-
 		iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize;
 		iclog->ic_state = XLOG_STATE_ACTIVE;
 		iclog->ic_log = log;
Index: 2.6.x-xfs-new/fs/xfs/xfs_log_priv.h
===================================================================
--- 2.6.x-xfs-new.orig/fs/xfs/xfs_log_priv.h	2008-04-02 21:57:09.668236737 +1000
+++ 2.6.x-xfs-new/fs/xfs/xfs_log_priv.h	2008-04-03 07:46:41.418849158 +1000
@@ -361,7 +361,7 @@ typedef struct xlog_iclog_fields {
 
 	/* reference counts need their own cacheline */
 	atomic_t		ic_refcnt ____cacheline_aligned_in_smp;
-} xlog_iclog_fields_t ____cacheline_aligned_in_smp;
+} xlog_iclog_fields_t;
 
 typedef union xlog_in_core2 {
 	xlog_rec_header_t	hic_header;
@@ -402,8 +402,29 @@ typedef struct xlog_in_core {
  * that round off problems won't occur when releasing partial reservations.
  */
 typedef struct log {
+	/* The following fields don't need locking */
+	struct xfs_mount	*l_mp;	        /* mount point */
+	struct xfs_buf		*l_xbuf;        /* extra buffer for log
+						 * wrapping */
+	struct xfs_buftarg	*l_targ;        /* buftarg of log */
+	uint			l_flags;
+	uint			l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */
+	struct xfs_buf_cancel	**l_buf_cancel_table;
+	int			l_iclog_hsize;  /* size of iclog header */
+	int			l_iclog_heads;  /* # of iclog header sectors */
+	uint			l_sectbb_log;   /* log2 of sector size in BBs */
+	uint			l_sectbb_mask;  /* sector size (in BBs)
+						 * alignment mask */
+	int			l_iclog_size;	/* size of log in bytes */
+	int			l_iclog_size_log; /* log power size of log */
+	int			l_iclog_bufs;	/* number of iclog buffers */
+	xfs_daddr_t		l_logBBstart;   /* start block of log */
+	int			l_logsize;      /* size of log in bytes */
+	int			l_logBBsize;    /* size of log in BB chunks */
+
 	/* The following block of fields are changed while holding icloglock */
-	sema_t			l_flushsema;    /* iclog flushing semaphore */
+	sema_t			l_flushsema ____cacheline_aligned_in_smp;
+						/* iclog flushing semaphore */
 	int			l_flushcnt;	/* # of procs waiting on this
 						 * sema */
 	int			l_covered_state;/* state of "covering disk
@@ -413,27 +434,14 @@ typedef struct log {
 	xfs_lsn_t		l_tail_lsn;     /* lsn of 1st LR with unflushed
 						 * buffers */
 	xfs_lsn_t		l_last_sync_lsn;/* lsn of last LR on disk */
-	struct xfs_mount	*l_mp;	        /* mount point */
-	struct xfs_buf		*l_xbuf;        /* extra buffer for log
-						 * wrapping */
-	struct xfs_buftarg	*l_targ;        /* buftarg of log */
-	xfs_daddr_t		l_logBBstart;   /* start block of log */
-	int			l_logsize;      /* size of log in bytes */
-	int			l_logBBsize;    /* size of log in BB chunks */
 	int			l_curr_cycle;   /* Cycle number of log writes */
 	int			l_prev_cycle;   /* Cycle number before last
 						 * block increment */
 	int			l_curr_block;   /* current logical log block */
 	int			l_prev_block;   /* previous logical log block */
-	int			l_iclog_size;	/* size of log in bytes */
-	int			l_iclog_size_log; /* log power size of log */
-	int			l_iclog_bufs;	/* number of iclog buffers */
-
-	/* The following field are used for debugging; need to hold icloglock */
-	char			*l_iclog_bak[XLOG_MAX_ICLOGS];
 
 	/* The following block of fields are changed while holding grant_lock */
-	spinlock_t		l_grant_lock;
+	spinlock_t		l_grant_lock ____cacheline_aligned_in_smp;
 	xlog_ticket_t		*l_reserve_headq;
 	xlog_ticket_t		*l_write_headq;
 	int			l_grant_reserve_cycle;
@@ -441,19 +449,16 @@ typedef struct log {
 	int			l_grant_write_cycle;
 	int			l_grant_write_bytes;
 
-	/* The following fields don't need locking */
 #ifdef XFS_LOG_TRACE
 	struct ktrace		*l_trace;
 	struct ktrace		*l_grant_trace;
 #endif
-	uint			l_flags;
-	uint			l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */
-	struct xfs_buf_cancel	**l_buf_cancel_table;
-	int			l_iclog_hsize;  /* size of iclog header */
-	int			l_iclog_heads;  /* # of iclog header sectors */
-	uint			l_sectbb_log;   /* log2 of sector size in BBs */
-	uint			l_sectbb_mask;  /* sector size (in BBs)
-						 * alignment mask */
+
+	/* The following field are used for debugging; need to hold icloglock */
+#ifdef DEBUG
+	char			*l_iclog_bak[XLOG_MAX_ICLOGS];
+#endif
+
 } xlog_t;
 
 #define XLOG_FORCED_SHUTDOWN(log)	((log)->l_flags & XLOG_IO_ERROR)

      parent reply	other threads:[~2008-04-04  1:18 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-04-01 23:15 [Patch] Cacheline align xlog_t David Chinner
2008-04-02  6:35 ` Lachlan McIlroy
2008-04-02  5:44   ` David Chinner
2008-04-02  8:28     ` Andi Kleen
2008-04-02 22:23       ` David Chinner
2008-04-03  6:46         ` Andi Kleen
2008-04-04  1:02           ` Timothy Shimmin
2008-04-04  1:18           ` David Chinner [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080404011831.GZ103491721@sgi.com \
    --to=dgc@sgi.com \
    --cc=andi@firstfloor.org \
    --cc=lachlan@sgi.com \
    --cc=xfs-dev@sgi.com \
    --cc=xfs@oss.sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox