public inbox for linux-xfs@vger.kernel.org
 help / color / mirror / Atom feed
From: Christoph Hellwig <hch@infradead.org>
To: Dave Chinner <david@fromorbit.com>
Cc: xfs@oss.sgi.com
Subject: Re: [PATCH 02/18] xfs: reduce the number of CIL lock round trips during commit
Date: Tue, 14 Sep 2010 10:48:10 -0400	[thread overview]
Message-ID: <20100914144810.GB3400@infradead.org> (raw)
In-Reply-To: <1284461777-1496-3-git-send-email-david@fromorbit.com>

On Tue, Sep 14, 2010 at 08:56:01PM +1000, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
> 
> When commiting a transaction, we do a lock CIL state lock round trip
> on every single log vector we insert into the CIL. This is resulting
> in the lock being as hot as the inode and dcache locks on 8-way
> create workloads. Rework the insertion loops to bring the number
> of lock round trips to one per transaction for log vectors, and one
> more do the busy extents.
> 
> Also change the allocation of the log vector buffer not to zero it
> as we copy over the entire allocated buffer anyway.

The change looks good, but I think the functions names and splits in
the CIL insert code are now a bit confusing.  What about mergeing
something like the patch below into yours?


Index: xfs/fs/xfs/xfs_log_cil.c
===================================================================
--- xfs.orig/fs/xfs/xfs_log_cil.c	2010-09-14 11:32:36.365935029 -0300
+++ xfs/fs/xfs/xfs_log_cil.c	2010-09-14 11:43:58.046935056 -0300
@@ -145,6 +145,47 @@ xlog_cil_init_post_recovery(
 								log->l_curr_block);
 }
 
+static void
+xlog_cil_prepare_item(
+	struct log		*log,
+	struct xfs_log_vec	*lv,
+	int			*len,
+	int			*diff_iovecs)
+{
+	struct xfs_log_vec	*old = lv->lv_item->li_lv;
+
+	if (old) {
+		/* existing lv on log item, space used is a delta */
+		ASSERT(!list_empty(&lv->lv_item->li_cil));
+		ASSERT(old->lv_buf && old->lv_buf_len && old->lv_niovecs);
+
+		*len += lv->lv_buf_len - old->lv_buf_len;
+		*diff_iovecs += lv->lv_niovecs - old->lv_niovecs;
+		kmem_free(old->lv_buf);
+		kmem_free(old);
+	} else {
+		/* new lv, must pin the log item */
+		ASSERT(!lv->lv_item->li_lv);
+		ASSERT(list_empty(&lv->lv_item->li_cil));
+
+		*len += lv->lv_buf_len;
+		*diff_iovecs += lv->lv_niovecs;
+		IOP_PIN(lv->lv_item);
+	}
+
+	/* attach new log vector to log item */
+	lv->lv_item->li_lv = lv;
+
+	/*
+	 * If this is the first time the item is being committed to the
+	 * CIL, store the sequence number on the log item so we can
+	 * tell in future commits whether this is the first checkpoint
+	 * the item is being committed into.
+	 */
+	if (!lv->lv_item->li_seq)
+		lv->lv_item->li_seq = log->l_cilp->xc_ctx->sequence;
+}
+
 /*
  * Insert the log items into the CIL and calculate the difference in space
  * consumed by the item. Add the space to the checkpoint ticket and calculate
@@ -153,27 +194,46 @@ xlog_cil_init_post_recovery(
  * the current transaction ticket so that the accounting works out correctly.
  */
 static void
-xlog_cil_insert(
+xlog_cil_insert_items(
 	struct log		*log,
-	struct xlog_ticket	*ticket,
 	struct xfs_log_vec	*log_vector,
-	int			diff_length,
-	int			diff_iovecs)
+	struct xlog_ticket	*ticket)
 {
 	struct xfs_cil		*cil = log->l_cilp;
 	struct xfs_cil_ctx	*ctx = cil->xc_ctx;
-	int			iclog_space;
-	int			len = diff_length;
 	struct xfs_log_vec	*lv;
+	int			len = 0;
+	int			diff_iovecs = 0;
+	int			iclog_space;
 
-	spin_lock(&cil->xc_cil_lock);
+	ASSERT(log_vector);
 
-	/* move the items to the tail of the CIL */
+	/*
+	 * Do all the accounting aggregation and switching of log vectors
+	 * around in a separate loop to the insertion of items into the CIL.
+	 * Then we can do a separate loop to update the CIL within a single
+	 * lock/unlock pair. This reduces the number of round trips on the CIL
+	 * lock from O(nr_logvectors) to O(1) and greatly reduces the overall
+	 * hold time for the transaction commit.
+	 *
+	 * If this is the first time the item is being placed into the CIL in
+	 * this context, pin it so it can't be written to disk until the CIL is
+	 * flushed to the iclog and the iclog written to disk.
+	 *
+	 * We can do this safely because the context can't checkpoint until we
+	 * are done so it doesn't matter exactly how we update the CIL.
+	 */
 	for (lv = log_vector; lv; lv = lv->lv_next)
-		list_move_tail(&lv->lv_item->li_cil, &cil->xc_cil);
+		xlog_cil_prepare_item(log, lv, &len, &diff_iovecs);
 
 	/* account for space used by new iovec headers  */
 	len += diff_iovecs * sizeof(xlog_op_header_t);
+
+	spin_lock(&cil->xc_cil_lock);
+	/* move the items to the tail of the CIL */
+	for (lv = log_vector; lv; lv = lv->lv_next)
+		list_move_tail(&lv->lv_item->li_cil, &cil->xc_cil);
+
 	ctx->nvecs += diff_iovecs;
 
 	/*
@@ -270,76 +330,6 @@ xlog_cil_format_items(
 }
 
 static void
-xlog_cil_insert_items(
-	struct log		*log,
-	struct xfs_log_vec	*log_vector,
-	struct xlog_ticket	*ticket,
-	xfs_lsn_t		*start_lsn)
-{
-	struct xfs_log_vec	*lv;
-	int			len = 0;
-	int			diff_iovecs = 0;
-
-	ASSERT(log_vector);
-
-	if (start_lsn)
-		*start_lsn = log->l_cilp->xc_ctx->sequence;
-
-	/*
-	 * Do all the accounting aggregation and switching of log vectors
-	 * around in a separate loop to the insertion of items into the CIL.
-	 * Then we can do a separate loop to update the CIL within a single
-	 * lock/unlock pair. This reduces the number of round trips on the CIL
-	 * lock from O(nr_logvectors) to O(1) and greatly reduces the overall
-	 * hold time for the transaction commit.
-	 *
-	 * If this is the first time the item is being placed into the CIL in
-	 * this context, pin it so it can't be written to disk until the CIL is
-	 * flushed to the iclog and the iclog written to disk.
-	 *
-	 * We can do this safely because the context can't checkpoint until we
-	 * are done so it doesn't matter exactly how we update the CIL.
-	 */
-	for (lv = log_vector; lv; lv = lv->lv_next) {
-		struct xfs_log_vec	*old = lv->lv_item->li_lv;
-
-		if (old) {
-			/* existing lv on log item, space used is a delta */
-			ASSERT(!list_empty(&lv->lv_item->li_cil));
-			ASSERT(old->lv_buf && old->lv_buf_len && old->lv_niovecs);
-
-			len += lv->lv_buf_len - old->lv_buf_len;
-			diff_iovecs += lv->lv_niovecs - old->lv_niovecs;
-			kmem_free(old->lv_buf);
-			kmem_free(old);
-		} else {
-			/* new lv, must pin the log item */
-			ASSERT(!lv->lv_item->li_lv);
-			ASSERT(list_empty(&lv->lv_item->li_cil));
-
-			len += lv->lv_buf_len;
-			diff_iovecs += lv->lv_niovecs;
-			IOP_PIN(lv->lv_item);
-
-		}
-
-		/* attach new log vector to log item */
-		lv->lv_item->li_lv = lv;
-
-		/*
-		 * If this is the first time the item is being committed to the
-		 * CIL, store the sequence number on the log item so we can
-		 * tell in future commits whether this is the first checkpoint
-		 * the item is being committed into.
-		 */
-		if (!lv->lv_item->li_seq)
-			lv->lv_item->li_seq = log->l_cilp->xc_ctx->sequence;
-	}
-
-	xlog_cil_insert(log, ticket, log_vector, len, diff_iovecs);
-}
-
-static void
 xlog_cil_free_logvec(
 	struct xfs_log_vec	*log_vector)
 {
@@ -654,7 +644,11 @@ xfs_log_commit_cil(
 
 	/* lock out background commit */
 	down_read(&log->l_cilp->xc_ctx_lock);
-	xlog_cil_insert_items(log, log_vector, tp->t_ticket, commit_lsn);
+
+	if (commit_lsn)
+		*commit_lsn = log->l_cilp->xc_ctx->sequence;
+
+	xlog_cil_insert_items(log, log_vector, tp->t_ticket);
 
 	/* check we didn't blow the reservation */
 	if (tp->t_ticket->t_curr_res < 0)

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

  reply	other threads:[~2010-09-14 14:47 UTC|newest]

Thread overview: 69+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-09-14 10:55 [PATCH 0/18] xfs: metadata and buffer cache scalability improvements Dave Chinner
2010-09-14 10:56 ` [PATCH 01/18] xfs: single thread inode cache shrinking Dave Chinner
2010-09-14 18:48   ` Alex Elder
2010-09-14 22:48     ` Dave Chinner
2010-09-14 10:56 ` [PATCH 02/18] xfs: reduce the number of CIL lock round trips during commit Dave Chinner
2010-09-14 14:48   ` Christoph Hellwig [this message]
2010-09-14 17:21   ` Alex Elder
2010-09-14 10:56 ` [PATCH 03/18] xfs: remove debug assert for per-ag reference counting Dave Chinner
2010-09-14 14:48   ` Christoph Hellwig
2010-09-14 17:22   ` Alex Elder
2010-09-14 10:56 ` [PATCH 04/18] xfs: lockless per-ag lookups Dave Chinner
2010-09-14 12:35   ` Dave Chinner
2010-09-14 14:50   ` Christoph Hellwig
2010-09-14 17:28   ` Alex Elder
2010-09-14 10:56 ` [PATCH 05/18] xfs: convert inode cache lookups to use RCU locking Dave Chinner
2010-09-14 16:27   ` Christoph Hellwig
2010-09-14 23:17     ` Dave Chinner
2010-09-14 21:23   ` Alex Elder
2010-09-14 23:42     ` Dave Chinner
2010-09-14 10:56 ` [PATCH 06/18] xfs: convert pag_ici_lock to a spin lock Dave Chinner
2010-09-14 21:26   ` Alex Elder
2010-09-14 10:56 ` [PATCH 07/18] xfs: don't use vfs writeback for pure metadata modifications Dave Chinner
2010-09-14 14:54   ` Christoph Hellwig
2010-09-15  0:14     ` Dave Chinner
2010-09-15  0:17       ` Christoph Hellwig
2010-09-14 22:12   ` Alex Elder
2010-09-15  0:28     ` Dave Chinner
2010-11-08 10:47   ` Christoph Hellwig
2010-09-14 10:56 ` [PATCH 08/18] xfs: rename xfs_buf_get_nodaddr to be more appropriate Dave Chinner
2010-09-14 14:56   ` Christoph Hellwig
2010-09-14 22:14   ` Alex Elder
2010-09-14 10:56 ` [PATCH 09/18] xfs: introduced uncached buffer read primitve Dave Chinner
2010-09-14 14:56   ` Christoph Hellwig
2010-09-14 22:16   ` Alex Elder
2010-09-14 10:56 ` [PATCH 10/18] xfs: store xfs_mount in the buftarg instead of in the xfs_buf Dave Chinner
2010-09-14 14:57   ` Christoph Hellwig
2010-09-14 22:21   ` Alex Elder
2010-09-14 10:56 ` [PATCH 11/18] xfs: kill XBF_FS_MANAGED buffers Dave Chinner
2010-09-14 14:59   ` Christoph Hellwig
2010-09-14 22:26   ` Alex Elder
2010-09-14 10:56 ` [PATCH 12/18] xfs: use unhashed buffers for size checks Dave Chinner
2010-09-14 15:00   ` Christoph Hellwig
2010-09-14 22:29   ` Alex Elder
2010-09-14 10:56 ` [PATCH 13/18] xfs: remove buftarg hash for external devices Dave Chinner
2010-09-14 22:29   ` Alex Elder
2010-09-14 10:56 ` [PATCH 14/18] xfs: convert buffer cache hash to rbtree Dave Chinner
2010-09-14 16:29   ` Christoph Hellwig
2010-09-15 17:46   ` Alex Elder
2010-09-14 10:56 ` [PATCH 15/18] xfs; pack xfs_buf structure more tightly Dave Chinner
2010-09-14 16:30   ` Christoph Hellwig
2010-09-15 18:01   ` Alex Elder
2010-09-14 10:56 ` [PATCH 16/18] xfs: convert xfsbud shrinker to a per-buftarg shrinker Dave Chinner
2010-09-14 16:32   ` Christoph Hellwig
2010-09-15 20:19   ` Alex Elder
2010-09-16  0:28     ` Dave Chinner
2010-09-14 10:56 ` [PATCH 17/18] xfs: add a lru to the XFS buffer cache Dave Chinner
2010-09-14 23:16   ` Christoph Hellwig
2010-09-15  0:05     ` Dave Chinner
2010-09-15 21:28   ` Alex Elder
2010-09-14 10:56 ` [PATCH 18/18] xfs: stop using the page cache to back the " Dave Chinner
2010-09-14 23:20   ` Christoph Hellwig
2010-09-15  0:06     ` Dave Chinner
2010-09-14 14:25 ` [PATCH 0/18] xfs: metadata and buffer cache scalability improvements Christoph Hellwig
2010-09-17 13:21 ` Alex Elder
2010-09-21  2:02   ` Dave Chinner
2010-09-21 16:23     ` Alex Elder
2010-09-21 22:34       ` Dave Chinner
  -- strict thread matches above, loose matches on Subject: below --
2010-09-24 12:30 [PATCH 0/18] xfs: metadata scalability V3 Dave Chinner
2010-09-24 12:31 ` [PATCH 02/18] xfs: reduce the number of CIL lock round trips during commit Dave Chinner
2010-09-27  1:47 [PATCH 0/18] xfs: metadata scalability V4 Dave Chinner
2010-09-27  1:47 ` [PATCH 02/18] xfs: reduce the number of CIL lock round trips during commit Dave Chinner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100914144810.GB3400@infradead.org \
    --to=hch@infradead.org \
    --cc=david@fromorbit.com \
    --cc=xfs@oss.sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox