From: Christoph Hellwig <hch@infradead.org>
To: Dave Chinner <david@fromorbit.com>
Cc: xfs@oss.sgi.com
Subject: Re: [PATCH 02/18] xfs: reduce the number of CIL lock round trips during commit
Date: Tue, 14 Sep 2010 10:48:10 -0400 [thread overview]
Message-ID: <20100914144810.GB3400@infradead.org> (raw)
In-Reply-To: <1284461777-1496-3-git-send-email-david@fromorbit.com>
On Tue, Sep 14, 2010 at 08:56:01PM +1000, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
>
> When committing a transaction, we do a CIL state lock round trip
> on every single log vector we insert into the CIL. This is resulting
> in the lock being as hot as the inode and dcache locks on 8-way
> create workloads. Rework the insertion loops to bring the number
> of lock round trips to one per transaction for log vectors, and one
> more for the busy extents.
>
> Also change the allocation of the log vector buffer not to zero it
> as we copy over the entire allocated buffer anyway.
The change looks good, but I think the function names and splits in
the CIL insert code are now a bit confusing. What about merging
something like the patch below into yours?
Index: xfs/fs/xfs/xfs_log_cil.c
===================================================================
--- xfs.orig/fs/xfs/xfs_log_cil.c 2010-09-14 11:32:36.365935029 -0300
+++ xfs/fs/xfs/xfs_log_cil.c 2010-09-14 11:43:58.046935056 -0300
@@ -145,6 +145,47 @@ xlog_cil_init_post_recovery(
log->l_curr_block);
}
+static void
+xlog_cil_prepare_item(
+ struct log *log,
+ struct xfs_log_vec *lv,
+ int *len,
+ int *diff_iovecs)
+{
+ struct xfs_log_vec *old = lv->lv_item->li_lv;
+
+ if (old) {
+ /* existing lv on log item, space used is a delta */
+ ASSERT(!list_empty(&lv->lv_item->li_cil));
+ ASSERT(old->lv_buf && old->lv_buf_len && old->lv_niovecs);
+
+ *len += lv->lv_buf_len - old->lv_buf_len;
+ *diff_iovecs += lv->lv_niovecs - old->lv_niovecs;
+ kmem_free(old->lv_buf);
+ kmem_free(old);
+ } else {
+ /* new lv, must pin the log item */
+ ASSERT(!lv->lv_item->li_lv);
+ ASSERT(list_empty(&lv->lv_item->li_cil));
+
+ *len += lv->lv_buf_len;
+ *diff_iovecs += lv->lv_niovecs;
+ IOP_PIN(lv->lv_item);
+ }
+
+ /* attach new log vector to log item */
+ lv->lv_item->li_lv = lv;
+
+ /*
+ * If this is the first time the item is being committed to the
+ * CIL, store the sequence number on the log item so we can
+ * tell in future commits whether this is the first checkpoint
+ * the item is being committed into.
+ */
+ if (!lv->lv_item->li_seq)
+ lv->lv_item->li_seq = log->l_cilp->xc_ctx->sequence;
+}
+
/*
* Insert the log items into the CIL and calculate the difference in space
* consumed by the item. Add the space to the checkpoint ticket and calculate
@@ -153,27 +194,46 @@ xlog_cil_init_post_recovery(
* the current transaction ticket so that the accounting works out correctly.
*/
static void
-xlog_cil_insert(
+xlog_cil_insert_items(
struct log *log,
- struct xlog_ticket *ticket,
struct xfs_log_vec *log_vector,
- int diff_length,
- int diff_iovecs)
+ struct xlog_ticket *ticket)
{
struct xfs_cil *cil = log->l_cilp;
struct xfs_cil_ctx *ctx = cil->xc_ctx;
- int iclog_space;
- int len = diff_length;
struct xfs_log_vec *lv;
+ int len = 0;
+ int diff_iovecs = 0;
+ int iclog_space;
- spin_lock(&cil->xc_cil_lock);
+ ASSERT(log_vector);
- /* move the items to the tail of the CIL */
+ /*
+ * Do all the accounting aggregation and switching of log vectors
+ * around in a separate loop to the insertion of items into the CIL.
+ * Then we can do a separate loop to update the CIL within a single
+ * lock/unlock pair. This reduces the number of round trips on the CIL
+ * lock from O(nr_logvectors) to O(1) and greatly reduces the overall
+ * hold time for the transaction commit.
+ *
+ * If this is the first time the item is being placed into the CIL in
+ * this context, pin it so it can't be written to disk until the CIL is
+ * flushed to the iclog and the iclog written to disk.
+ *
+ * We can do this safely because the context can't checkpoint until we
+ * are done so it doesn't matter exactly how we update the CIL.
+ */
for (lv = log_vector; lv; lv = lv->lv_next)
- list_move_tail(&lv->lv_item->li_cil, &cil->xc_cil);
+ xlog_cil_prepare_item(log, lv, &len, &diff_iovecs);
/* account for space used by new iovec headers */
len += diff_iovecs * sizeof(xlog_op_header_t);
+
+ spin_lock(&cil->xc_cil_lock);
+ /* move the items to the tail of the CIL */
+ for (lv = log_vector; lv; lv = lv->lv_next)
+ list_move_tail(&lv->lv_item->li_cil, &cil->xc_cil);
+
ctx->nvecs += diff_iovecs;
/*
@@ -270,76 +330,6 @@ xlog_cil_format_items(
}
static void
-xlog_cil_insert_items(
- struct log *log,
- struct xfs_log_vec *log_vector,
- struct xlog_ticket *ticket,
- xfs_lsn_t *start_lsn)
-{
- struct xfs_log_vec *lv;
- int len = 0;
- int diff_iovecs = 0;
-
- ASSERT(log_vector);
-
- if (start_lsn)
- *start_lsn = log->l_cilp->xc_ctx->sequence;
-
- /*
- * Do all the accounting aggregation and switching of log vectors
- * around in a separate loop to the insertion of items into the CIL.
- * Then we can do a separate loop to update the CIL within a single
- * lock/unlock pair. This reduces the number of round trips on the CIL
- * lock from O(nr_logvectors) to O(1) and greatly reduces the overall
- * hold time for the transaction commit.
- *
- * If this is the first time the item is being placed into the CIL in
- * this context, pin it so it can't be written to disk until the CIL is
- * flushed to the iclog and the iclog written to disk.
- *
- * We can do this safely because the context can't checkpoint until we
- * are done so it doesn't matter exactly how we update the CIL.
- */
- for (lv = log_vector; lv; lv = lv->lv_next) {
- struct xfs_log_vec *old = lv->lv_item->li_lv;
-
- if (old) {
- /* existing lv on log item, space used is a delta */
- ASSERT(!list_empty(&lv->lv_item->li_cil));
- ASSERT(old->lv_buf && old->lv_buf_len && old->lv_niovecs);
-
- len += lv->lv_buf_len - old->lv_buf_len;
- diff_iovecs += lv->lv_niovecs - old->lv_niovecs;
- kmem_free(old->lv_buf);
- kmem_free(old);
- } else {
- /* new lv, must pin the log item */
- ASSERT(!lv->lv_item->li_lv);
- ASSERT(list_empty(&lv->lv_item->li_cil));
-
- len += lv->lv_buf_len;
- diff_iovecs += lv->lv_niovecs;
- IOP_PIN(lv->lv_item);
-
- }
-
- /* attach new log vector to log item */
- lv->lv_item->li_lv = lv;
-
- /*
- * If this is the first time the item is being committed to the
- * CIL, store the sequence number on the log item so we can
- * tell in future commits whether this is the first checkpoint
- * the item is being committed into.
- */
- if (!lv->lv_item->li_seq)
- lv->lv_item->li_seq = log->l_cilp->xc_ctx->sequence;
- }
-
- xlog_cil_insert(log, ticket, log_vector, len, diff_iovecs);
-}
-
-static void
xlog_cil_free_logvec(
struct xfs_log_vec *log_vector)
{
@@ -654,7 +644,11 @@ xfs_log_commit_cil(
/* lock out background commit */
down_read(&log->l_cilp->xc_ctx_lock);
- xlog_cil_insert_items(log, log_vector, tp->t_ticket, commit_lsn);
+
+ if (commit_lsn)
+ *commit_lsn = log->l_cilp->xc_ctx->sequence;
+
+ xlog_cil_insert_items(log, log_vector, tp->t_ticket);
/* check we didn't blow the reservation */
if (tp->t_ticket->t_curr_res < 0)
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
next prev parent reply other threads:[~2010-09-14 14:47 UTC|newest]
Thread overview: 69+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-09-14 10:55 [PATCH 0/18] xfs: metadata and buffer cache scalability improvements Dave Chinner
2010-09-14 10:56 ` [PATCH 01/18] xfs: single thread inode cache shrinking Dave Chinner
2010-09-14 18:48 ` Alex Elder
2010-09-14 22:48 ` Dave Chinner
2010-09-14 10:56 ` [PATCH 02/18] xfs: reduce the number of CIL lock round trips during commit Dave Chinner
2010-09-14 14:48 ` Christoph Hellwig [this message]
2010-09-14 17:21 ` Alex Elder
2010-09-14 10:56 ` [PATCH 03/18] xfs: remove debug assert for per-ag reference counting Dave Chinner
2010-09-14 14:48 ` Christoph Hellwig
2010-09-14 17:22 ` Alex Elder
2010-09-14 10:56 ` [PATCH 04/18] xfs: lockless per-ag lookups Dave Chinner
2010-09-14 12:35 ` Dave Chinner
2010-09-14 14:50 ` Christoph Hellwig
2010-09-14 17:28 ` Alex Elder
2010-09-14 10:56 ` [PATCH 05/18] xfs: convert inode cache lookups to use RCU locking Dave Chinner
2010-09-14 16:27 ` Christoph Hellwig
2010-09-14 23:17 ` Dave Chinner
2010-09-14 21:23 ` Alex Elder
2010-09-14 23:42 ` Dave Chinner
2010-09-14 10:56 ` [PATCH 06/18] xfs: convert pag_ici_lock to a spin lock Dave Chinner
2010-09-14 21:26 ` Alex Elder
2010-09-14 10:56 ` [PATCH 07/18] xfs: don't use vfs writeback for pure metadata modifications Dave Chinner
2010-09-14 14:54 ` Christoph Hellwig
2010-09-15 0:14 ` Dave Chinner
2010-09-15 0:17 ` Christoph Hellwig
2010-09-14 22:12 ` Alex Elder
2010-09-15 0:28 ` Dave Chinner
2010-11-08 10:47 ` Christoph Hellwig
2010-09-14 10:56 ` [PATCH 08/18] xfs: rename xfs_buf_get_nodaddr to be more appropriate Dave Chinner
2010-09-14 14:56 ` Christoph Hellwig
2010-09-14 22:14 ` Alex Elder
2010-09-14 10:56 ` [PATCH 09/18] xfs: introduced uncached buffer read primitve Dave Chinner
2010-09-14 14:56 ` Christoph Hellwig
2010-09-14 22:16 ` Alex Elder
2010-09-14 10:56 ` [PATCH 10/18] xfs: store xfs_mount in the buftarg instead of in the xfs_buf Dave Chinner
2010-09-14 14:57 ` Christoph Hellwig
2010-09-14 22:21 ` Alex Elder
2010-09-14 10:56 ` [PATCH 11/18] xfs: kill XBF_FS_MANAGED buffers Dave Chinner
2010-09-14 14:59 ` Christoph Hellwig
2010-09-14 22:26 ` Alex Elder
2010-09-14 10:56 ` [PATCH 12/18] xfs: use unhashed buffers for size checks Dave Chinner
2010-09-14 15:00 ` Christoph Hellwig
2010-09-14 22:29 ` Alex Elder
2010-09-14 10:56 ` [PATCH 13/18] xfs: remove buftarg hash for external devices Dave Chinner
2010-09-14 22:29 ` Alex Elder
2010-09-14 10:56 ` [PATCH 14/18] xfs: convert buffer cache hash to rbtree Dave Chinner
2010-09-14 16:29 ` Christoph Hellwig
2010-09-15 17:46 ` Alex Elder
2010-09-14 10:56 ` [PATCH 15/18] xfs; pack xfs_buf structure more tightly Dave Chinner
2010-09-14 16:30 ` Christoph Hellwig
2010-09-15 18:01 ` Alex Elder
2010-09-14 10:56 ` [PATCH 16/18] xfs: convert xfsbud shrinker to a per-buftarg shrinker Dave Chinner
2010-09-14 16:32 ` Christoph Hellwig
2010-09-15 20:19 ` Alex Elder
2010-09-16 0:28 ` Dave Chinner
2010-09-14 10:56 ` [PATCH 17/18] xfs: add a lru to the XFS buffer cache Dave Chinner
2010-09-14 23:16 ` Christoph Hellwig
2010-09-15 0:05 ` Dave Chinner
2010-09-15 21:28 ` Alex Elder
2010-09-14 10:56 ` [PATCH 18/18] xfs: stop using the page cache to back the " Dave Chinner
2010-09-14 23:20 ` Christoph Hellwig
2010-09-15 0:06 ` Dave Chinner
2010-09-14 14:25 ` [PATCH 0/18] xfs: metadata and buffer cache scalability improvements Christoph Hellwig
2010-09-17 13:21 ` Alex Elder
2010-09-21 2:02 ` Dave Chinner
2010-09-21 16:23 ` Alex Elder
2010-09-21 22:34 ` Dave Chinner
-- strict thread matches above, loose matches on Subject: below --
2010-09-24 12:30 [PATCH 0/18] xfs: metadata scalability V3 Dave Chinner
2010-09-24 12:31 ` [PATCH 02/18] xfs: reduce the number of CIL lock round trips during commit Dave Chinner
2010-09-27 1:47 [PATCH 0/18] xfs: metadata scalability V4 Dave Chinner
2010-09-27 1:47 ` [PATCH 02/18] xfs: reduce the number of CIL lock round trips during commit Dave Chinner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20100914144810.GB3400@infradead.org \
--to=hch@infradead.org \
--cc=david@fromorbit.com \
--cc=xfs@oss.sgi.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox