From: "Darrick J. Wong" <djwong@kernel.org>
To: Dave Chinner <david@fromorbit.com>
Cc: linux-xfs@vger.kernel.org
Subject: Re: [PATCH 29/39] xfs: introduce per-cpu CIL tracking structure
Date: Thu, 27 May 2021 11:31:42 -0700 [thread overview]
Message-ID: <20210527183142.GH2402049@locust> (raw)
In-Reply-To: <20210519121317.585244-30-david@fromorbit.com>
On Wed, May 19, 2021 at 10:13:07PM +1000, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
>
> The CIL push lock is highly contended on larger machines, becoming a
> hard bottleneck at about 700,000 transaction commits/s on >16p
> machines. To address this, start moving the CIL tracking
> infrastructure to utilise per-CPU structures.
>
> We need to track the space used, the amount of log reservation space
> reserved to write the CIL, the log items in the CIL and the busy
> extents that need to be completed by the CIL commit. This requires
> a couple of per-cpu counters, an unordered per-cpu list and a
> globally ordered per-cpu list.
>
> Create a per-cpu structure to hold these and all the management
> interfaces needed, as well as the hooks to handle hotplug CPUs.
>
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---
> fs/xfs/xfs_log_cil.c | 106 +++++++++++++++++++++++++++++++++++++
> fs/xfs/xfs_log_priv.h | 15 ++++++
> include/linux/cpuhotplug.h | 1 +
> 3 files changed, 122 insertions(+)
>
> diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
> index 87d4eb321fdc..ba1c6979a4c7 100644
> --- a/fs/xfs/xfs_log_cil.c
> +++ b/fs/xfs/xfs_log_cil.c
> @@ -1370,6 +1370,105 @@ xfs_log_item_in_current_chkpt(
> return lip->li_seq == cil->xc_ctx->sequence;
> }
>
> +#ifdef CONFIG_HOTPLUG_CPU
> +static LIST_HEAD(xlog_cil_pcp_list);
> +static DEFINE_SPINLOCK(xlog_cil_pcp_lock);
> +static bool xlog_cil_pcp_init;
> +
> +/*
> + * Move dead percpu state to the relevant CIL context structures.
> + *
> + * We have to lock the CIL context here to ensure that nothing is modifying
> + * the percpu state, either addition or removal. Both of these are done under
> + * the CIL context lock, so grabbing that exclusively here will ensure we can
> + * safely drain the cilpcp for the CPU that is dying.
> + */
> +static int
> +xlog_cil_pcp_dead(
> + unsigned int cpu)
> +{
> + struct xfs_cil *cil, *n;
> +
> + spin_lock(&xlog_cil_pcp_lock);
> + list_for_each_entry_safe(cil, n, &xlog_cil_pcp_list, xc_pcp_list) {
> + spin_unlock(&xlog_cil_pcp_lock);
> + down_write(&cil->xc_ctx_lock);
> + /* move stuff on dead CPU to context */
> + up_write(&cil->xc_ctx_lock);
> + spin_lock(&xlog_cil_pcp_lock);
> + }
> + spin_unlock(&xlog_cil_pcp_lock);
> + return 0;
> +}
> +
> +static int
> +xlog_cil_pcp_hpadd(
> + struct xfs_cil *cil)
> +{
> + if (!xlog_cil_pcp_init) {
> + int ret;
> + ret = cpuhp_setup_state_nocalls(CPUHP_XFS_CIL_DEAD,
Nit: blank line between variable declarations and code.
> + "xfs/cil_pcp:dead", NULL,
> + xlog_cil_pcp_dead);
> + if (ret < 0) {
> + xfs_warn(cil->xc_log->l_mp,
> + "Failed to initialise CIL hotplug, error %d. XFS is non-functional.",
> + ret);
> + ASSERT(0);
> + return -ENOMEM;
> + }
> + xlog_cil_pcp_init = true;
> + }
> +
> + INIT_LIST_HEAD(&cil->xc_pcp_list);
> + spin_lock(&xlog_cil_pcp_lock);
> + list_add(&cil->xc_pcp_list, &xlog_cil_pcp_list);
> + spin_unlock(&xlog_cil_pcp_lock);
> + return 0;
> +}
> +
> +static void
> +xlog_cil_pcp_hpremove(
> + struct xfs_cil *cil)
> +{
> + spin_lock(&xlog_cil_pcp_lock);
> + list_del(&cil->xc_pcp_list);
> + spin_unlock(&xlog_cil_pcp_lock);
> +}
> +
> +#else /* !CONFIG_HOTPLUG_CPU */
> +static inline void xlog_cil_pcp_hpadd(struct xfs_cil *cil) {}
> +static inline void xlog_cil_pcp_hpremove(struct xfs_cil *cil) {}
> +#endif
> +
> +static void __percpu *
> +xlog_cil_pcp_alloc(
> + struct xfs_cil *cil)
> +{
> + void __percpu *pcp;
> +
> + pcp = alloc_percpu(struct xlog_cil_pcp);
> + if (!pcp)
> + return NULL;
> +
> + if (xlog_cil_pcp_hpadd(cil) < 0) {
> + free_percpu(pcp);
> + return NULL;
> + }
> + return pcp;
> +}
> +
> +static void
> +xlog_cil_pcp_free(
> + struct xfs_cil *cil,
> + void __percpu *pcp)
> +{
> + if (!pcp)
> + return;
> + xlog_cil_pcp_hpremove(cil);
> + free_percpu(pcp);
> +}
> +
> /*
> * Perform initial CIL structure initialisation.
> */
> @@ -1384,6 +1483,12 @@ xlog_cil_init(
> if (!cil)
> return -ENOMEM;
>
> + cil->xc_pcp = xlog_cil_pcp_alloc(cil);
> + if (!cil->xc_pcp) {
> + kmem_free(cil);
> + return -ENOMEM;
> + }
> +
> INIT_LIST_HEAD(&cil->xc_cil);
> INIT_LIST_HEAD(&cil->xc_committing);
> spin_lock_init(&cil->xc_cil_lock);
> @@ -1414,6 +1519,7 @@ xlog_cil_destroy(
>
> ASSERT(list_empty(&cil->xc_cil));
> ASSERT(test_bit(XLOG_CIL_EMPTY, &cil->xc_flags));
> + xlog_cil_pcp_free(cil, cil->xc_pcp);
> kmem_free(cil);
> }
>
> diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
> index 85a85ab569fe..aaa1e7f7fb66 100644
> --- a/fs/xfs/xfs_log_priv.h
> +++ b/fs/xfs/xfs_log_priv.h
> @@ -227,6 +227,16 @@ struct xfs_cil_ctx {
> struct work_struct push_work;
> };
>
> +/*
> + * Per-cpu CIL tracking items
> + */
> +struct xlog_cil_pcp {
> + uint32_t space_used;
> + uint32_t curr_res;
I don't think these fields need to be in this patch.
Especially since you rename one of them in patch 31.
I think this looks like the skeleton of adding per-cpu structures to the
log, though I'll read the next few patches to see if the comments make
more sense once you actually start using the pcpu data.
--D
> + struct list_head busy_extents;
> + struct list_head log_items;
> +};
> +
> /*
> * Committed Item List structure
> *
> @@ -260,6 +270,11 @@ struct xfs_cil {
> wait_queue_head_t xc_commit_wait;
> xfs_csn_t xc_current_sequence;
> wait_queue_head_t xc_push_wait; /* background push throttle */
> +
> + void __percpu *xc_pcp; /* percpu CIL structures */
> +#ifdef CONFIG_HOTPLUG_CPU
> + struct list_head xc_pcp_list;
> +#endif
> } ____cacheline_aligned_in_smp;
>
> /* xc_flags bit values */
> diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
> index 4a62b3980642..3d3ccde9e9c8 100644
> --- a/include/linux/cpuhotplug.h
> +++ b/include/linux/cpuhotplug.h
> @@ -52,6 +52,7 @@ enum cpuhp_state {
> CPUHP_FS_BUFF_DEAD,
> CPUHP_PRINTK_DEAD,
> CPUHP_MM_MEMCQ_DEAD,
> + CPUHP_XFS_CIL_DEAD,
> CPUHP_PERCPU_CNT_DEAD,
> CPUHP_RADIX_DEAD,
> CPUHP_PAGE_ALLOC_DEAD,
> --
> 2.31.1
>
next prev parent reply other threads:[~2021-05-27 18:31 UTC|newest]
Thread overview: 90+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-05-19 12:12 [PATCH 00/39 v4] xfs: CIL and log optimisations Dave Chinner
2021-05-19 12:12 ` [PATCH 01/39] xfs: log stripe roundoff is a property of the log Dave Chinner
2021-05-28 0:54 ` Allison Henderson
2021-05-19 12:12 ` [PATCH 02/39] xfs: separate CIL commit record IO Dave Chinner
2021-05-28 0:54 ` Allison Henderson
2021-05-19 12:12 ` [PATCH 03/39] xfs: remove xfs_blkdev_issue_flush Dave Chinner
2021-05-28 0:54 ` Allison Henderson
2021-05-19 12:12 ` [PATCH 04/39] xfs: async blkdev cache flush Dave Chinner
2021-05-20 23:53 ` Darrick J. Wong
2021-05-28 0:54 ` Allison Henderson
2021-05-19 12:12 ` [PATCH 05/39] xfs: CIL checkpoint flushes caches unconditionally Dave Chinner
2021-05-28 0:54 ` Allison Henderson
2021-05-19 12:12 ` [PATCH 06/39] xfs: remove need_start_rec parameter from xlog_write() Dave Chinner
2021-05-19 12:12 ` [PATCH 07/39] xfs: journal IO cache flush reductions Dave Chinner
2021-05-21 0:16 ` Darrick J. Wong
2021-05-19 12:12 ` [PATCH 08/39] xfs: Fix CIL throttle hang when CIL space used going backwards Dave Chinner
2021-05-19 12:12 ` [PATCH 09/39] xfs: xfs_log_force_lsn isn't passed a LSN Dave Chinner
2021-05-21 0:20 ` Darrick J. Wong
2021-05-19 12:12 ` [PATCH 10/39] xfs: AIL needs asynchronous CIL forcing Dave Chinner
2021-05-21 0:33 ` Darrick J. Wong
2021-05-19 12:12 ` [PATCH 11/39] xfs: CIL work is serialised, not pipelined Dave Chinner
2021-05-21 0:32 ` Darrick J. Wong
2021-05-19 12:12 ` [PATCH 12/39] xfs: factor out the CIL transaction header building Dave Chinner
2021-05-19 12:12 ` [PATCH 13/39] xfs: only CIL pushes require a start record Dave Chinner
2021-05-19 12:12 ` [PATCH 14/39] xfs: embed the xlog_op_header in the unmount record Dave Chinner
2021-05-21 0:35 ` Darrick J. Wong
2021-05-19 12:12 ` [PATCH 15/39] xfs: embed the xlog_op_header in the commit record Dave Chinner
2021-05-19 12:12 ` [PATCH 16/39] xfs: log tickets don't need log client id Dave Chinner
2021-05-21 0:38 ` Darrick J. Wong
2021-05-19 12:12 ` [PATCH 17/39] xfs: move log iovec alignment to preparation function Dave Chinner
2021-05-19 12:12 ` [PATCH 18/39] xfs: reserve space and initialise xlog_op_header in item formatting Dave Chinner
2021-05-19 12:12 ` [PATCH 19/39] xfs: log ticket region debug is largely useless Dave Chinner
2021-05-19 12:12 ` [PATCH 20/39] xfs: pass lv chain length into xlog_write() Dave Chinner
2021-05-27 17:20 ` Darrick J. Wong
2021-06-02 22:18 ` Dave Chinner
2021-06-02 22:24 ` Darrick J. Wong
2021-06-02 22:58 ` [PATCH 20/39 V2] " Dave Chinner
2021-06-02 23:01 ` Darrick J. Wong
2021-05-19 12:12 ` [PATCH 21/39] xfs: introduce xlog_write_single() Dave Chinner
2021-05-27 17:27 ` Darrick J. Wong
2021-05-19 12:13 ` [PATCH 22/39] xfs:_introduce xlog_write_partial() Dave Chinner
2021-05-27 18:06 ` Darrick J. Wong
2021-06-02 22:21 ` Dave Chinner
2021-05-19 12:13 ` [PATCH 23/39] xfs: xlog_write() no longer needs contwr state Dave Chinner
2021-05-19 12:13 ` [PATCH 24/39] xfs: xlog_write() doesn't need optype anymore Dave Chinner
2021-05-27 18:07 ` Darrick J. Wong
2021-05-19 12:13 ` [PATCH 25/39] xfs: CIL context doesn't need to count iovecs Dave Chinner
2021-05-27 18:08 ` Darrick J. Wong
2021-05-19 12:13 ` [PATCH 26/39] xfs: use the CIL space used counter for emptiness checks Dave Chinner
2021-05-19 12:13 ` [PATCH 27/39] xfs: lift init CIL reservation out of xc_cil_lock Dave Chinner
2021-05-19 12:13 ` [PATCH 28/39] xfs: rework per-iclog header CIL reservation Dave Chinner
2021-05-27 18:17 ` Darrick J. Wong
2021-05-19 12:13 ` [PATCH 29/39] xfs: introduce per-cpu CIL tracking structure Dave Chinner
2021-05-27 18:31 ` Darrick J. Wong [this message]
2021-05-19 12:13 ` [PATCH 30/39] xfs: implement percpu cil space used calculation Dave Chinner
2021-05-27 18:41 ` Darrick J. Wong
2021-06-02 23:47 ` Dave Chinner
2021-06-03 1:26 ` Darrick J. Wong
2021-06-03 2:28 ` Dave Chinner
2021-06-03 3:01 ` Darrick J. Wong
2021-06-03 3:56 ` Dave Chinner
2021-05-19 12:13 ` [PATCH 31/39] xfs: track CIL ticket reservation in percpu structure Dave Chinner
2021-05-27 18:48 ` Darrick J. Wong
2021-05-19 12:13 ` [PATCH 32/39] xfs: convert CIL busy extents to per-cpu Dave Chinner
2021-05-27 18:49 ` Darrick J. Wong
2021-05-19 12:13 ` [PATCH 33/39] xfs: Add order IDs to log items in CIL Dave Chinner
2021-05-27 19:00 ` Darrick J. Wong
2021-06-03 0:16 ` Dave Chinner
2021-06-03 0:49 ` Darrick J. Wong
2021-06-03 2:13 ` Dave Chinner
2021-06-03 3:02 ` Darrick J. Wong
2021-05-19 12:13 ` [PATCH 34/39] xfs: convert CIL to unordered per cpu lists Dave Chinner
2021-05-27 19:03 ` Darrick J. Wong
2021-06-03 0:27 ` Dave Chinner
2021-05-19 12:13 ` [PATCH 35/39] xfs: convert log vector chain to use list heads Dave Chinner
2021-05-27 19:13 ` Darrick J. Wong
2021-06-03 0:38 ` Dave Chinner
2021-06-03 0:50 ` Darrick J. Wong
2021-05-19 12:13 ` [PATCH 36/39] xfs: move CIL ordering to the logvec chain Dave Chinner
2021-05-27 19:14 ` Darrick J. Wong
2021-05-19 12:13 ` [PATCH 37/39] xfs: avoid cil push lock if possible Dave Chinner
2021-05-27 19:18 ` Darrick J. Wong
2021-05-19 12:13 ` [PATCH 38/39] xfs: xlog_sync() manually adjusts grant head space Dave Chinner
2021-05-19 12:13 ` [PATCH 39/39] xfs: expanding delayed logging design with background material Dave Chinner
2021-05-27 20:38 ` Darrick J. Wong
2021-06-03 0:57 ` Dave Chinner
-- strict thread matches above, loose matches on Subject: below --
2021-06-03 5:22 [PATCH 00/39 v5] xfs: CIL and log optimisations Dave Chinner
2021-06-03 5:22 ` [PATCH 29/39] xfs: introduce per-cpu CIL tracking structure Dave Chinner
2021-06-03 9:18 ` kernel test robot
2021-06-03 10:08 ` kernel test robot
2021-06-03 16:49 ` Darrick J. Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210527183142.GH2402049@locust \
--to=djwong@kernel.org \
--cc=david@fromorbit.com \
--cc=linux-xfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox