From: Dennis Zhou <dennisszhou@gmail.com>
To: Peng Zhang <zhangpeng362@huawei.com>
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org,
akpm@linux-foundation.org, dennisszhou@gmail.com,
shakeelb@google.com, jack@suse.cz, surenb@google.com,
kent.overstreet@linux.dev, mhocko@suse.cz, vbabka@suse.cz,
yuzhao@google.com, yu.ma@intel.com, wangkefeng.wang@huawei.com,
sunnanyong@huawei.com
Subject: Re: [RFC PATCH v2 1/2] percpu_counter: introduce atomic mode for percpu_counter
Date: Fri, 26 Apr 2024 01:11:46 -0700 [thread overview]
Message-ID: <ZithwiPpjke2qbrv@snowbird> (raw)
In-Reply-To: <20240418142008.2775308-2-zhangpeng362@huawei.com>
On Thu, Apr 18, 2024 at 10:20:07PM +0800, Peng Zhang wrote:
> From: ZhangPeng <zhangpeng362@huawei.com>
>
> Depending on whether counters is NULL, we can support two modes:
> atomic mode and perpcu mode. We implement both modes by grouping
> the s64 count and atomic64_t count_atomic in a union. At the same time,
> we create the interface for adding and reading in atomic mode and for
> switching atomic mode to percpu mode.
>
> Suggested-by: Jan Kara <jack@suse.cz>
> Signed-off-by: ZhangPeng <zhangpeng362@huawei.com>
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> ---
> include/linux/percpu_counter.h | 43 +++++++++++++++++++++++++++++++---
> lib/percpu_counter.c | 31 ++++++++++++++++++++++--
> 2 files changed, 69 insertions(+), 5 deletions(-)
>
> diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
> index 3a44dd1e33d2..160f9734c0bb 100644
> --- a/include/linux/percpu_counter.h
> +++ b/include/linux/percpu_counter.h
> @@ -21,7 +21,13 @@
>
> struct percpu_counter {
> raw_spinlock_t lock;
> - s64 count;
> + /* Depending on whether counters is NULL, we can support two modes,
> + * atomic mode using count_atomic and perpcu mode using count.
> + */
> + union {
> + s64 count;
> + atomic64_t count_atomic;
> + };
> #ifdef CONFIG_HOTPLUG_CPU
> struct list_head list; /* All percpu_counters are on a list */
> #endif
> @@ -32,14 +38,14 @@ extern int percpu_counter_batch;
>
> int __percpu_counter_init_many(struct percpu_counter *fbc, s64 amount,
> gfp_t gfp, u32 nr_counters,
> - struct lock_class_key *key);
> + struct lock_class_key *key, bool switch_mode);
Nit: the lock_class_key at the end.
>
> #define percpu_counter_init_many(fbc, value, gfp, nr_counters) \
> ({ \
> static struct lock_class_key __key; \
> \
> __percpu_counter_init_many(fbc, value, gfp, nr_counters,\
> - &__key); \
> + &__key, false); \
> })
>
>
> @@ -130,6 +136,20 @@ static inline bool percpu_counter_initialized(struct percpu_counter *fbc)
> return (fbc->counters != NULL);
> }
>
> +static inline s64 percpu_counter_atomic_read(struct percpu_counter *fbc)
> +{
> + return atomic64_read(&fbc->count_atomic);
> +}
> +
> +static inline void percpu_counter_atomic_add(struct percpu_counter *fbc,
> + s64 amount)
> +{
> + atomic64_add(amount, &fbc->count_atomic);
> +}
> +
> +int percpu_counter_switch_to_pcpu_many(struct percpu_counter *fbc,
> + u32 nr_counters);
> +
> #else /* !CONFIG_SMP */
>
> struct percpu_counter {
> @@ -260,6 +280,23 @@ static inline bool percpu_counter_initialized(struct percpu_counter *fbc)
> static inline void percpu_counter_sync(struct percpu_counter *fbc)
> {
> }
> +
> +static inline s64 percpu_counter_atomic_read(struct percpu_counter *fbc)
> +{
> + return fbc->count;
> +}
> +
> +static inline void percpu_counter_atomic_add(struct percpu_counter *fbc,
> + s64 amount)
> +{
> + percpu_counter_add(fbc, amount);
> +}
> +
> +static inline int percpu_counter_switch_to_pcpu_many(struct percpu_counter *fbc,
> + u32 nr_counters)
> +{
> + return 0;
> +}
> #endif /* CONFIG_SMP */
>
> static inline void percpu_counter_inc(struct percpu_counter *fbc)
> diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
> index 44dd133594d4..95c4e038051a 100644
> --- a/lib/percpu_counter.c
> +++ b/lib/percpu_counter.c
> @@ -153,7 +153,7 @@ EXPORT_SYMBOL(__percpu_counter_sum);
>
> int __percpu_counter_init_many(struct percpu_counter *fbc, s64 amount,
> gfp_t gfp, u32 nr_counters,
> - struct lock_class_key *key)
> + struct lock_class_key *key, bool switch_mode)
> {
> unsigned long flags __maybe_unused;
> size_t counter_size;
> @@ -174,7 +174,8 @@ int __percpu_counter_init_many(struct percpu_counter *fbc, s64 amount,
> #ifdef CONFIG_HOTPLUG_CPU
> INIT_LIST_HEAD(&fbc[i].list);
> #endif
> - fbc[i].count = amount;
> + if (likely(!switch_mode))
> + fbc[i].count = amount;
> fbc[i].counters = (void *)counters + (i * counter_size);
>
> debug_percpu_counter_activate(&fbc[i]);
> @@ -357,6 +358,32 @@ bool __percpu_counter_limited_add(struct percpu_counter *fbc,
> return good;
> }
>
> +/*
> + * percpu_counter_switch_to_pcpu_many: Converts struct percpu_counters from
> + * atomic mode to percpu mode.
> + */
> +int percpu_counter_switch_to_pcpu_many(struct percpu_counter *fbc,
> + u32 nr_counters)
> +{
> + static struct lock_class_key __key;
This is an improper use of lockdep. Now all of the percpu_counters
initialized on this path will key off of this lock_class_key.
> + unsigned long flags;
> + bool ret = 0;
> +
> + if (percpu_counter_initialized(fbc))
> + return 0;
> +
> + preempt_disable();
> + local_irq_save(flags);
> + if (likely(!percpu_counter_initialized(fbc)))
> + ret = __percpu_counter_init_many(fbc, 0,
> + GFP_ATOMIC|__GFP_NOWARN|__GFP_ZERO,
> + nr_counters, &__key, true);
> + local_irq_restore(flags);
> + preempt_enable();
> +
> + return ret;
> +}
I'm staring at this API and I'm not in love with it. I think it hinges
on that there's one user of mm_stats prior hence it's safe. Generically
though, I can't see why this is safe.
I need to give this a little more thought, but my gut reaction is I'd
rather this look like percpu_refcount where we can init dead minus the
percpu allocation. Maybe that's not quite right, but I'd feel better
about it than requiring external synchronization on a percpu_counter to
ensure that it's correct.
> +
> static int __init percpu_counter_startup(void)
> {
> int ret;
> --
> 2.25.1
>
Thanks,
Dennis
next prev parent reply other threads:[~2024-04-26 8:11 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-04-18 14:20 [RFC PATCH v2 0/2] mm: convert mm's rss stats to use atomic mode Peng Zhang
2024-04-18 14:20 ` [RFC PATCH v2 1/2] percpu_counter: introduce atomic mode for percpu_counter Peng Zhang
2024-04-18 19:40 ` Andrew Morton
2024-04-19 2:55 ` zhangpeng (AS)
2024-04-26 8:11 ` Dennis Zhou [this message]
2024-04-29 7:45 ` zhangpeng (AS)
2024-04-18 14:20 ` [RFC PATCH v2 2/2] mm: convert mm's rss stats to use atomic mode Peng Zhang
2024-04-19 2:30 ` Rongwei Wang
2024-04-19 3:32 ` zhangpeng (AS)
2024-04-20 3:13 ` Rongwei Wang
2024-04-20 8:44 ` zhangpeng (AS)
2024-05-16 11:50 ` Kairui Song
2024-05-16 15:14 ` Mateusz Guzik
2024-05-17 3:29 ` Kairui Song
2024-05-17 18:08 ` Mateusz Guzik
2024-05-19 14:13 ` Dennis Zhou
2024-04-24 4:29 ` [RFC PATCH v2 0/2] " zhangpeng (AS)
2024-04-24 4:51 ` Dennis Zhou
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=ZithwiPpjke2qbrv@snowbird \
--to=dennisszhou@gmail.com \
--cc=akpm@linux-foundation.org \
--cc=jack@suse.cz \
--cc=kent.overstreet@linux.dev \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mhocko@suse.cz \
--cc=shakeelb@google.com \
--cc=sunnanyong@huawei.com \
--cc=surenb@google.com \
--cc=vbabka@suse.cz \
--cc=wangkefeng.wang@huawei.com \
--cc=yu.ma@intel.com \
--cc=yuzhao@google.com \
--cc=zhangpeng362@huawei.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.