linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Mateusz Guzik <mjguzik@gmail.com>
To: linux-kernel@vger.kernel.org
Cc: dennis@kernel.org, tj@kernel.org, cl@linux.com,
	akpm@linux-foundation.org, shakeelb@google.com,
	vegard.nossum@oracle.com, linux-mm@kvack.org,
	Mateusz Guzik <mjguzik@gmail.com>
Subject: [PATCH v3 1/2] pcpcntr: add group allocation/free
Date: Wed, 23 Aug 2023 07:06:08 +0200	[thread overview]
Message-ID: <20230823050609.2228718-2-mjguzik@gmail.com> (raw)
In-Reply-To: <20230823050609.2228718-1-mjguzik@gmail.com>

Per-cpu counter allocations and frees are globally serialized on the
pcpu lock (and on the CPU hotplug lock if enabled, which is the case on
Debian).

At least one frequent consumer allocates 4 back-to-back counters (and
frees them in the same manner), exacerbating the problem.

While this does not fully remedy scalability issues, it is a step
towards that goal and provides immediate relief.

Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
---
 include/linux/percpu_counter.h | 39 ++++++++++++++++++----
 lib/percpu_counter.c           | 61 +++++++++++++++++++++++-----------
 2 files changed, 74 insertions(+), 26 deletions(-)

diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index 75b73c83bc9d..f1e7c987e3d3 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -30,17 +30,27 @@ struct percpu_counter {
 
 extern int percpu_counter_batch;
 
-int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp,
-			  struct lock_class_key *key);
+int __percpu_counter_init_many(struct percpu_counter *fbc, s64 amount, gfp_t gfp,
+			  u32 nr_counters, struct lock_class_key *key);
 
-#define percpu_counter_init(fbc, value, gfp)				\
+#define percpu_counter_init_many(fbc, value, gfp, nr_counters)		\
 	({								\
 		static struct lock_class_key __key;			\
 									\
-		__percpu_counter_init(fbc, value, gfp, &__key);		\
+		__percpu_counter_init_many(fbc, value, gfp, nr_counters,\
+					   &__key);			\
 	})
 
-void percpu_counter_destroy(struct percpu_counter *fbc);
+
+#define percpu_counter_init(fbc, value, gfp)				\
+	percpu_counter_init_many(fbc, value, gfp, 1)
+
+void percpu_counter_destroy_many(struct percpu_counter *fbc, u32 nr_counters);
+static inline void percpu_counter_destroy(struct percpu_counter *fbc)
+{
+	percpu_counter_destroy_many(fbc, 1);
+}
+
 void percpu_counter_set(struct percpu_counter *fbc, s64 amount);
 void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount,
 			      s32 batch);
@@ -116,11 +126,26 @@ struct percpu_counter {
 	s64 count;
 };
 
+static inline int percpu_counter_init_many(struct percpu_counter *fbc, s64 amount,
+				           gfp_t gfp, u32 nr_counters)
+{
+	u32 i;
+
+	for (i = 0; i < nr_counters; i++)
+		fbc[i].count = amount;
+
+	return 0;
+}
+
 static inline int percpu_counter_init(struct percpu_counter *fbc, s64 amount,
 				      gfp_t gfp)
 {
-	fbc->count = amount;
-	return 0;
+	return percpu_counter_init_many(fbc, amount, gfp, 1);
+}
+
+static inline void percpu_counter_destroy_many(struct percpu_counter *fbc,
+					       u32 nr_counters)
+{
 }
 
 static inline void percpu_counter_destroy(struct percpu_counter *fbc)
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index 5004463c4f9f..9338b27f1cdd 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -151,48 +151,71 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc)
 }
 EXPORT_SYMBOL(__percpu_counter_sum);
 
-int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp,
-			  struct lock_class_key *key)
+int __percpu_counter_init_many(struct percpu_counter *fbc, s64 amount, gfp_t gfp,
+			  u32 nr_counters, struct lock_class_key *key)
 {
 	unsigned long flags __maybe_unused;
-
-	raw_spin_lock_init(&fbc->lock);
-	lockdep_set_class(&fbc->lock, key);
-	fbc->count = amount;
-	fbc->counters = alloc_percpu_gfp(s32, gfp);
-	if (!fbc->counters)
+	size_t counter_size;
+	s32 __percpu *counters;
+	u32 i;
+
+	counter_size = ALIGN(sizeof(*counters), __alignof__(*counters));
+	counters = __alloc_percpu_gfp(nr_counters * counter_size,
+				      __alignof__(*counters), gfp);
+	if (!counters) {
+		fbc[0].counters = NULL;
 		return -ENOMEM;
+	}
 
-	debug_percpu_counter_activate(fbc);
+	for (i = 0; i < nr_counters; i++) {
+		raw_spin_lock_init(&fbc[i].lock);
+		lockdep_set_class(&fbc[i].lock, key);
+#ifdef CONFIG_HOTPLUG_CPU
+		INIT_LIST_HEAD(&fbc[i].list);
+#endif
+		fbc[i].count = amount;
+		fbc[i].counters = (void *)counters + (i * counter_size);
+
+		debug_percpu_counter_activate(&fbc[i]);
+	}
 
 #ifdef CONFIG_HOTPLUG_CPU
-	INIT_LIST_HEAD(&fbc->list);
 	spin_lock_irqsave(&percpu_counters_lock, flags);
-	list_add(&fbc->list, &percpu_counters);
+	for (i = 0; i < nr_counters; i++)
+		list_add(&fbc[i].list, &percpu_counters);
 	spin_unlock_irqrestore(&percpu_counters_lock, flags);
 #endif
 	return 0;
 }
-EXPORT_SYMBOL(__percpu_counter_init);
+EXPORT_SYMBOL(__percpu_counter_init_many);
 
-void percpu_counter_destroy(struct percpu_counter *fbc)
+void percpu_counter_destroy_many(struct percpu_counter *fbc, u32 nr_counters)
 {
 	unsigned long flags __maybe_unused;
+	u32 i;
+
+	if (WARN_ON_ONCE(!fbc))
+		return;
 
-	if (!fbc->counters)
+	if (!fbc[0].counters)
 		return;
 
-	debug_percpu_counter_deactivate(fbc);
+	for (i = 0; i < nr_counters; i++)
+		debug_percpu_counter_deactivate(&fbc[i]);
 
 #ifdef CONFIG_HOTPLUG_CPU
 	spin_lock_irqsave(&percpu_counters_lock, flags);
-	list_del(&fbc->list);
+	for (i = 0; i < nr_counters; i++)
+		list_del(&fbc[i].list);
 	spin_unlock_irqrestore(&percpu_counters_lock, flags);
 #endif
-	free_percpu(fbc->counters);
-	fbc->counters = NULL;
+
+	free_percpu(fbc[0].counters);
+
+	for (i = 0; i < nr_counters; i++)
+		fbc[i].counters = NULL;
 }
-EXPORT_SYMBOL(percpu_counter_destroy);
+EXPORT_SYMBOL(percpu_counter_destroy_many);
 
 int percpu_counter_batch __read_mostly = 32;
 EXPORT_SYMBOL(percpu_counter_batch);
-- 
2.41.0



  reply	other threads:[~2023-08-23  5:06 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-08-23  5:06 [PATCH v3 0/2] execve scalability issues, part 1 Mateusz Guzik
2023-08-23  5:06 ` Mateusz Guzik [this message]
2023-08-24  6:26   ` [PATCH v3 1/2] pcpcntr: add group allocation/free Dennis Zhou
2023-08-24 10:01     ` Vegard Nossum
2023-08-23  5:06 ` [PATCH v3 2/2] kernel/fork: group allocation/free of per-cpu counters for mm struct Mateusz Guzik
2023-08-24  6:28   ` Dennis Zhou
2023-09-06  8:25   ` kernel test robot
2023-08-25 15:14 ` [PATCH v3 0/2] execve scalability issues, part 1 Dennis Zhou

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230823050609.2228718-2-mjguzik@gmail.com \
    --to=mjguzik@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=cl@linux.com \
    --cc=dennis@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=shakeelb@google.com \
    --cc=tj@kernel.org \
    --cc=vegard.nossum@oracle.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).