From: "Paul E. McKenney" <paulmck@kernel.org>
To: rcu@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, kernel-team@meta.com,
rostedt@goodmis.org, "Paul E. McKenney" <paulmck@kernel.org>
Subject: [PATCH rcu 09/11] rcuscale: Use special allocator for rcu_scale_writer()
Date: Thu, 1 Aug 2024 17:43:06 -0700 [thread overview]
Message-ID: <20240802004308.4134731-9-paulmck@kernel.org> (raw)
In-Reply-To: <917e8cc8-8688-428a-9122-25544c5cc101@paulmck-laptop>
The rcu_scale_writer() function needs only a fixed number of rcu_head
structures per kthread, which means that a trivial allocator suffices.
This commit therefore uses an llist-based allocator backed by a fixed array
of structures per kthread. This allows aggressive testing of RCU performance
without stressing the slab allocators.
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
kernel/rcu/rcuscale.c | 123 ++++++++++++++++++++++++++++++++++++++----
1 file changed, 113 insertions(+), 10 deletions(-)
diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index 01d48eb753b41..f945f8175e859 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -105,6 +105,19 @@ static char *scale_type = "rcu";
module_param(scale_type, charp, 0444);
MODULE_PARM_DESC(scale_type, "Type of RCU to scalability-test (rcu, srcu, ...)");
+// Structure definitions for custom fixed-per-task allocator.  A writer_mblock is one allocatable unit.
+struct writer_mblock {
+ struct rcu_head wmb_rh; // Passed to cur_ops->async(); rcu_scale_async_cb() maps it back to this block via container_of().
+ struct llist_node wmb_node; // Links this block into one of its freelist's llists while the block is free.
+ struct writer_freelist *wmb_wfl; // Owning freelist; set in rcu_scale_init() and read by rcu_scale_free().
+};
+
+struct writer_freelist { // One per rcu_scale_writer task, indexed by that task's number in writer_freelists[].
+ struct llist_head ws_lhg; // List that rcu_scale_free() pushes returned blocks onto.
+ struct llist_head ____cacheline_internodealigned_in_smp ws_lhp; // List that rcu_scale_alloc() pops from; private to the writer task.  NOTE(review): alignment presumably keeps it off ->ws_lhg's cache line -- confirm.
+ struct writer_mblock *ws_mblocks; // Backing array of gp_async_max blocks, kcalloc()ed in rcu_scale_init() and kfree()d in rcu_scale_cleanup().
+};
+
static int nrealreaders;
static int nrealwriters;
static struct task_struct **writer_tasks;
@@ -113,6 +126,7 @@ static struct task_struct *shutdown_task;
static u64 **writer_durations;
static bool *writer_done;
+static struct writer_freelist *writer_freelists;
static int *writer_n_durations;
static atomic_t n_rcu_scale_reader_started;
static atomic_t n_rcu_scale_writer_started;
@@ -463,13 +477,52 @@ rcu_scale_reader(void *arg)
return 0;
}
+/*
+ * Allocate a writer_mblock structure for the specified rcu_scale_writer
+ * task.
+ *
+ * @me: Index of the calling writer task into writer_freelists[].
+ *
+ * Blocks are taken from the task's ->ws_lhp list.  When that list is
+ * empty, everything that rcu_scale_free() has pushed onto ->ws_lhg is
+ * moved over with a single llist_del_all() before the allocation attempt.
+ *
+ * Returns a pointer to a free block, or NULL if writer_freelists has not
+ * been set up (after warning once) or if both lists are empty.
+ */
+static struct writer_mblock *rcu_scale_alloc(long me)
+{
+ struct llist_node *llnp;
+ struct writer_freelist *wflp;
+
+ if (WARN_ON_ONCE(!writer_freelists))
+ return NULL;
+ wflp = &writer_freelists[me];
+ if (llist_empty(&wflp->ws_lhp)) {
+ // ->ws_lhp is private to its rcu_scale_writer task.
+ // Store the llist_del_all() result directly: it is NULL when
+ // ->ws_lhg is empty, so it must not be fed through container_of().
+ wflp->ws_lhp.first = llist_del_all(&wflp->ws_lhg);
+ }
+ llnp = llist_del_first(&wflp->ws_lhp);
+ if (!llnp)
+ return NULL;
+ return container_of(llnp, struct writer_mblock, wmb_node);
+}
+
+/*
+ * Hand a writer_mblock structure back to the freelist recorded in its
+ * ->wmb_wfl field by pushing it onto that freelist's ->ws_lhg list.
+ * A NULL pointer is silently ignored.
+ */
+static void rcu_scale_free(struct writer_mblock *wmbp)
+{
+ if (wmbp)
+ llist_add(&wmbp->wmb_node, &wmbp->wmb_wfl->ws_lhg);
+}
+
/*
 * Callback function for asynchronous grace periods from rcu_scale_writer().
 */
static void rcu_scale_async_cb(struct rcu_head *rhp)
{
+ struct writer_mblock *wmbp = container_of(rhp, struct writer_mblock, wmb_rh); // rhp is the ->wmb_rh field of a writer_mblock; recover the enclosing block.
+
atomic_dec(this_cpu_ptr(&n_async_inflight));
- kfree(rhp);
+ rcu_scale_free(wmbp); // Return the block to its owning writer's freelist rather than to the slab allocator.
}
/*
@@ -482,13 +535,13 @@ rcu_scale_writer(void *arg)
int i_max;
unsigned long jdone;
long me = (long)arg;
- struct rcu_head *rhp = NULL;
bool selfreport = false;
bool started = false, done = false, alldone = false;
u64 t;
DEFINE_TORTURE_RANDOM(tr);
u64 *wdp;
u64 *wdpp = writer_durations[me];
+ struct writer_mblock *wmbp = NULL;
VERBOSE_SCALEOUT_STRING("rcu_scale_writer task started");
WARN_ON(!wdpp);
@@ -529,17 +582,18 @@ rcu_scale_writer(void *arg)
wdp = &wdpp[i];
*wdp = ktime_get_mono_fast_ns();
if (gp_async && !WARN_ON_ONCE(!cur_ops->async)) {
- if (!rhp)
- rhp = kmalloc(sizeof(*rhp), GFP_KERNEL);
- if (rhp && atomic_read(this_cpu_ptr(&n_async_inflight)) < gp_async_max) {
+ if (!wmbp)
+ wmbp = rcu_scale_alloc(me);
+ if (wmbp && atomic_read(this_cpu_ptr(&n_async_inflight)) < gp_async_max) {
atomic_inc(this_cpu_ptr(&n_async_inflight));
- cur_ops->async(rhp, rcu_scale_async_cb);
- rhp = NULL;
+ cur_ops->async(&wmbp->wmb_rh, rcu_scale_async_cb);
+ wmbp = NULL;
gp_succeeded = true;
} else if (!kthread_should_stop()) {
cur_ops->gp_barrier();
} else {
- kfree(rhp); /* Because we are stopping. */
+ rcu_scale_free(wmbp); /* Because we are stopping. */
+ wmbp = NULL;
}
} else if (gp_exp) {
cur_ops->exp_sync();
@@ -607,6 +661,7 @@ rcu_scale_writer(void *arg)
rcu_scale_wait_shutdown();
} while (!torture_must_stop());
if (gp_async && cur_ops->async) {
+ rcu_scale_free(wmbp);
cur_ops->gp_barrier();
}
writer_n_durations[me] = i_max + 1;
@@ -970,10 +1025,28 @@ rcu_scale_cleanup(void)
schedule_timeout_uninterruptible(1);
}
kfree(writer_durations[i]);
+ if (writer_freelists) {
+ int ctr = 0;
+ struct llist_node *llnp;
+ struct writer_freelist *wflp = &writer_freelists[i];
+
+ if (wflp->ws_mblocks) {
+ llist_for_each(llnp, wflp->ws_lhg.first)
+ ctr++;
+ llist_for_each(llnp, wflp->ws_lhp.first)
+ ctr++;
+ WARN_ONCE(ctr != gp_async_max,
+ "%s: ctr = %d gp_async_max = %d\n",
+ __func__, ctr, gp_async_max);
+ kfree(wflp->ws_mblocks);
+ }
+ }
}
kfree(writer_tasks);
kfree(writer_durations);
kfree(writer_n_durations);
+ kfree(writer_freelists);
+ writer_freelists = NULL;
}
/* Do torture-type-specific cleanup operations. */
@@ -1000,8 +1073,9 @@ rcu_scale_shutdown(void *arg)
static int __init
rcu_scale_init(void)
{
- long i;
int firsterr = 0;
+ long i;
+ long j;
static struct rcu_scale_ops *scale_ops[] = {
&rcu_ops, &srcu_ops, &srcud_ops, TASKS_OPS TASKS_RUDE_OPS TASKS_TRACING_OPS
};
@@ -1072,7 +1146,18 @@ rcu_scale_init(void)
writer_durations = kcalloc(nrealwriters, sizeof(*writer_durations), GFP_KERNEL);
writer_n_durations = kcalloc(nrealwriters, sizeof(*writer_n_durations), GFP_KERNEL);
writer_done = kcalloc(nrealwriters, sizeof(writer_done[0]), GFP_KERNEL);
- if (!writer_tasks || !writer_durations || !writer_n_durations || !writer_done) {
+ if (gp_async) {
+ if (gp_async_max <= 0) {
+ pr_warn("%s: gp_async_max = %d must be greater than zero.\n",
+ __func__, gp_async_max);
+ WARN_ON_ONCE(IS_BUILTIN(CONFIG_RCU_TORTURE_TEST));
+ firsterr = -EINVAL;
+ goto unwind;
+ }
+ writer_freelists = kcalloc(nrealwriters, sizeof(writer_freelists[0]), GFP_KERNEL);
+ }
+ if (!writer_tasks || !writer_durations || !writer_n_durations || !writer_done ||
+ (gp_async && !writer_freelists)) {
SCALEOUT_ERRSTRING("out of memory");
firsterr = -ENOMEM;
goto unwind;
@@ -1085,6 +1170,24 @@ rcu_scale_init(void)
firsterr = -ENOMEM;
goto unwind;
}
+ if (writer_freelists) {
+ struct writer_freelist *wflp = &writer_freelists[i];
+
+ init_llist_head(&wflp->ws_lhg);
+ init_llist_head(&wflp->ws_lhp);
+ wflp->ws_mblocks = kcalloc(gp_async_max, sizeof(wflp->ws_mblocks[0]),
+ GFP_KERNEL);
+ if (!wflp->ws_mblocks) {
+ firsterr = -ENOMEM;
+ goto unwind;
+ }
+ for (j = 0; j < gp_async_max; j++) {
+ struct writer_mblock *wmbp = &wflp->ws_mblocks[j];
+
+ wmbp->wmb_wfl = wflp;
+ llist_add(&wmbp->wmb_node, &wflp->ws_lhp);
+ }
+ }
firsterr = torture_create_kthread(rcu_scale_writer, (void *)i,
writer_tasks[i]);
if (torture_init_error(firsterr))
--
2.40.1
next prev parent reply other threads:[~2024-08-02 0:43 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-08-02 0:43 [PATCH rcu 0/11] RCU update-side scalability update test Paul E. McKenney
2024-08-02 0:42 ` [PATCH rcu 01/11] rcuscale: Save a few lines with whitespace-only change Paul E. McKenney
2024-08-02 0:42 ` [PATCH rcu 02/11] rcuscale: Dump stacks of stalled rcu_scale_writer() instances Paul E. McKenney
2024-08-02 0:43 ` [PATCH rcu 03/11] rcuscale: Dump grace-period statistics when rcu_scale_writer() stalls Paul E. McKenney
2024-08-02 0:43 ` [PATCH rcu 04/11] rcu: Mark callbacks not currently participating in barrier operation Paul E. McKenney
2024-08-02 0:43 ` [PATCH rcu 05/11] rcuscale: Print detailed grace-period and barrier diagnostics Paul E. McKenney
2024-08-02 0:43 ` [PATCH rcu 06/11] rcuscale: Provide clear error when async specified without primitives Paul E. McKenney
2024-08-14 12:49 ` Neeraj Upadhyay
2024-08-14 15:09 ` Paul E. McKenney
2024-08-02 0:43 ` [PATCH rcu 07/11] rcuscale: Make all writer tasks report upon hang Paul E. McKenney
2024-08-02 0:43 ` [PATCH rcu 08/11] rcuscale: Make rcu_scale_writer() tolerate repeated GFP_KERNEL failure Paul E. McKenney
2024-08-02 0:43 ` Paul E. McKenney [this message]
2024-08-02 0:43 ` [PATCH rcu 10/11] rcuscale: NULL out top-level pointers to heap memory Paul E. McKenney
2024-08-02 0:43 ` [PATCH rcu 11/11] rcuscale: Count outstanding callbacks per-task rather than per-CPU Paul E. McKenney
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240802004308.4134731-9-paulmck@kernel.org \
--to=paulmck@kernel.org \
--cc=kernel-team@meta.com \
--cc=linux-kernel@vger.kernel.org \
--cc=rcu@vger.kernel.org \
--cc=rostedt@goodmis.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.