* [PATCH rcu 02/11] rcuscale: Dump stacks of stalled rcu_scale_writer() instances
2024-08-02 0:43 [PATCH rcu 0/11] RCU update-side scalability update test Paul E. McKenney
@ 2024-08-02 0:42 ` Paul E. McKenney
2024-08-02 0:43 ` [PATCH rcu 03/11] rcuscale: Dump grace-period statistics when rcu_scale_writer() stalls Paul E. McKenney
` (8 subsequent siblings)
9 siblings, 0 replies; 13+ messages in thread
From: Paul E. McKenney @ 2024-08-02 0:42 UTC (permalink / raw)
To: rcu; +Cc: linux-kernel, kernel-team, rostedt, Paul E. McKenney
This commit improves debuggability by dumping the stacks of
rcu_scale_writer() instances that have not completed in a reasonable
timeframe. These stacks are dumped remotely, but they will be accurate
in the thus-far common case where the stalled rcu_scale_writer() instances
are blocked.
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
kernel/rcu/rcuscale.c | 21 +++++++++++++++++++--
1 file changed, 19 insertions(+), 2 deletions(-)
diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index 3269dd9c639f7..c34a8e64edc30 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -39,6 +39,7 @@
#include <linux/torture.h>
#include <linux/vmalloc.h>
#include <linux/rcupdate_trace.h>
+#include <linux/sched/debug.h>
#include "rcu.h"
@@ -111,6 +112,7 @@ static struct task_struct **reader_tasks;
static struct task_struct *shutdown_task;
static u64 **writer_durations;
+static bool *writer_done;
static int *writer_n_durations;
static atomic_t n_rcu_scale_reader_started;
static atomic_t n_rcu_scale_writer_started;
@@ -524,6 +526,7 @@ rcu_scale_writer(void *arg)
started = true;
if (!done && i >= MIN_MEAS && time_after(jiffies, jdone)) {
done = true;
+ WRITE_ONCE(writer_done[me], true);
sched_set_normal(current, 0);
pr_alert("%s%s rcu_scale_writer %ld has %d measurements\n",
scale_type, SCALE_FLAG, me, MIN_MEAS);
@@ -549,6 +552,19 @@ rcu_scale_writer(void *arg)
if (done && !alldone &&
atomic_read(&n_rcu_scale_writer_finished) >= nrealwriters)
alldone = true;
+ if (done && !alldone && time_after(jiffies, jdone + HZ * 60)) {
+ static atomic_t dumped;
+ int i;
+
+ if (!atomic_xchg(&dumped, 1)) {
+ for (i = 0; i < nrealwriters; i++) {
+ if (writer_done[i])
+ continue;
+ pr_info("%s: Task %ld flags writer %d:\n", __func__, me, i);
+ sched_show_task(writer_tasks[i]);
+ }
+ }
+ }
if (started && !alldone && i < MAX_MEAS - 1)
i++;
rcu_scale_wait_shutdown();
@@ -1015,10 +1031,11 @@ rcu_scale_init(void)
}
while (atomic_read(&n_rcu_scale_reader_started) < nrealreaders)
schedule_timeout_uninterruptible(1);
- writer_tasks = kcalloc(nrealwriters, sizeof(reader_tasks[0]), GFP_KERNEL);
+ writer_tasks = kcalloc(nrealwriters, sizeof(writer_tasks[0]), GFP_KERNEL);
writer_durations = kcalloc(nrealwriters, sizeof(*writer_durations), GFP_KERNEL);
writer_n_durations = kcalloc(nrealwriters, sizeof(*writer_n_durations), GFP_KERNEL);
- if (!writer_tasks || !writer_durations || !writer_n_durations) {
+ writer_done = kcalloc(nrealwriters, sizeof(writer_done[0]), GFP_KERNEL);
+ if (!writer_tasks || !writer_durations || !writer_n_durations || !writer_done) {
SCALEOUT_ERRSTRING("out of memory");
firsterr = -ENOMEM;
goto unwind;
--
2.40.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH rcu 03/11] rcuscale: Dump grace-period statistics when rcu_scale_writer() stalls
2024-08-02 0:43 [PATCH rcu 0/11] RCU update-side scalability update test Paul E. McKenney
2024-08-02 0:42 ` [PATCH rcu 02/11] rcuscale: Dump stacks of stalled rcu_scale_writer() instances Paul E. McKenney
@ 2024-08-02 0:43 ` Paul E. McKenney
2024-08-02 0:43 ` [PATCH rcu 04/11] rcu: Mark callbacks not currently participating in barrier operation Paul E. McKenney
` (7 subsequent siblings)
9 siblings, 0 replies; 13+ messages in thread
From: Paul E. McKenney @ 2024-08-02 0:43 UTC (permalink / raw)
To: rcu; +Cc: linux-kernel, kernel-team, rostedt, Paul E. McKenney
This commit adds a .stats function pointer to the rcu_scale_ops structure,
and if this is non-NULL, it is invoked after stack traces are dumped in
response to a rcu_scale_writer() stall.
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
kernel/rcu/rcuscale.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index c34a8e64edc30..ddfb96e5a4e1e 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -145,6 +145,7 @@ struct rcu_scale_ops {
void (*sync)(void);
void (*exp_sync)(void);
struct task_struct *(*rso_gp_kthread)(void);
+ void (*stats)(void);
const char *name;
};
@@ -226,6 +227,11 @@ static void srcu_scale_synchronize(void)
synchronize_srcu(srcu_ctlp);
}
+static void srcu_scale_stats(void)
+{
+ srcu_torture_stats_print(srcu_ctlp, scale_type, SCALE_FLAG);
+}
+
static void srcu_scale_synchronize_expedited(void)
{
synchronize_srcu_expedited(srcu_ctlp);
@@ -243,6 +249,7 @@ static struct rcu_scale_ops srcu_ops = {
.gp_barrier = srcu_rcu_barrier,
.sync = srcu_scale_synchronize,
.exp_sync = srcu_scale_synchronize_expedited,
+ .stats = srcu_scale_stats,
.name = "srcu"
};
@@ -272,6 +279,7 @@ static struct rcu_scale_ops srcud_ops = {
.gp_barrier = srcu_rcu_barrier,
.sync = srcu_scale_synchronize,
.exp_sync = srcu_scale_synchronize_expedited,
+ .stats = srcu_scale_stats,
.name = "srcud"
};
@@ -563,6 +571,8 @@ rcu_scale_writer(void *arg)
pr_info("%s: Task %ld flags writer %d:\n", __func__, me, i);
sched_show_task(writer_tasks[i]);
}
+ if (cur_ops->stats)
+ cur_ops->stats();
}
}
if (started && !alldone && i < MAX_MEAS - 1)
--
2.40.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH rcu 04/11] rcu: Mark callbacks not currently participating in barrier operation
2024-08-02 0:43 [PATCH rcu 0/11] RCU update-side scalability update test Paul E. McKenney
2024-08-02 0:42 ` [PATCH rcu 02/11] rcuscale: Dump stacks of stalled rcu_scale_writer() instances Paul E. McKenney
2024-08-02 0:43 ` [PATCH rcu 03/11] rcuscale: Dump grace-period statistics when rcu_scale_writer() stalls Paul E. McKenney
@ 2024-08-02 0:43 ` Paul E. McKenney
2024-08-02 0:43 ` [PATCH rcu 05/11] rcuscale: Print detailed grace-period and barrier diagnostics Paul E. McKenney
` (6 subsequent siblings)
9 siblings, 0 replies; 13+ messages in thread
From: Paul E. McKenney @ 2024-08-02 0:43 UTC (permalink / raw)
To: rcu; +Cc: linux-kernel, kernel-team, rostedt, Paul E. McKenney
RCU keeps a count of the number of callbacks that the current
rcu_barrier() is waiting on, but there is currently no easy way to
work out which callback is stuck. One way to do this is to mark idle
RCU-barrier callbacks by making the ->next pointer point to the callback
itself, and this commit does just that.
Later commits will use this for debug output.
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
kernel/rcu/tree.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 77b5b39e19a80..930846f06bee5 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -4383,6 +4383,7 @@ static void rcu_barrier_callback(struct rcu_head *rhp)
{
unsigned long __maybe_unused s = rcu_state.barrier_sequence;
+ rhp->next = rhp; // Mark the callback as having been invoked.
if (atomic_dec_and_test(&rcu_state.barrier_cpu_count)) {
rcu_barrier_trace(TPS("LastCB"), -1, s);
complete(&rcu_state.barrier_completion);
@@ -5404,6 +5405,8 @@ static void __init rcu_init_one(void)
while (i > rnp->grphi)
rnp++;
per_cpu_ptr(&rcu_data, i)->mynode = rnp;
+ per_cpu_ptr(&rcu_data, i)->barrier_head.next =
+ &per_cpu_ptr(&rcu_data, i)->barrier_head;
rcu_boot_init_percpu_data(i);
}
}
--
2.40.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH rcu 0/11] RCU update-side scalability update test
@ 2024-08-02 0:43 Paul E. McKenney
2024-08-02 0:42 ` [PATCH rcu 02/11] rcuscale: Dump stacks of stalled rcu_scale_writer() instances Paul E. McKenney
` (9 more replies)
0 siblings, 10 replies; 13+ messages in thread
From: Paul E. McKenney @ 2024-08-02 0:43 UTC (permalink / raw)
To: rcu; +Cc: linux-kernel, kernel-team, rostedt
Hello!
This series contains updates to RCU's "rcuscale" update-side performance
and scalability test, including a fix of a brown-paper-bag bug:
1. Save a few lines with whitespace-only change.
2. Dump stacks of stalled rcu_scale_writer() instances.
3. Dump grace-period statistics when rcu_scale_writer() stalls.
4. Mark callbacks not currently participating in barrier operation.
5. Print detailed grace-period and barrier diagnostics.
6. Provide clear error when async specified without primitives.
7. Make all writer tasks report upon hang.
8. Make rcu_scale_writer() tolerate repeated GFP_KERNEL failure.
9. Use special allocator for rcu_scale_writer().
10. NULL out top-level pointers to heap memory.
11. Count outstanding callbacks per-task rather than per-CPU.
Thanx, Paul
------------------------------------------------------------------------
b/kernel/rcu/rcuscale.c | 10 --
b/kernel/rcu/tree.c | 3
kernel/rcu/rcuscale.c | 206 +++++++++++++++++++++++++++++++++++++++++++-----
3 files changed, 191 insertions(+), 28 deletions(-)
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH rcu 05/11] rcuscale: Print detailed grace-period and barrier diagnostics
2024-08-02 0:43 [PATCH rcu 0/11] RCU update-side scalability update test Paul E. McKenney
` (2 preceding siblings ...)
2024-08-02 0:43 ` [PATCH rcu 04/11] rcu: Mark callbacks not currently participating in barrier operation Paul E. McKenney
@ 2024-08-02 0:43 ` Paul E. McKenney
2024-08-02 0:43 ` [PATCH rcu 06/11] rcuscale: Provide clear error when async specified without primitives Paul E. McKenney
` (5 subsequent siblings)
9 siblings, 0 replies; 13+ messages in thread
From: Paul E. McKenney @ 2024-08-02 0:43 UTC (permalink / raw)
To: rcu; +Cc: linux-kernel, kernel-team, rostedt, Paul E. McKenney
This commit uses the new rcu_tasks_torture_stats_print(),
rcu_tasks_trace_torture_stats_print(), and
rcu_tasks_rude_torture_stats_print() functions in order to provide
detailed diagnostics on grace-period, callback, and barrier state when
rcu_scale_writer() hangs.
[ paulmck: Apply kernel test robot feedback. ]
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
kernel/rcu/rcuscale.c | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index ddfb96e5a4e1e..933014b381ec0 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -298,6 +298,11 @@ static void tasks_scale_read_unlock(int idx)
{
}
+static void rcu_tasks_scale_stats(void)
+{
+ rcu_tasks_torture_stats_print(scale_type, SCALE_FLAG);
+}
+
static struct rcu_scale_ops tasks_ops = {
.ptype = RCU_TASKS_FLAVOR,
.init = rcu_sync_scale_init,
@@ -310,6 +315,7 @@ static struct rcu_scale_ops tasks_ops = {
.sync = synchronize_rcu_tasks,
.exp_sync = synchronize_rcu_tasks,
.rso_gp_kthread = get_rcu_tasks_gp_kthread,
+ .stats = IS_ENABLED(CONFIG_TINY_RCU) ? NULL : rcu_tasks_scale_stats,
.name = "tasks"
};
@@ -336,6 +342,11 @@ static void tasks_rude_scale_read_unlock(int idx)
{
}
+static void rcu_tasks_rude_scale_stats(void)
+{
+ rcu_tasks_rude_torture_stats_print(scale_type, SCALE_FLAG);
+}
+
static struct rcu_scale_ops tasks_rude_ops = {
.ptype = RCU_TASKS_RUDE_FLAVOR,
.init = rcu_sync_scale_init,
@@ -346,6 +357,7 @@ static struct rcu_scale_ops tasks_rude_ops = {
.sync = synchronize_rcu_tasks_rude,
.exp_sync = synchronize_rcu_tasks_rude,
.rso_gp_kthread = get_rcu_tasks_rude_gp_kthread,
+ .stats = IS_ENABLED(CONFIG_TINY_RCU) ? NULL : rcu_tasks_rude_scale_stats,
.name = "tasks-rude"
};
@@ -374,6 +386,11 @@ static void tasks_trace_scale_read_unlock(int idx)
rcu_read_unlock_trace();
}
+static void rcu_tasks_trace_scale_stats(void)
+{
+ rcu_tasks_trace_torture_stats_print(scale_type, SCALE_FLAG);
+}
+
static struct rcu_scale_ops tasks_tracing_ops = {
.ptype = RCU_TASKS_FLAVOR,
.init = rcu_sync_scale_init,
@@ -386,6 +403,7 @@ static struct rcu_scale_ops tasks_tracing_ops = {
.sync = synchronize_rcu_tasks_trace,
.exp_sync = synchronize_rcu_tasks_trace,
.rso_gp_kthread = get_rcu_tasks_trace_gp_kthread,
+ .stats = IS_ENABLED(CONFIG_TINY_RCU) ? NULL : rcu_tasks_trace_scale_stats,
.name = "tasks-tracing"
};
--
2.40.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH rcu 06/11] rcuscale: Provide clear error when async specified without primitives
2024-08-02 0:43 [PATCH rcu 0/11] RCU update-side scalability update test Paul E. McKenney
` (3 preceding siblings ...)
2024-08-02 0:43 ` [PATCH rcu 05/11] rcuscale: Print detailed grace-period and barrier diagnostics Paul E. McKenney
@ 2024-08-02 0:43 ` Paul E. McKenney
2024-08-14 12:49 ` Neeraj Upadhyay
2024-08-02 0:43 ` [PATCH rcu 07/11] rcuscale: Make all writer tasks report upon hang Paul E. McKenney
` (4 subsequent siblings)
9 siblings, 1 reply; 13+ messages in thread
From: Paul E. McKenney @ 2024-08-02 0:43 UTC (permalink / raw)
To: rcu; +Cc: linux-kernel, kernel-team, rostedt, Paul E. McKenney
Currently, if the rcuscale module's async module parameter is specified
for RCU implementations that do not have sync primitives such as
call_rcu(), there will be a series of splats due to calls to a NULL
pointer. This commit therefore warns of this situation, but switches
to non-async testing.
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
kernel/rcu/rcuscale.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index 933014b381ec0..315ced63ec105 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -525,7 +525,7 @@ rcu_scale_writer(void *arg)
schedule_timeout_idle(torture_random(&tr) % writer_holdoff_jiffies + 1);
wdp = &wdpp[i];
*wdp = ktime_get_mono_fast_ns();
- if (gp_async) {
+ if (gp_async && !WARN_ON_ONCE(!cur_ops->async)) {
retry:
if (!rhp)
rhp = kmalloc(sizeof(*rhp), GFP_KERNEL);
@@ -597,7 +597,7 @@ rcu_scale_writer(void *arg)
i++;
rcu_scale_wait_shutdown();
} while (!torture_must_stop());
- if (gp_async) {
+ if (gp_async && cur_ops->async) {
cur_ops->gp_barrier();
}
writer_n_durations[me] = i_max + 1;
--
2.40.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH rcu 07/11] rcuscale: Make all writer tasks report upon hang
2024-08-02 0:43 [PATCH rcu 0/11] RCU update-side scalability update test Paul E. McKenney
` (4 preceding siblings ...)
2024-08-02 0:43 ` [PATCH rcu 06/11] rcuscale: Provide clear error when async specified without primitives Paul E. McKenney
@ 2024-08-02 0:43 ` Paul E. McKenney
2024-08-02 0:43 ` [PATCH rcu 08/11] rcuscale: Make rcu_scale_writer() tolerate repeated GFP_KERNEL failure Paul E. McKenney
` (3 subsequent siblings)
9 siblings, 0 replies; 13+ messages in thread
From: Paul E. McKenney @ 2024-08-02 0:43 UTC (permalink / raw)
To: rcu; +Cc: linux-kernel, kernel-team, rostedt, Paul E. McKenney
This commit causes all writer tasks to provide a brief report after a
hang has been reported, spaced at one-second intervals.
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
kernel/rcu/rcuscale.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index 315ced63ec105..a820f11b19444 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -483,6 +483,7 @@ rcu_scale_writer(void *arg)
unsigned long jdone;
long me = (long)arg;
struct rcu_head *rhp = NULL;
+ bool selfreport = false;
bool started = false, done = false, alldone = false;
u64 t;
DEFINE_TORTURE_RANDOM(tr);
@@ -593,6 +594,11 @@ rcu_scale_writer(void *arg)
cur_ops->stats();
}
}
+ if (!selfreport && time_after(jiffies, jdone + HZ * (70 + me))) {
+ pr_info("%s: Writer %ld self-report: started %d done %d/%d->%d i %d jdone %lu.\n",
+ __func__, me, started, done, writer_done[me], atomic_read(&n_rcu_scale_writer_finished), i, jiffies - jdone);
+ selfreport = true;
+ }
if (started && !alldone && i < MAX_MEAS - 1)
i++;
rcu_scale_wait_shutdown();
--
2.40.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH rcu 08/11] rcuscale: Make rcu_scale_writer() tolerate repeated GFP_KERNEL failure
2024-08-02 0:43 [PATCH rcu 0/11] RCU update-side scalability update test Paul E. McKenney
` (5 preceding siblings ...)
2024-08-02 0:43 ` [PATCH rcu 07/11] rcuscale: Make all writer tasks report upon hang Paul E. McKenney
@ 2024-08-02 0:43 ` Paul E. McKenney
2024-08-02 0:43 ` [PATCH rcu 09/11] rcuscale: Use special allocator for rcu_scale_writer() Paul E. McKenney
` (2 subsequent siblings)
9 siblings, 0 replies; 13+ messages in thread
From: Paul E. McKenney @ 2024-08-02 0:43 UTC (permalink / raw)
To: rcu; +Cc: linux-kernel, kernel-team, rostedt, Paul E. McKenney
Under some conditions, kmalloc(GFP_KERNEL) allocations have been
observed to repeatedly fail. This situation has been observed to
cause one of the rcu_scale_writer() instances to loop indefinitely
retrying memory allocation for an asynchronous grace-period primitive.
The problem is that if memory is short, all the other instances will
allocate all available memory before the looping task is awakened from
its rcu_barrier*() call. This in turn results in hangs, so that rcuscale
fails to complete.
This commit therefore removes the tight retry loop, so that when this
condition occurs, the affected task is still passing through the full
loop with its full set of termination checks. This spreads the risk
of indefinite memory-allocation retry failures across all instances of
rcu_scale_writer() tasks, which in turn prevents the hangs.
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
kernel/rcu/rcuscale.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index a820f11b19444..01d48eb753b41 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -520,6 +520,8 @@ rcu_scale_writer(void *arg)
jdone = jiffies + minruntime * HZ;
do {
+ bool gp_succeeded = false;
+
if (writer_holdoff)
udelay(writer_holdoff);
if (writer_holdoff_jiffies)
@@ -527,23 +529,24 @@ rcu_scale_writer(void *arg)
wdp = &wdpp[i];
*wdp = ktime_get_mono_fast_ns();
if (gp_async && !WARN_ON_ONCE(!cur_ops->async)) {
-retry:
if (!rhp)
rhp = kmalloc(sizeof(*rhp), GFP_KERNEL);
if (rhp && atomic_read(this_cpu_ptr(&n_async_inflight)) < gp_async_max) {
atomic_inc(this_cpu_ptr(&n_async_inflight));
cur_ops->async(rhp, rcu_scale_async_cb);
rhp = NULL;
+ gp_succeeded = true;
} else if (!kthread_should_stop()) {
cur_ops->gp_barrier();
- goto retry;
} else {
kfree(rhp); /* Because we are stopping. */
}
} else if (gp_exp) {
cur_ops->exp_sync();
+ gp_succeeded = true;
} else {
cur_ops->sync();
+ gp_succeeded = true;
}
t = ktime_get_mono_fast_ns();
*wdp = t - *wdp;
@@ -599,7 +602,7 @@ rcu_scale_writer(void *arg)
__func__, me, started, done, writer_done[me], atomic_read(&n_rcu_scale_writer_finished), i, jiffies - jdone);
selfreport = true;
}
- if (started && !alldone && i < MAX_MEAS - 1)
+ if (gp_succeeded && started && !alldone && i < MAX_MEAS - 1)
i++;
rcu_scale_wait_shutdown();
} while (!torture_must_stop());
--
2.40.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH rcu 09/11] rcuscale: Use special allocator for rcu_scale_writer()
2024-08-02 0:43 [PATCH rcu 0/11] RCU update-side scalability update test Paul E. McKenney
` (6 preceding siblings ...)
2024-08-02 0:43 ` [PATCH rcu 08/11] rcuscale: Make rcu_scale_writer() tolerate repeated GFP_KERNEL failure Paul E. McKenney
@ 2024-08-02 0:43 ` Paul E. McKenney
2024-08-02 0:43 ` [PATCH rcu 10/11] rcuscale: NULL out top-level pointers to heap memory Paul E. McKenney
2024-08-02 0:43 ` [PATCH rcu 11/11] rcuscale: Count outstanding callbacks per-task rather than per-CPU Paul E. McKenney
9 siblings, 0 replies; 13+ messages in thread
From: Paul E. McKenney @ 2024-08-02 0:43 UTC (permalink / raw)
To: rcu; +Cc: linux-kernel, kernel-team, rostedt, Paul E. McKenney
The rcu_scale_writer() function needs only a fixed number of rcu_head
structures per kthread, which means that a trivial allocator suffices.
This commit therefore uses an llist-based allocator using a fixed array of
structures per kthread. This allows aggressive testing of RCU performance
without stressing the slab allocators.
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
kernel/rcu/rcuscale.c | 123 ++++++++++++++++++++++++++++++++++++++----
1 file changed, 113 insertions(+), 10 deletions(-)
diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index 01d48eb753b41..f945f8175e859 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -105,6 +105,19 @@ static char *scale_type = "rcu";
module_param(scale_type, charp, 0444);
MODULE_PARM_DESC(scale_type, "Type of RCU to scalability-test (rcu, srcu, ...)");
+// Structure definitions for custom fixed-per-task allocator.
+struct writer_mblock {
+ struct rcu_head wmb_rh;
+ struct llist_node wmb_node;
+ struct writer_freelist *wmb_wfl;
+};
+
+struct writer_freelist {
+ struct llist_head ws_lhg;
+ struct llist_head ____cacheline_internodealigned_in_smp ws_lhp;
+ struct writer_mblock *ws_mblocks;
+};
+
static int nrealreaders;
static int nrealwriters;
static struct task_struct **writer_tasks;
@@ -113,6 +126,7 @@ static struct task_struct *shutdown_task;
static u64 **writer_durations;
static bool *writer_done;
+static struct writer_freelist *writer_freelists;
static int *writer_n_durations;
static atomic_t n_rcu_scale_reader_started;
static atomic_t n_rcu_scale_writer_started;
@@ -463,13 +477,52 @@ rcu_scale_reader(void *arg)
return 0;
}
+/*
+ * Allocate a writer_mblock structure for the specified rcu_scale_writer
+ * task.
+ */
+static struct writer_mblock *rcu_scale_alloc(long me)
+{
+ struct llist_node *llnp;
+ struct writer_freelist *wflp;
+ struct writer_mblock *wmbp;
+
+ if (WARN_ON_ONCE(!writer_freelists))
+ return NULL;
+ wflp = &writer_freelists[me];
+ if (llist_empty(&wflp->ws_lhp)) {
+ // ->ws_lhp is private to its rcu_scale_writer task.
+ wmbp = container_of(llist_del_all(&wflp->ws_lhg), struct writer_mblock, wmb_node);
+ wflp->ws_lhp.first = &wmbp->wmb_node;
+ }
+ llnp = llist_del_first(&wflp->ws_lhp);
+ if (!llnp)
+ return NULL;
+ return container_of(llnp, struct writer_mblock, wmb_node);
+}
+
+/*
+ * Free a writer_mblock structure to its rcu_scale_writer task.
+ */
+static void rcu_scale_free(struct writer_mblock *wmbp)
+{
+ struct writer_freelist *wflp;
+
+ if (!wmbp)
+ return;
+ wflp = wmbp->wmb_wfl;
+ llist_add(&wmbp->wmb_node, &wflp->ws_lhg);
+}
+
/*
* Callback function for asynchronous grace periods from rcu_scale_writer().
*/
static void rcu_scale_async_cb(struct rcu_head *rhp)
{
+ struct writer_mblock *wmbp = container_of(rhp, struct writer_mblock, wmb_rh);
+
atomic_dec(this_cpu_ptr(&n_async_inflight));
- kfree(rhp);
+ rcu_scale_free(wmbp);
}
/*
@@ -482,13 +535,13 @@ rcu_scale_writer(void *arg)
int i_max;
unsigned long jdone;
long me = (long)arg;
- struct rcu_head *rhp = NULL;
bool selfreport = false;
bool started = false, done = false, alldone = false;
u64 t;
DEFINE_TORTURE_RANDOM(tr);
u64 *wdp;
u64 *wdpp = writer_durations[me];
+ struct writer_mblock *wmbp = NULL;
VERBOSE_SCALEOUT_STRING("rcu_scale_writer task started");
WARN_ON(!wdpp);
@@ -529,17 +582,18 @@ rcu_scale_writer(void *arg)
wdp = &wdpp[i];
*wdp = ktime_get_mono_fast_ns();
if (gp_async && !WARN_ON_ONCE(!cur_ops->async)) {
- if (!rhp)
- rhp = kmalloc(sizeof(*rhp), GFP_KERNEL);
- if (rhp && atomic_read(this_cpu_ptr(&n_async_inflight)) < gp_async_max) {
+ if (!wmbp)
+ wmbp = rcu_scale_alloc(me);
+ if (wmbp && atomic_read(this_cpu_ptr(&n_async_inflight)) < gp_async_max) {
atomic_inc(this_cpu_ptr(&n_async_inflight));
- cur_ops->async(rhp, rcu_scale_async_cb);
- rhp = NULL;
+ cur_ops->async(&wmbp->wmb_rh, rcu_scale_async_cb);
+ wmbp = NULL;
gp_succeeded = true;
} else if (!kthread_should_stop()) {
cur_ops->gp_barrier();
} else {
- kfree(rhp); /* Because we are stopping. */
+ rcu_scale_free(wmbp); /* Because we are stopping. */
+ wmbp = NULL;
}
} else if (gp_exp) {
cur_ops->exp_sync();
@@ -607,6 +661,7 @@ rcu_scale_writer(void *arg)
rcu_scale_wait_shutdown();
} while (!torture_must_stop());
if (gp_async && cur_ops->async) {
+ rcu_scale_free(wmbp);
cur_ops->gp_barrier();
}
writer_n_durations[me] = i_max + 1;
@@ -970,10 +1025,28 @@ rcu_scale_cleanup(void)
schedule_timeout_uninterruptible(1);
}
kfree(writer_durations[i]);
+ if (writer_freelists) {
+ int ctr = 0;
+ struct llist_node *llnp;
+ struct writer_freelist *wflp = &writer_freelists[i];
+
+ if (wflp->ws_mblocks) {
+ llist_for_each(llnp, wflp->ws_lhg.first)
+ ctr++;
+ llist_for_each(llnp, wflp->ws_lhp.first)
+ ctr++;
+ WARN_ONCE(ctr != gp_async_max,
+ "%s: ctr = %d gp_async_max = %d\n",
+ __func__, ctr, gp_async_max);
+ kfree(wflp->ws_mblocks);
+ }
+ }
}
kfree(writer_tasks);
kfree(writer_durations);
kfree(writer_n_durations);
+ kfree(writer_freelists);
+ writer_freelists = NULL;
}
/* Do torture-type-specific cleanup operations. */
@@ -1000,8 +1073,9 @@ rcu_scale_shutdown(void *arg)
static int __init
rcu_scale_init(void)
{
- long i;
int firsterr = 0;
+ long i;
+ long j;
static struct rcu_scale_ops *scale_ops[] = {
&rcu_ops, &srcu_ops, &srcud_ops, TASKS_OPS TASKS_RUDE_OPS TASKS_TRACING_OPS
};
@@ -1072,7 +1146,18 @@ rcu_scale_init(void)
writer_durations = kcalloc(nrealwriters, sizeof(*writer_durations), GFP_KERNEL);
writer_n_durations = kcalloc(nrealwriters, sizeof(*writer_n_durations), GFP_KERNEL);
writer_done = kcalloc(nrealwriters, sizeof(writer_done[0]), GFP_KERNEL);
- if (!writer_tasks || !writer_durations || !writer_n_durations || !writer_done) {
+ if (gp_async) {
+ if (gp_async_max <= 0) {
+ pr_warn("%s: gp_async_max = %d must be greater than zero.\n",
+ __func__, gp_async_max);
+ WARN_ON_ONCE(IS_BUILTIN(CONFIG_RCU_TORTURE_TEST));
+ firsterr = -EINVAL;
+ goto unwind;
+ }
+ writer_freelists = kcalloc(nrealwriters, sizeof(writer_freelists[0]), GFP_KERNEL);
+ }
+ if (!writer_tasks || !writer_durations || !writer_n_durations || !writer_done ||
+ (gp_async && !writer_freelists)) {
SCALEOUT_ERRSTRING("out of memory");
firsterr = -ENOMEM;
goto unwind;
@@ -1085,6 +1170,24 @@ rcu_scale_init(void)
firsterr = -ENOMEM;
goto unwind;
}
+ if (writer_freelists) {
+ struct writer_freelist *wflp = &writer_freelists[i];
+
+ init_llist_head(&wflp->ws_lhg);
+ init_llist_head(&wflp->ws_lhp);
+ wflp->ws_mblocks = kcalloc(gp_async_max, sizeof(wflp->ws_mblocks[0]),
+ GFP_KERNEL);
+ if (!wflp->ws_mblocks) {
+ firsterr = -ENOMEM;
+ goto unwind;
+ }
+ for (j = 0; j < gp_async_max; j++) {
+ struct writer_mblock *wmbp = &wflp->ws_mblocks[j];
+
+ wmbp->wmb_wfl = wflp;
+ llist_add(&wmbp->wmb_node, &wflp->ws_lhp);
+ }
+ }
firsterr = torture_create_kthread(rcu_scale_writer, (void *)i,
writer_tasks[i]);
if (torture_init_error(firsterr))
--
2.40.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH rcu 10/11] rcuscale: NULL out top-level pointers to heap memory
2024-08-02 0:43 [PATCH rcu 0/11] RCU update-side scalability update test Paul E. McKenney
` (7 preceding siblings ...)
2024-08-02 0:43 ` [PATCH rcu 09/11] rcuscale: Use special allocator for rcu_scale_writer() Paul E. McKenney
@ 2024-08-02 0:43 ` Paul E. McKenney
2024-08-02 0:43 ` [PATCH rcu 11/11] rcuscale: Count outstanding callbacks per-task rather than per-CPU Paul E. McKenney
9 siblings, 0 replies; 13+ messages in thread
From: Paul E. McKenney @ 2024-08-02 0:43 UTC (permalink / raw)
To: rcu; +Cc: linux-kernel, kernel-team, rostedt, Paul E. McKenney
Currently, if someone modprobes and rmmods rcuscale successfully, but
the next run errors out during the modprobe, non-NULL pointers to freed
memory will remain. If the run after that also errors out during the
modprobe, there will be double-free bugs.
This commit therefore NULLs out top-level pointers to memory that has
just been freed.
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
kernel/rcu/rcuscale.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index f945f8175e859..13d379c179248 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -819,6 +819,7 @@ kfree_scale_cleanup(void)
torture_stop_kthread(kfree_scale_thread,
kfree_reader_tasks[i]);
kfree(kfree_reader_tasks);
+ kfree_reader_tasks = NULL;
}
torture_cleanup_end();
@@ -987,6 +988,7 @@ rcu_scale_cleanup(void)
torture_stop_kthread(rcu_scale_reader,
reader_tasks[i]);
kfree(reader_tasks);
+ reader_tasks = NULL;
}
if (writer_tasks) {
@@ -1043,8 +1045,11 @@ rcu_scale_cleanup(void)
}
}
kfree(writer_tasks);
+ writer_tasks = NULL;
kfree(writer_durations);
+ writer_durations = NULL;
kfree(writer_n_durations);
+ writer_n_durations = NULL;
kfree(writer_freelists);
writer_freelists = NULL;
}
--
2.40.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH rcu 11/11] rcuscale: Count outstanding callbacks per-task rather than per-CPU
2024-08-02 0:43 [PATCH rcu 0/11] RCU update-side scalability update test Paul E. McKenney
` (8 preceding siblings ...)
2024-08-02 0:43 ` [PATCH rcu 10/11] rcuscale: NULL out top-level pointers to heap memory Paul E. McKenney
@ 2024-08-02 0:43 ` Paul E. McKenney
9 siblings, 0 replies; 13+ messages in thread
From: Paul E. McKenney @ 2024-08-02 0:43 UTC (permalink / raw)
To: rcu; +Cc: linux-kernel, kernel-team, rostedt, Paul E. McKenney,
Vlastimil Babka
The current rcu_scale_writer() asynchronous grace-period testing uses a
per-CPU counter to track the number of outstanding callbacks. This is
subject to CPU-imbalance errors when tasks migrate from one CPU to another
between the time that the counter is incremented and the callback is
queued, and additionally in kernels configured such that callbacks can
be invoked on some CPU other than the one that queued it.
This commit therefore arranges for per-task callback counts, thus avoiding
any issues with migration of either tasks or callbacks.
Reported-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
kernel/rcu/rcuscale.c | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index 13d379c179248..b1c50df142eba 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -114,6 +114,7 @@ struct writer_mblock {
struct writer_freelist {
struct llist_head ws_lhg;
+ atomic_t ws_inflight;
struct llist_head ____cacheline_internodealigned_in_smp ws_lhp;
struct writer_mblock *ws_mblocks;
};
@@ -136,7 +137,6 @@ static u64 t_rcu_scale_writer_started;
static u64 t_rcu_scale_writer_finished;
static unsigned long b_rcu_gp_test_started;
static unsigned long b_rcu_gp_test_finished;
-static DEFINE_PER_CPU(atomic_t, n_async_inflight);
#define MAX_MEAS 10000
#define MIN_MEAS 100
@@ -520,8 +520,9 @@ static void rcu_scale_free(struct writer_mblock *wmbp)
static void rcu_scale_async_cb(struct rcu_head *rhp)
{
struct writer_mblock *wmbp = container_of(rhp, struct writer_mblock, wmb_rh);
+ struct writer_freelist *wflp = wmbp->wmb_wfl;
- atomic_dec(this_cpu_ptr(&n_async_inflight));
+ atomic_dec(&wflp->ws_inflight);
rcu_scale_free(wmbp);
}
@@ -541,6 +542,7 @@ rcu_scale_writer(void *arg)
DEFINE_TORTURE_RANDOM(tr);
u64 *wdp;
u64 *wdpp = writer_durations[me];
+ struct writer_freelist *wflp = &writer_freelists[me];
struct writer_mblock *wmbp = NULL;
VERBOSE_SCALEOUT_STRING("rcu_scale_writer task started");
@@ -584,8 +586,8 @@ rcu_scale_writer(void *arg)
if (gp_async && !WARN_ON_ONCE(!cur_ops->async)) {
if (!wmbp)
wmbp = rcu_scale_alloc(me);
- if (wmbp && atomic_read(this_cpu_ptr(&n_async_inflight)) < gp_async_max) {
- atomic_inc(this_cpu_ptr(&n_async_inflight));
+ if (wmbp && atomic_read(&wflp->ws_inflight) < gp_async_max) {
+ atomic_inc(&wflp->ws_inflight);
cur_ops->async(&wmbp->wmb_rh, rcu_scale_async_cb);
wmbp = NULL;
gp_succeeded = true;
--
2.40.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH rcu 06/11] rcuscale: Provide clear error when async specified without primitives
2024-08-02 0:43 ` [PATCH rcu 06/11] rcuscale: Provide clear error when async specified without primitives Paul E. McKenney
@ 2024-08-14 12:49 ` Neeraj Upadhyay
2024-08-14 15:09 ` Paul E. McKenney
0 siblings, 1 reply; 13+ messages in thread
From: Neeraj Upadhyay @ 2024-08-14 12:49 UTC (permalink / raw)
To: Paul E. McKenney
Cc: rcu, linux-kernel, kernel-team, rostedt, Paul E. McKenney
On Thu, Aug 01, 2024 at 05:43:03PM -0700, Paul E. McKenney wrote:
> Currently, if the rcuscale module's async module parameter is specified
> for RCU implementations that do not have sync primitives such as
> call_rcu(), there will be a series of splats due to calls to a NULL
> pointer. This commit therefore warns of this situation, but switches
> to non-async testing.
>
I have changed this to below here [1]. Please let me know if I got it
wrong.
Currently, if the rcuscale module's async module parameter is specified
for RCU implementations that do not have async primitives such as
RCU Tasks Rude, there will be a series of splats due to calls to a NULL
pointer. This commit therefore warns of this situation, but switches to
non-async testing.
[1] https://git.kernel.org/pub/scm/linux/kernel/git/neeraj.upadhyay/linux-rcu.git/commit/?h=next.14.08.24b&id=22d36840adbcab8fd826a7ca827fd60b708f03de
- Neeraj
> Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> ---
> kernel/rcu/rcuscale.c | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
> index 933014b381ec0..315ced63ec105 100644
> --- a/kernel/rcu/rcuscale.c
> +++ b/kernel/rcu/rcuscale.c
> @@ -525,7 +525,7 @@ rcu_scale_writer(void *arg)
> schedule_timeout_idle(torture_random(&tr) % writer_holdoff_jiffies + 1);
> wdp = &wdpp[i];
> *wdp = ktime_get_mono_fast_ns();
> - if (gp_async) {
> + if (gp_async && !WARN_ON_ONCE(!cur_ops->async)) {
> retry:
> if (!rhp)
> rhp = kmalloc(sizeof(*rhp), GFP_KERNEL);
> @@ -597,7 +597,7 @@ rcu_scale_writer(void *arg)
> i++;
> rcu_scale_wait_shutdown();
> } while (!torture_must_stop());
> - if (gp_async) {
> + if (gp_async && cur_ops->async) {
> cur_ops->gp_barrier();
> }
> writer_n_durations[me] = i_max + 1;
> --
> 2.40.1
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH rcu 06/11] rcuscale: Provide clear error when async specified without primitives
2024-08-14 12:49 ` Neeraj Upadhyay
@ 2024-08-14 15:09 ` Paul E. McKenney
0 siblings, 0 replies; 13+ messages in thread
From: Paul E. McKenney @ 2024-08-14 15:09 UTC (permalink / raw)
To: Neeraj Upadhyay; +Cc: rcu, linux-kernel, kernel-team, rostedt
On Wed, Aug 14, 2024 at 06:19:15PM +0530, Neeraj Upadhyay wrote:
> On Thu, Aug 01, 2024 at 05:43:03PM -0700, Paul E. McKenney wrote:
> > Currently, if the rcuscale module's async module parameter is specified
> > for RCU implementations that do not have sync primitives such as
> > call_rcu(), there will be a series of splats due to calls to a NULL
> > pointer. This commit therefore warns of this situation, but switches
> > to non-async testing.
> >
>
> I have changed this to below here [1]. Please let me know if I got it
> wrong.
>
> Currently, if the rcuscale module's async module parameter is specified
> for RCU implementations that do not have async primitives such as
> RCU Tasks Rude, there will be a series of splats due to calls to a NULL
> pointer. This commit therefore warns of this situation, but switches to
> non-async testing.
How about something like this, but perhaps wordsmithed a bit?
"Currently, if the rcuscale module's async module parameter
is specified for RCU implementations that do not have
async primitives such as RCU Tasks Rude (which now lacks
a call_rcu_tasks_rude() function), there will be a series of
splats due to calls to a NULL pointer. This commit therefore
warns of this situation, but switches to non-async testing."
Thanx, Paul
> [1] https://git.kernel.org/pub/scm/linux/kernel/git/neeraj.upadhyay/linux-rcu.git/commit/?h=next.14.08.24b&id=22d36840adbcab8fd826a7ca827fd60b708f03de
>
> - Neeraj
>
> > Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> > ---
> > kernel/rcu/rcuscale.c | 4 ++--
> > 1 file changed, 2 insertions(+), 2 deletions(-)
> >
> > diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
> > index 933014b381ec0..315ced63ec105 100644
> > --- a/kernel/rcu/rcuscale.c
> > +++ b/kernel/rcu/rcuscale.c
> > @@ -525,7 +525,7 @@ rcu_scale_writer(void *arg)
> > schedule_timeout_idle(torture_random(&tr) % writer_holdoff_jiffies + 1);
> > wdp = &wdpp[i];
> > *wdp = ktime_get_mono_fast_ns();
> > - if (gp_async) {
> > + if (gp_async && !WARN_ON_ONCE(!cur_ops->async)) {
> > retry:
> > if (!rhp)
> > rhp = kmalloc(sizeof(*rhp), GFP_KERNEL);
> > @@ -597,7 +597,7 @@ rcu_scale_writer(void *arg)
> > i++;
> > rcu_scale_wait_shutdown();
> > } while (!torture_must_stop());
> > - if (gp_async) {
> > + if (gp_async && cur_ops->async) {
> > cur_ops->gp_barrier();
> > }
> > writer_n_durations[me] = i_max + 1;
> > --
> > 2.40.1
> >
^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2024-08-14 15:09 UTC | newest]
Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-08-02 0:43 [PATCH rcu 0/11] RCU update-side scalability update test Paul E. McKenney
2024-08-02 0:42 ` [PATCH rcu 02/11] rcuscale: Dump stacks of stalled rcu_scale_writer() instances Paul E. McKenney
2024-08-02 0:43 ` [PATCH rcu 03/11] rcuscale: Dump grace-period statistics when rcu_scale_writer() stalls Paul E. McKenney
2024-08-02 0:43 ` [PATCH rcu 04/11] rcu: Mark callbacks not currently participating in barrier operation Paul E. McKenney
2024-08-02 0:43 ` [PATCH rcu 05/11] rcuscale: Print detailed grace-period and barrier diagnostics Paul E. McKenney
2024-08-02 0:43 ` [PATCH rcu 06/11] rcuscale: Provide clear error when async specified without primitives Paul E. McKenney
2024-08-14 12:49 ` Neeraj Upadhyay
2024-08-14 15:09 ` Paul E. McKenney
2024-08-02 0:43 ` [PATCH rcu 07/11] rcuscale: Make all writer tasks report upon hang Paul E. McKenney
2024-08-02 0:43 ` [PATCH rcu 08/11] rcuscale: Make rcu_scale_writer() tolerate repeated GFP_KERNEL failure Paul E. McKenney
2024-08-02 0:43 ` [PATCH rcu 09/11] rcuscale: Use special allocator for rcu_scale_writer() Paul E. McKenney
2024-08-02 0:43 ` [PATCH rcu 10/11] rcuscale: NULL out top-level pointers to heap memory Paul E. McKenney
2024-08-02 0:43 ` [PATCH rcu 11/11] rcuscale: Count outstanding callbacks per-task rather than per-CPU Paul E. McKenney
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox