From: Tejun Heo <tj@kernel.org>
To: David Vernet <void@manifault.com>,
Andrea Righi <arighi@nvidia.com>,
Changwoo Min <changwoo@igalia.com>
Cc: Christian Loehle <christian.loehle@arm.com>,
Emil Tsalapatis <emil@etsalapatis.com>,
sched-ext@lists.linux.dev, linux-kernel@vger.kernel.org,
Tejun Heo <tj@kernel.org>
Subject: [PATCH 2/2] selftests/sched_ext: Add cyclic SCX_KICK_WAIT stress test
Date: Sat, 28 Mar 2026 14:18:56 -1000 [thread overview]
Message-ID: <20260329001856.835643-3-tj@kernel.org> (raw)
In-Reply-To: <20260329001856.835643-1-tj@kernel.org>
Add a test that creates a 3-CPU kick_wait cycle (A->B->C->A). A BPF
scheduler kicks the next CPU in the ring with SCX_KICK_WAIT on every
enqueue while userspace workers generate continuous scheduling churn via
sched_yield(). Without the preceding fix, this hangs the machine within seconds.
Signed-off-by: Tejun Heo <tj@kernel.org>
---
tools/testing/selftests/sched_ext/Makefile | 1 +
.../sched_ext/cyclic_kick_wait.bpf.c | 68 ++++++
.../selftests/sched_ext/cyclic_kick_wait.c | 194 ++++++++++++++++++
3 files changed, 263 insertions(+)
create mode 100644 tools/testing/selftests/sched_ext/cyclic_kick_wait.bpf.c
create mode 100644 tools/testing/selftests/sched_ext/cyclic_kick_wait.c
diff --git a/tools/testing/selftests/sched_ext/Makefile b/tools/testing/selftests/sched_ext/Makefile
index 006300ac6dff..1c9ca328cca1 100644
--- a/tools/testing/selftests/sched_ext/Makefile
+++ b/tools/testing/selftests/sched_ext/Makefile
@@ -188,6 +188,7 @@ auto-test-targets := \
rt_stall \
test_example \
total_bw \
+ cyclic_kick_wait \
testcase-targets := $(addsuffix .o,$(addprefix $(SCXOBJ_DIR)/,$(auto-test-targets)))
diff --git a/tools/testing/selftests/sched_ext/cyclic_kick_wait.bpf.c b/tools/testing/selftests/sched_ext/cyclic_kick_wait.bpf.c
new file mode 100644
index 000000000000..cb34d3335917
--- /dev/null
+++ b/tools/testing/selftests/sched_ext/cyclic_kick_wait.bpf.c
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Stress concurrent SCX_KICK_WAIT calls to reproduce wait-cycle deadlock.
+ *
+ * Three CPUs are designated from userspace. Every enqueue from one of the
+ * three CPUs kicks the next CPU in the ring with SCX_KICK_WAIT, creating a
+ * persistent A -> B -> C -> A wait cycle pressure.
+ */
+#include <scx/common.bpf.h>
+
+char _license[] SEC("license") = "GPL";
+
+const volatile s32 test_cpu_a;
+const volatile s32 test_cpu_b;
+const volatile s32 test_cpu_c;
+
+u64 nr_enqueues;
+u64 nr_wait_kicks;
+
+UEI_DEFINE(uei);
+
+/*
+ * Map a CPU to the next CPU in the A -> B -> C -> A kick ring.
+ * Returns -1 when @cpu is not one of the three designated test CPUs.
+ */
+static s32 target_cpu(s32 cpu)
+{
+ if (cpu == test_cpu_a)
+ return test_cpu_b;
+ if (cpu == test_cpu_b)
+ return test_cpu_c;
+ if (cpu == test_cpu_c)
+ return test_cpu_a;
+ return -1;
+}
+
+/*
+ * ops.enqueue(): dispatch @p, then — when running on one of the three test
+ * CPUs — kick the next CPU in the ring with SCX_KICK_WAIT so that constant
+ * enqueue traffic sustains A -> B -> C -> A wait-cycle pressure.
+ */
+void BPF_STRUCT_OPS(cyclic_kick_wait_enqueue, struct task_struct *p,
+ u64 enq_flags)
+{
+ s32 this_cpu = bpf_get_smp_processor_id();
+ s32 tgt;
+
+ __sync_fetch_and_add(&nr_enqueues, 1);
+
+ /* Run kthreads immediately on the local DSQ so system housekeeping
+  * keeps making progress while the test hammers the ring.
+  */
+ if (p->flags & PF_KTHREAD) {
+ scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_INF,
+ enq_flags | SCX_ENQ_PREEMPT);
+ return;
+ }
+
+ scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
+
+ /* Only the three designated CPUs participate in the kick ring. */
+ tgt = target_cpu(this_cpu);
+ if (tgt < 0 || tgt == this_cpu)
+ return;
+
+ __sync_fetch_and_add(&nr_wait_kicks, 1);
+ scx_bpf_kick_cpu(tgt, SCX_KICK_WAIT);
+}
+
+/* Record exit info so userspace can detect an abnormal/watchdog exit. */
+void BPF_STRUCT_OPS(cyclic_kick_wait_exit, struct scx_exit_info *ei)
+{
+ UEI_RECORD(uei, ei);
+}
+
+SEC(".struct_ops.link")
+struct sched_ext_ops cyclic_kick_wait_ops = {
+ .enqueue = cyclic_kick_wait_enqueue,
+ .exit = cyclic_kick_wait_exit,
+ .name = "cyclic_kick_wait",
+ /* Short watchdog timeout so a genuine deadlock ejects the scheduler
+  * quickly instead of wedging the machine for the full test duration.
+  */
+ .timeout_ms = 1000U,
+};
diff --git a/tools/testing/selftests/sched_ext/cyclic_kick_wait.c b/tools/testing/selftests/sched_ext/cyclic_kick_wait.c
new file mode 100644
index 000000000000..c2e5aa9de715
--- /dev/null
+++ b/tools/testing/selftests/sched_ext/cyclic_kick_wait.c
@@ -0,0 +1,194 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Test SCX_KICK_WAIT forward progress under cyclic wait pressure.
+ *
+ * SCX_KICK_WAIT busy-waits until the target CPU enters the scheduling path.
+ * If multiple CPUs form a wait cycle (A waits for B, B waits for C, C waits
+ * for A), all CPUs deadlock unless the implementation breaks the cycle.
+ *
+ * This test creates that scenario: three CPUs are arranged in a ring. The BPF
+ * scheduler's ops.enqueue() kicks the next CPU in the ring with SCX_KICK_WAIT
+ * on every enqueue. Userspace pins 4 worker threads per CPU that loop calling
+ * sched_yield(), generating a steady stream of enqueues and thus sustained
+ * A->B->C->A kick_wait cycle pressure. The test passes if the system remains
+ * responsive for 5 seconds without the scheduler being killed by the watchdog.
+ */
+#define _GNU_SOURCE
+
+#include <bpf/bpf.h>
+#include <errno.h>
+#include <pthread.h>
+#include <sched.h>
+#include <scx/common.h>
+#include <stdint.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "scx_test.h"
+#include "cyclic_kick_wait.bpf.skel.h"
+
+#define WORKERS_PER_CPU 4
+#define NR_TEST_CPUS 3
+#define NR_WORKERS (NR_TEST_CPUS * WORKERS_PER_CPU)
+
+/* Per-worker state shared between the main thread and one churn thread. */
+struct worker_ctx {
+ pthread_t tid; /* valid only when started is true */
+ int cpu; /* CPU this worker pins itself to */
+ volatile bool stop; /* set by main thread to end the yield loop */
+ volatile __u64 iters; /* sched_yield() iterations completed */
+ bool started; /* pthread_create() succeeded */
+};
+
+/*
+ * Worker thread body: pin to worker->cpu, then loop on sched_yield() until
+ * worker->stop is set, generating a steady stream of enqueues on that CPU.
+ * Returns NULL on success, or the errno from sched_setaffinity() smuggled
+ * through the void pointer — decoded by join_worker().
+ */
+static void *worker_fn(void *arg)
+{
+ struct worker_ctx *worker = arg;
+ cpu_set_t mask;
+
+ CPU_ZERO(&mask);
+ CPU_SET(worker->cpu, &mask);
+
+ if (sched_setaffinity(0, sizeof(mask), &mask))
+ return (void *)(uintptr_t)errno;
+
+ while (!worker->stop) {
+ sched_yield();
+ worker->iters++;
+ }
+
+ return NULL;
+}
+
+/*
+ * Join @worker with a 2 second timeout so a wedged worker cannot hang the
+ * test forever; on timeout the thread is detached and an error returned.
+ * Returns 0 on success or a negative error value — either the join error
+ * or the errno that worker_fn() reported via its return pointer.
+ */
+static int join_worker(struct worker_ctx *worker)
+{
+ void *ret;
+ struct timespec ts;
+ int err;
+
+ /* Thread was never created; nothing to join. */
+ if (!worker->started)
+ return 0;
+
+ if (clock_gettime(CLOCK_REALTIME, &ts))
+ return -errno;
+
+ /* pthread_timedjoin_np() takes an absolute CLOCK_REALTIME deadline. */
+ ts.tv_sec += 2;
+ err = pthread_timedjoin_np(worker->tid, &ret, &ts);
+ if (err == ETIMEDOUT)
+ pthread_detach(worker->tid);
+ if (err)
+ return -err;
+
+ /* A non-NULL return carries the worker's errno. */
+ if ((uintptr_t)ret)
+ return -(int)(uintptr_t)ret;
+
+ return 0;
+}
+
+/*
+ * Open the skeleton only; loading is deferred to run() so it can populate
+ * the const rodata CPU assignments before verification.
+ */
+static enum scx_test_status setup(void **ctx)
+{
+ struct cyclic_kick_wait *skel;
+
+ skel = cyclic_kick_wait__open();
+ SCX_FAIL_IF(!skel, "Failed to open skel");
+ SCX_ENUM_INIT(skel);
+
+ *ctx = skel;
+ return SCX_TEST_PASS;
+}
+
+/*
+ * Main test: designate three CPUs as the kick ring, attach the BPF
+ * scheduler, pin WORKERS_PER_CPU yield-churn threads to each of those CPUs
+ * for 5 seconds, then verify the scheduler has not recorded an exit (i.e.
+ * the watchdog did not have to kill it to recover from a wait cycle).
+ */
+static enum scx_test_status run(void *ctx)
+{
+ struct cyclic_kick_wait *skel = ctx;
+ struct worker_ctx workers[NR_WORKERS] = {};
+ struct bpf_link *link = NULL;
+ enum scx_test_status status = SCX_TEST_PASS;
+ int test_cpus[NR_TEST_CPUS];
+ int nr_cpus = 0;
+ cpu_set_t mask;
+ int ret, i;
+
+ if (sched_getaffinity(0, sizeof(mask), &mask)) {
+ SCX_ERR("Failed to get affinity (%d)", errno);
+ return SCX_TEST_FAIL;
+ }
+
+ /* Use the first three CPUs this process may run on as A, B and C. */
+ for (i = 0; i < CPU_SETSIZE; i++) {
+ if (CPU_ISSET(i, &mask))
+ test_cpus[nr_cpus++] = i;
+ if (nr_cpus == NR_TEST_CPUS)
+ break;
+ }
+
+ /* A wait cycle needs three distinct CPUs; skip on smaller systems. */
+ if (nr_cpus < NR_TEST_CPUS)
+ return SCX_TEST_SKIP;
+
+ /* rodata must be written before load; BPF reads it as const volatile. */
+ skel->rodata->test_cpu_a = test_cpus[0];
+ skel->rodata->test_cpu_b = test_cpus[1];
+ skel->rodata->test_cpu_c = test_cpus[2];
+
+ if (cyclic_kick_wait__load(skel)) {
+ SCX_ERR("Failed to load skel");
+ return SCX_TEST_FAIL;
+ }
+
+ link = bpf_map__attach_struct_ops(skel->maps.cyclic_kick_wait_ops);
+ if (!link) {
+ SCX_ERR("Failed to attach scheduler");
+ return SCX_TEST_FAIL;
+ }
+
+ /* WORKERS_PER_CPU consecutive workers share each test CPU. */
+ for (i = 0; i < NR_WORKERS; i++)
+ workers[i].cpu = test_cpus[i / WORKERS_PER_CPU];
+
+ for (i = 0; i < NR_WORKERS; i++) {
+ ret = pthread_create(&workers[i].tid, NULL, worker_fn, &workers[i]);
+ if (ret) {
+ SCX_ERR("Failed to create worker thread %d (%d)", i, ret);
+ status = SCX_TEST_FAIL;
+ goto out;
+ }
+ workers[i].started = true;
+ }
+
+ /* Let the churn run; the BPF-side watchdog (timeout_ms) polices stalls. */
+ sleep(5);
+
+ /* Any recorded exit means the scheduler was ejected — e.g. by the
+  * watchdog detecting a stalled CPU, which is the regression under test.
+  */
+ if (skel->data->uei.kind != EXIT_KIND(SCX_EXIT_NONE)) {
+ SCX_ERR("Scheduler exited unexpectedly (kind=%llu code=%lld)",
+ (unsigned long long)skel->data->uei.kind,
+ (long long)skel->data->uei.exit_code);
+ status = SCX_TEST_FAIL;
+ }
+
+out:
+ /* NOTE(review): plain volatile stores, no explicit barrier — presumably
+  * eventual visibility suffices for a stop flag; confirm or use atomics.
+  */
+ for (i = 0; i < NR_WORKERS; i++)
+ workers[i].stop = true;
+
+ /* Report only the first join failure but still join every worker. */
+ for (i = 0; i < NR_WORKERS; i++) {
+ ret = join_worker(&workers[i]);
+ if (ret && status == SCX_TEST_PASS) {
+ SCX_ERR("Failed to join worker thread %d (%d)", i, ret);
+ status = SCX_TEST_FAIL;
+ }
+ }
+
+ if (link)
+ bpf_link__destroy(link);
+
+ return status;
+}
+
+/* Free the skeleton opened in setup(). */
+static void cleanup(void *ctx)
+{
+ struct cyclic_kick_wait *skel = ctx;
+
+ cyclic_kick_wait__destroy(skel);
+}
+
+/* Test descriptor consumed by the sched_ext selftest runner. */
+struct scx_test cyclic_kick_wait = {
+ .name = "cyclic_kick_wait",
+ .description = "Verify SCX_KICK_WAIT forward progress under a 3-CPU wait cycle",
+ .setup = setup,
+ .run = run,
+ .cleanup = cleanup,
+};
+REGISTER_SCX_TEST(&cyclic_kick_wait)
--
2.53.0
next prev parent reply other threads:[~2026-03-29 0:18 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-29 0:18 [PATCHSET sched_ext/for-7.0-fixes] sched_ext: Fix SCX_KICK_WAIT deadlock Tejun Heo
2026-03-29 0:18 ` [PATCH 1/2] sched_ext: Fix SCX_KICK_WAIT deadlock by deferring wait to balance callback Tejun Heo
2026-03-29 16:26 ` Andrea Righi
2026-03-29 0:18 ` Tejun Heo [this message]
2026-03-29 9:06 ` [PATCH 2/2] selftests/sched_ext: Add cyclic SCX_KICK_WAIT stress test Cheng-Yang Chou
2026-03-29 15:52 ` Andrea Righi
2026-03-30 4:40 ` Cheng-Yang Chou
2026-03-30 8:51 ` Christian Loehle
2026-03-30 8:52 ` [PATCHSET sched_ext/for-7.0-fixes] sched_ext: Fix SCX_KICK_WAIT deadlock Christian Loehle
2026-03-30 18:56 ` Tejun Heo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260329001856.835643-3-tj@kernel.org \
--to=tj@kernel.org \
--cc=arighi@nvidia.com \
--cc=changwoo@igalia.com \
--cc=christian.loehle@arm.com \
--cc=emil@etsalapatis.com \
--cc=linux-kernel@vger.kernel.org \
--cc=sched-ext@lists.linux.dev \
--cc=void@manifault.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.