public inbox for linux-kernel@vger.kernel.org
* [PATCH] drm/sched: Add new KUnit test suite for concurrent job submission
@ 2026-02-19 14:07 Marco Pagani
  2026-02-23 16:25 ` Tvrtko Ursulin
  0 siblings, 1 reply; 13+ messages in thread
From: Marco Pagani @ 2026-02-19 14:07 UTC (permalink / raw)
  To: Tvrtko Ursulin, Matthew Brost, Danilo Krummrich, Philipp Stanner,
	Christian König, Maarten Lankhorst, Maxime Ripard,
	Thomas Zimmermann, David Airlie, Simona Vetter
  Cc: Marco Pagani, dri-devel, linux-kernel

Add a new test suite to simulate concurrent job submissions to the DRM
scheduler. The suite includes two initial test cases: (i) a primary test
case for parallel job submission and (ii) a secondary test case for
interleaved job submission and completion. In the first test case, worker
threads concurrently submit jobs to the scheduler and then the timeline is
manually advanced. In the second test case, worker threads periodically
submit a sequence of jobs to the mock scheduler. Periods are chosen as
harmonic, starting from a base period, to allow interleaving between
submission and completion. To avoid impractically large execution times,
periods are cycled. The timeline is advanced automatically when the jobs
complete. This models how job submission from userspace (in process
context) may interleave with job completion (hrtimer callback interrupt
context, in the test case) by a physical device.

Signed-off-by: Marco Pagani <marco.pagani@linux.dev>
---
Changes in v1:
- Split the original suite into two test cases (Tvrtko Ursulin).
- General test refactoring and variable renaming for clarity.
- Changed parameters to have a fairer workload distribution.
- Improved cleanup logic.
- Added kunit_info() prints for tracing workers.
---
 drivers/gpu/drm/scheduler/tests/tests_basic.c | 338 ++++++++++++++++++
 1 file changed, 338 insertions(+)

diff --git a/drivers/gpu/drm/scheduler/tests/tests_basic.c b/drivers/gpu/drm/scheduler/tests/tests_basic.c
index 82a41a456b0a..391edcbaf43a 100644
--- a/drivers/gpu/drm/scheduler/tests/tests_basic.c
+++ b/drivers/gpu/drm/scheduler/tests/tests_basic.c
@@ -2,6 +2,8 @@
 /* Copyright (c) 2025 Valve Corporation */
 
 #include <linux/delay.h>
+#include <linux/completion.h>
+#include <linux/workqueue.h>
 
 #include "sched_tests.h"
 
@@ -235,6 +237,341 @@ static void drm_sched_basic_cancel(struct kunit *test)
 	KUNIT_ASSERT_EQ(test, job->hw_fence.error, -ECANCELED);
 }
 
+struct sched_concurrent_context {
+	struct drm_mock_scheduler *sched;
+	struct workqueue_struct *sub_wq;
+	struct kunit *test;
+	struct completion wait_go;
+};
+
+KUNIT_DEFINE_ACTION_WRAPPER(drm_mock_sched_fini_wrap, drm_mock_sched_fini,
+			    struct drm_mock_scheduler *);
+
+KUNIT_DEFINE_ACTION_WRAPPER(drm_mock_sched_entity_free_wrap, drm_mock_sched_entity_free,
+			    struct drm_mock_sched_entity *);
+
+static void complete_destroy_workqueue(void *context)
+{
+	struct sched_concurrent_context *ctx = (struct sched_concurrent_context *)context;
+
+	complete_all(&ctx->wait_go);
+
+	destroy_workqueue(ctx->sub_wq);
+}
+
+static int drm_sched_concurrent_init(struct kunit *test)
+{
+	struct sched_concurrent_context *ctx;
+	int ret;
+
+	ctx = kunit_kzalloc(test, sizeof(*ctx), GFP_KERNEL);
+	KUNIT_ASSERT_NOT_NULL(test, ctx);
+
+	init_completion(&ctx->wait_go);
+
+	ctx->sched = drm_mock_sched_new(test, MAX_SCHEDULE_TIMEOUT);
+
+	ret = kunit_add_action_or_reset(test, drm_mock_sched_fini_wrap, ctx->sched);
+	KUNIT_ASSERT_EQ(test, ret, 0);
+
+	/* Use an unbounded workqueue to maximize job submission concurrency */
+	ctx->sub_wq = alloc_workqueue("drm-sched-submitters-wq", WQ_UNBOUND,
+				      WQ_UNBOUND_MAX_ACTIVE);
+	KUNIT_ASSERT_NOT_NULL(test, ctx->sub_wq);
+
+	ret = kunit_add_action_or_reset(test, complete_destroy_workqueue, ctx);
+	KUNIT_ASSERT_EQ(test, ret, 0);
+
+	ctx->test = test;
+	test->priv = ctx;
+
+	return 0;
+}
+
+struct drm_sched_parallel_params {
+	const char *description;
+	unsigned int num_jobs;
+	unsigned int num_workers;
+};
+
+static const struct drm_sched_parallel_params drm_sched_parallel_cases[] = {
+	{
+		.description = "Workers submitting multiple jobs against a single entity",
+		.num_jobs = 4,
+		.num_workers = 16,
+	},
+};
+
+static void
+drm_sched_parallel_desc(const struct drm_sched_parallel_params *params, char *desc)
+{
+	strscpy(desc, params->description, KUNIT_PARAM_DESC_SIZE);
+}
+
+KUNIT_ARRAY_PARAM(drm_sched_parallel, drm_sched_parallel_cases, drm_sched_parallel_desc);
+
+struct parallel_worker {
+	struct work_struct work;
+	struct sched_concurrent_context *ctx;
+	struct drm_mock_sched_entity *entity;
+	struct drm_mock_sched_job **jobs;
+	unsigned int id;
+};
+
+static void drm_sched_parallel_worker(struct work_struct *work)
+{
+	const struct drm_sched_parallel_params *params;
+	struct sched_concurrent_context *test_ctx;
+	struct parallel_worker *worker;
+	unsigned int i;
+
+	worker = container_of(work, struct parallel_worker, work);
+	test_ctx = worker->ctx;
+	params = test_ctx->test->param_value;
+
+	wait_for_completion(&test_ctx->wait_go);
+
+	kunit_info(test_ctx->test, "Parallel worker %u started\n", worker->id);
+
+	for (i = 0; i < params->num_jobs; i++)
+		drm_mock_sched_job_submit(worker->jobs[i]);
+}
+
+/*
+ * Spawns workers that submit a sequence of jobs to the mock scheduler.
+ * Once all jobs are submitted, the timeline is manually advanced.
+ */
+static void drm_sched_parallel_submit_test(struct kunit *test)
+{
+	struct sched_concurrent_context *ctx = test->priv;
+	const struct drm_sched_parallel_params *params = test->param_value;
+	struct parallel_worker *workers;
+	unsigned int i, j, completed_jobs;
+	bool done;
+	int ret;
+
+	workers = kunit_kcalloc(test, params->num_workers, sizeof(*workers),
+				GFP_KERNEL);
+	KUNIT_ASSERT_NOT_NULL(test, workers);
+
+	/*
+	 * Init workers only after all jobs and entities have been successfully
+	 * allocated. In this way, the cleanup logic for when an assertion fails
+	 * can be simplified.
+	 */
+	for (i = 0; i < params->num_workers; i++) {
+		workers[i].id = i;
+		workers[i].ctx = ctx;
+		workers[i].entity = drm_mock_sched_entity_new(test,
+							      DRM_SCHED_PRIORITY_NORMAL,
+							      ctx->sched);
+
+		ret = kunit_add_action_or_reset(test, drm_mock_sched_entity_free_wrap,
+						workers[i].entity);
+		KUNIT_ASSERT_EQ(test, ret, 0);
+
+		workers[i].jobs = kunit_kcalloc(test, params->num_jobs,
+						sizeof(*workers[i].jobs), GFP_KERNEL);
+		KUNIT_ASSERT_NOT_NULL(test, workers[i].jobs);
+
+		for (j = 0; j < params->num_jobs; j++)
+			workers[i].jobs[j] = drm_mock_sched_job_new(test,
+								    workers[i].entity);
+	}
+
+	for (i = 0; i < params->num_workers; i++) {
+		INIT_WORK(&workers[i].work, drm_sched_parallel_worker);
+		queue_work(ctx->sub_wq, &workers[i].work);
+	}
+
+	complete_all(&ctx->wait_go);
+	flush_workqueue(ctx->sub_wq);
+
+	for (i = 0; i < params->num_workers; i++) {
+		for (j = 0; j < params->num_jobs; j++) {
+			done = drm_mock_sched_job_wait_scheduled(workers[i].jobs[j],
+								 HZ);
+			KUNIT_ASSERT_TRUE(test, done);
+
+			done = drm_mock_sched_job_is_finished(workers[i].jobs[j]);
+			KUNIT_ASSERT_FALSE(test, done);
+		}
+	}
+
+	completed_jobs = drm_mock_sched_advance(ctx->sched,
+						params->num_workers * params->num_jobs);
+	KUNIT_ASSERT_EQ(test, completed_jobs, params->num_workers * params->num_jobs);
+
+	for (i = 0; i < params->num_workers; i++) {
+		for (j = 0; j < params->num_jobs; j++) {
+			done = drm_mock_sched_job_is_finished(workers[i].jobs[j]);
+			KUNIT_ASSERT_TRUE(test, done);
+		}
+	}
+}
+
+struct drm_sched_interleaved_params {
+	const char *description;
+	unsigned int job_base_period_us;
+	unsigned int periods_cycle;
+	unsigned int num_jobs;
+	unsigned int num_workers;
+};
+
+static const struct drm_sched_interleaved_params drm_sched_interleaved_cases[] = {
+	{
+		.description = "Workers submitting single jobs against a single entity",
+		.job_base_period_us = 100,
+		.periods_cycle = 10,
+		.num_jobs = 1,
+		.num_workers = 32,
+	},
+	{
+		.description = "Workers submitting multiple jobs against a single entity",
+		.job_base_period_us = 100,
+		.periods_cycle = 10,
+		.num_jobs = 4,
+		.num_workers = 16,
+	},
+};
+
+static void
+drm_sched_interleaved_desc(const struct drm_sched_interleaved_params *params, char *desc)
+{
+	strscpy(desc, params->description, KUNIT_PARAM_DESC_SIZE);
+}
+
+KUNIT_ARRAY_PARAM(drm_sched_interleaved, drm_sched_interleaved_cases,
+		  drm_sched_interleaved_desc);
+
+struct interleaved_worker {
+	struct work_struct work;
+	struct sched_concurrent_context *ctx;
+	struct drm_mock_sched_entity *entity;
+	struct drm_mock_sched_job **jobs;
+	unsigned int id;
+	unsigned int period_us;
+	unsigned int timeout_us;
+};
+
+static void drm_sched_interleaved_worker(struct work_struct *work)
+{
+	const struct drm_sched_interleaved_params *params;
+	struct sched_concurrent_context *test_ctx;
+	struct interleaved_worker *worker;
+	unsigned int i;
+	bool done;
+
+	worker = container_of(work, struct interleaved_worker, work);
+	test_ctx = worker->ctx;
+	params = test_ctx->test->param_value;
+
+	wait_for_completion(&test_ctx->wait_go);
+
+	kunit_info(test_ctx->test, "Interleaved worker %u with period %u us started\n",
+		   worker->id, worker->period_us);
+
+	/* Release jobs as a periodic sequence */
+	for (i = 0; i < params->num_jobs; i++) {
+		drm_mock_sched_job_set_duration_us(worker->jobs[i], worker->period_us);
+		drm_mock_sched_job_submit(worker->jobs[i]);
+
+		done = drm_mock_sched_job_wait_finished(worker->jobs[i],
+							usecs_to_jiffies(worker->timeout_us));
+		if (!done)
+			kunit_info(test_ctx->test, "Job %u of worker %u timed out\n",
+				   i, worker->id);
+	}
+}
+
+/*
+ * Spawns workers that submit a periodic sequence of jobs to the mock scheduler.
+ * Uses harmonic periods to allow interleaving and cycles through them to prevent
+ * excessively large execution times. Since the scheduler serializes jobs from all
+ * workers, the timeout is set to the hyperperiod with a 2x safety factor. Entities
+ * and jobs are pre-allocated in the main thread to avoid using KUnit assertions in
+ * the workers.
+ */
+static void drm_sched_interleaved_submit_test(struct kunit *test)
+{
+	struct sched_concurrent_context *ctx = test->priv;
+	const struct drm_sched_interleaved_params *params = test->param_value;
+	struct interleaved_worker *workers;
+	unsigned int period_max_us, timeout_us;
+	unsigned int i, j;
+	bool done;
+	int ret;
+
+	workers = kunit_kcalloc(test, params->num_workers, sizeof(*workers),
+				GFP_KERNEL);
+	KUNIT_ASSERT_NOT_NULL(test, workers);
+
+	period_max_us = params->job_base_period_us * (1 << params->periods_cycle);
+	timeout_us = params->num_workers * period_max_us * 2;
+
+	/*
+	 * Init workers only after all jobs and entities have been successfully
+	 * allocated. In this way, the cleanup logic for when an assertion fails
+	 * can be simplified.
+	 */
+	for (i = 0; i < params->num_workers; i++) {
+		workers[i].id = i;
+		workers[i].ctx = ctx;
+		workers[i].timeout_us = timeout_us;
+
+		if (i % params->periods_cycle == 0)
+			workers[i].period_us = params->job_base_period_us;
+		else
+			workers[i].period_us = workers[i - 1].period_us * 2;
+
+		workers[i].entity = drm_mock_sched_entity_new(test,
+							      DRM_SCHED_PRIORITY_NORMAL,
+							      ctx->sched);
+
+		ret = kunit_add_action_or_reset(test, drm_mock_sched_entity_free_wrap,
+						workers[i].entity);
+		KUNIT_ASSERT_EQ(test, ret, 0);
+
+		workers[i].jobs = kunit_kcalloc(test, params->num_jobs,
+						sizeof(*workers[i].jobs), GFP_KERNEL);
+		KUNIT_ASSERT_NOT_NULL(test, workers[i].jobs);
+
+		for (j = 0; j < params->num_jobs; j++) {
+			workers[i].jobs[j] = drm_mock_sched_job_new(test,
+								    workers[i].entity);
+			done = drm_mock_sched_job_is_finished(workers[i].jobs[j]);
+			KUNIT_ASSERT_FALSE(test, done);
+		}
+	}
+
+	for (i = 0; i < params->num_workers; i++) {
+		INIT_WORK(&workers[i].work, drm_sched_interleaved_worker);
+		queue_work(ctx->sub_wq, &workers[i].work);
+	}
+
+	complete_all(&ctx->wait_go);
+	flush_workqueue(ctx->sub_wq);
+
+	for (i = 0; i < params->num_workers; i++) {
+		for (j = 0; j < params->num_jobs; j++) {
+			done = drm_mock_sched_job_is_finished(workers[i].jobs[j]);
+			KUNIT_ASSERT_TRUE(test, done);
+		}
+	}
+}
+
+static struct kunit_case drm_sched_concurrent_tests[] = {
+	KUNIT_CASE_PARAM(drm_sched_parallel_submit_test, drm_sched_parallel_gen_params),
+	KUNIT_CASE_PARAM(drm_sched_interleaved_submit_test, drm_sched_interleaved_gen_params),
+	{}
+};
+
+static struct kunit_suite drm_sched_concurrent = {
+	.name = "drm_sched_concurrent_tests",
+	.init = drm_sched_concurrent_init,
+	.test_cases = drm_sched_concurrent_tests,
+};
+
 static struct kunit_case drm_sched_cancel_tests[] = {
 	KUNIT_CASE(drm_sched_basic_cancel),
 	{}
@@ -556,6 +893,7 @@ static struct kunit_suite drm_sched_credits = {
 };
 
 kunit_test_suites(&drm_sched_basic,
+		  &drm_sched_concurrent,
 		  &drm_sched_timeout,
 		  &drm_sched_cancel,
 		  &drm_sched_priority,
-- 
2.52.0


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH] drm/sched: Add new KUnit test suite for concurrent job submission
  2026-02-19 14:07 [PATCH] drm/sched: Add new KUnit test suite for concurrent job submission Marco Pagani
@ 2026-02-23 16:25 ` Tvrtko Ursulin
  2026-02-25  7:47   ` Philipp Stanner
  2026-03-17 17:04   ` Marco Pagani
  0 siblings, 2 replies; 13+ messages in thread
From: Tvrtko Ursulin @ 2026-02-23 16:25 UTC (permalink / raw)
  To: Marco Pagani, Matthew Brost, Danilo Krummrich, Philipp Stanner,
	Christian König, Maarten Lankhorst, Maxime Ripard,
	Thomas Zimmermann, David Airlie, Simona Vetter
  Cc: dri-devel, linux-kernel


On 19/02/2026 14:07, Marco Pagani wrote:
> Add a new test suite to simulate concurrent job submissions to the DRM
> scheduler. The suite includes two initial test cases: (i) a primary test
> case for parallel job submission and (ii) a secondary test case for
> interleaved job submission and completion. In the first test case, worker
> threads concurrently submit jobs to the scheduler and then the timeline is
> manually advanced. In the second test case, worker threads periodically
> submit a sequence of jobs to the mock scheduler. Periods are chosen as
> harmonic, starting from a base period, to allow interleaving between
> submission and completion. To avoid impractically large execution times,
> periods are cycled. The timeline is advanced automatically when the jobs
> complete. This models how job submission from userspace (in process
> context) may interleave with job completion (hrtimer callback interrupt
> context, in the test case) by a physical device.
> 
> Signed-off-by: Marco Pagani <marco.pagani@linux.dev>
> ---
> Changes in v1:
> - Split the original suite into two test cases (Tvrtko Ursulin).
> - General test refactoring and variable renaming for clarity.
> - Changed parameters to have a fairer workload distribution.
> - Improved cleanup logic.
> - Added kunit_info() prints for tracing workers.
> ---
>   drivers/gpu/drm/scheduler/tests/tests_basic.c | 338 ++++++++++++++++++
>   1 file changed, 338 insertions(+)
> 
> diff --git a/drivers/gpu/drm/scheduler/tests/tests_basic.c b/drivers/gpu/drm/scheduler/tests/tests_basic.c
> index 82a41a456b0a..391edcbaf43a 100644
> --- a/drivers/gpu/drm/scheduler/tests/tests_basic.c
> +++ b/drivers/gpu/drm/scheduler/tests/tests_basic.c
> @@ -2,6 +2,8 @@
>   /* Copyright (c) 2025 Valve Corporation */
>   
>   #include <linux/delay.h>
> +#include <linux/completion.h>
> +#include <linux/workqueue.h>
>   
>   #include "sched_tests.h"
>   
> @@ -235,6 +237,341 @@ static void drm_sched_basic_cancel(struct kunit *test)
>   	KUNIT_ASSERT_EQ(test, job->hw_fence.error, -ECANCELED);
>   }
>   
> +struct sched_concurrent_context {
> +	struct drm_mock_scheduler *sched;
> +	struct workqueue_struct *sub_wq;
> +	struct kunit *test;
> +	struct completion wait_go;
> +};
> +
> +KUNIT_DEFINE_ACTION_WRAPPER(drm_mock_sched_fini_wrap, drm_mock_sched_fini,
> +			    struct drm_mock_scheduler *);
> +
> +KUNIT_DEFINE_ACTION_WRAPPER(drm_mock_sched_entity_free_wrap, drm_mock_sched_entity_free,
> +			    struct drm_mock_sched_entity *);
> +
> +static void complete_destroy_workqueue(void *context)
> +{
> +	struct sched_concurrent_context *ctx = (struct sched_concurrent_context *)context;
> +
> +	complete_all(&ctx->wait_go);
> +
> +	destroy_workqueue(ctx->sub_wq);
> +}
> +
> +static int drm_sched_concurrent_init(struct kunit *test)
> +{
> +	struct sched_concurrent_context *ctx;
> +	int ret;
> +
> +	ctx = kunit_kzalloc(test, sizeof(*ctx), GFP_KERNEL);
> +	KUNIT_ASSERT_NOT_NULL(test, ctx);
> +
> +	init_completion(&ctx->wait_go);
> +
> +	ctx->sched = drm_mock_sched_new(test, MAX_SCHEDULE_TIMEOUT);
> +
> +	ret = kunit_add_action_or_reset(test, drm_mock_sched_fini_wrap, ctx->sched);
> +	KUNIT_ASSERT_EQ(test, ret, 0);
> +
> +	/* Use an unbounded workqueue to maximize job submission concurrency */
> +	ctx->sub_wq = alloc_workqueue("drm-sched-submitters-wq", WQ_UNBOUND,
> +				      WQ_UNBOUND_MAX_ACTIVE);
> +	KUNIT_ASSERT_NOT_NULL(test, ctx->sub_wq);
> +
> +	ret = kunit_add_action_or_reset(test, complete_destroy_workqueue, ctx);
> +	KUNIT_ASSERT_EQ(test, ret, 0);
> +
> +	ctx->test = test;
> +	test->priv = ctx;
> +
> +	return 0;
> +}
> +
> +struct drm_sched_parallel_params {
> +	const char *description;
> +	unsigned int num_jobs;
> +	unsigned int num_workers;
> +};
> +
> +static const struct drm_sched_parallel_params drm_sched_parallel_cases[] = {
> +	{
> +		.description = "Workers submitting multiple jobs against a single entity",

Each worker has its own entity, so the description is a bit confusing.

> +		.num_jobs = 4,
> +		.num_workers = 16,
> +	},
> +};
> +
> +static void
> +drm_sched_parallel_desc(const struct drm_sched_parallel_params *params, char *desc)
> +{
> +	strscpy(desc, params->description, KUNIT_PARAM_DESC_SIZE);
> +}
> +
> +KUNIT_ARRAY_PARAM(drm_sched_parallel, drm_sched_parallel_cases, drm_sched_parallel_desc);
> +
> +struct parallel_worker {
> +	struct work_struct work;
> +	struct sched_concurrent_context *ctx;
> +	struct drm_mock_sched_entity *entity;
> +	struct drm_mock_sched_job **jobs;
> +	unsigned int id;
> +};
> +
> +static void drm_sched_parallel_worker(struct work_struct *work)
> +{
> +	const struct drm_sched_parallel_params *params;
> +	struct sched_concurrent_context *test_ctx;
> +	struct parallel_worker *worker;
> +	unsigned int i;
> +
> +	worker = container_of(work, struct parallel_worker, work);
> +	test_ctx = worker->ctx;
> +	params = test_ctx->test->param_value;
> +
> +	wait_for_completion(&test_ctx->wait_go);
> +
> +	kunit_info(test_ctx->test, "Parallel worker %u started\n", worker->id);
> +
> +	for (i = 0; i < params->num_jobs; i++)
> +		drm_mock_sched_job_submit(worker->jobs[i]);
> +}
> +
> +/*
> + * Spawns workers that submit a sequence of jobs to the mock scheduler.
> + * Once all jobs are submitted, the timeline is manually advanced.
> + */
> +static void drm_sched_parallel_submit_test(struct kunit *test)
> +{
> +	struct sched_concurrent_context *ctx = test->priv;
> +	const struct drm_sched_parallel_params *params = test->param_value;
> +	struct parallel_worker *workers;
> +	unsigned int i, j, completed_jobs;
> +	bool done;
> +	int ret;
> +
> +	workers = kunit_kcalloc(test, params->num_workers, sizeof(*workers),
> +				GFP_KERNEL);
> +	KUNIT_ASSERT_NOT_NULL(test, workers);
> +
> +	/*
> +	 * Init workers only after all jobs and entities have been successfully
> +	 * allocated. In this way, the cleanup logic for when an assertion fails
> +	 * can be simplified.
> +	 */
> +	for (i = 0; i < params->num_workers; i++) {
> +		workers[i].id = i;
> +		workers[i].ctx = ctx;
> +		workers[i].entity = drm_mock_sched_entity_new(test,
> +							      DRM_SCHED_PRIORITY_NORMAL,
> +							      ctx->sched);
> +
> +		ret = kunit_add_action_or_reset(test, drm_mock_sched_entity_free_wrap,
> +						workers[i].entity);
> +		KUNIT_ASSERT_EQ(test, ret, 0);
> +
> +		workers[i].jobs = kunit_kcalloc(test, params->num_jobs,
> +						sizeof(*workers[i].jobs), GFP_KERNEL);
> +		KUNIT_ASSERT_NOT_NULL(test, workers[i].jobs);
> +
> +		for (j = 0; j < params->num_jobs; j++)
> +			workers[i].jobs[j] = drm_mock_sched_job_new(test,
> +								    workers[i].entity);
> +	}
> +
> +	for (i = 0; i < params->num_workers; i++) {
> +		INIT_WORK(&workers[i].work, drm_sched_parallel_worker);
> +		queue_work(ctx->sub_wq, &workers[i].work);
> +	}
> +
> +	complete_all(&ctx->wait_go);
> +	flush_workqueue(ctx->sub_wq);
> +
> +	for (i = 0; i < params->num_workers; i++) {
> +		for (j = 0; j < params->num_jobs; j++) {
> +			done = drm_mock_sched_job_wait_scheduled(workers[i].jobs[j],
> +								 HZ);
> +			KUNIT_ASSERT_TRUE(test, done);
> +
> +			done = drm_mock_sched_job_is_finished(workers[i].jobs[j]);
> +			KUNIT_ASSERT_FALSE(test, done);

This second assert does not seem to bring much value; instead it makes 
the reader ask themselves why it is there. Remove it?

Hmm, in fact this whole loop could be removed. All that is needed 
below is to wait for the last job to be completed.

> +		}
> +	}
> +
> +	completed_jobs = drm_mock_sched_advance(ctx->sched,
> +						params->num_workers * params->num_jobs);
> +	KUNIT_ASSERT_EQ(test, completed_jobs, params->num_workers * params->num_jobs);
> +
> +	for (i = 0; i < params->num_workers; i++) {
> +		for (j = 0; j < params->num_jobs; j++) {
> +			done = drm_mock_sched_job_is_finished(workers[i].jobs[j]);
> +			KUNIT_ASSERT_TRUE(test, done);
> +		}
> +	}

So here, after advancing you just need to wait on the last job to complete.

Not a deal breaker to have it verbose, but usually less is better and 
it is easier to spot the key thing being tested and what the 
functional steps are.
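
For illustration, the verification tail could then shrink to something
like the sketch below. It reuses the mock helpers already in the patch;
`total_jobs` is just shorthand for `params->num_workers *
params->num_jobs`, and the per-worker wait leans on jobs within a
single entity completing in order:

```c
	completed_jobs = drm_mock_sched_advance(ctx->sched, total_jobs);
	KUNIT_ASSERT_EQ(test, completed_jobs, total_jobs);

	/* Per-entity ordering means each worker's last job covers the rest */
	for (i = 0; i < params->num_workers; i++) {
		done = drm_mock_sched_job_wait_finished(
				workers[i].jobs[params->num_jobs - 1], HZ);
		KUNIT_ASSERT_TRUE(test, done);
	}
```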

> +}
> +
> +struct drm_sched_interleaved_params {
> +	const char *description;
> +	unsigned int job_base_period_us;
> +	unsigned int periods_cycle;
> +	unsigned int num_jobs;
> +	unsigned int num_workers;
> +};
> +
> +static const struct drm_sched_interleaved_params drm_sched_interleaved_cases[] = {
> +	{
> +		.description = "Workers submitting single jobs against a single entity",

As with the parallel description.

> +		.job_base_period_us = 100,
> +		.periods_cycle = 10,
> +		.num_jobs = 1,
> +		.num_workers = 32,
> +	},
> +	{
> +		.description = "Workers submitting multiple jobs against a single entity",
> +		.job_base_period_us = 100,
> +		.periods_cycle = 10,
> +		.num_jobs = 4,
> +		.num_workers = 16,
> +	},
> +};
> +
> +static void
> +drm_sched_interleaved_desc(const struct drm_sched_interleaved_params *params, char *desc)
> +{
> +	strscpy(desc, params->description, KUNIT_PARAM_DESC_SIZE);
> +}
> +
> +KUNIT_ARRAY_PARAM(drm_sched_interleaved, drm_sched_interleaved_cases,
> +		  drm_sched_interleaved_desc);
> +
> +struct interleaved_worker {
> +	struct work_struct work;
> +	struct sched_concurrent_context *ctx;
> +	struct drm_mock_sched_entity *entity;
> +	struct drm_mock_sched_job **jobs;
> +	unsigned int id;
> +	unsigned int period_us;
> +	unsigned int timeout_us;
> +};
> +
> +static void drm_sched_interleaved_worker(struct work_struct *work)
> +{
> +	const struct drm_sched_interleaved_params *params;
> +	struct sched_concurrent_context *test_ctx;
> +	struct interleaved_worker *worker;
> +	unsigned int i;
> +	bool done;
> +
> +	worker = container_of(work, struct interleaved_worker, work);
> +	test_ctx = worker->ctx;
> +	params = test_ctx->test->param_value;
> +
> +	wait_for_completion(&test_ctx->wait_go);
> +
> +	kunit_info(test_ctx->test, "Interleaved worker %u with period %u us started\n",
> +		   worker->id, worker->period_us);
> +
> +	/* Release jobs as a periodic sequence */
> +	for (i = 0; i < params->num_jobs; i++) {
> +		drm_mock_sched_job_set_duration_us(worker->jobs[i], worker->period_us);
> +		drm_mock_sched_job_submit(worker->jobs[i]);
> +
> +		done = drm_mock_sched_job_wait_finished(worker->jobs[i],
> +							usecs_to_jiffies(worker->timeout_us));
> +		if (!done)
> +			kunit_info(test_ctx->test, "Job %u of worker %u timed out\n",
> +				   i, worker->id);
> +	}
> +}
> +
> +/*
> + * Spawns workers that submit a periodic sequence of jobs to the mock scheduler.
> + * Uses harmonic periods to allow interleaving and cycles through them to prevent
> + * excessively large execution times. Since the scheduler serializes jobs from all
> + * workers, the timeout is set to the hyperperiod with a 2x safety factor. Entities
> + * and jobs are pre-allocated in the main thread to avoid using KUnit assertions in
> + * the workers.
> + */
> +static void drm_sched_interleaved_submit_test(struct kunit *test)
> +{
> +	struct sched_concurrent_context *ctx = test->priv;
> +	const struct drm_sched_interleaved_params *params = test->param_value;
> +	struct interleaved_worker *workers;
> +	unsigned int period_max_us, timeout_us;
> +	unsigned int i, j;
> +	bool done;
> +	int ret;
> +
> +	workers = kunit_kcalloc(test, params->num_workers, sizeof(*workers),
> +				GFP_KERNEL);
> +	KUNIT_ASSERT_NOT_NULL(test, workers);
> +
> +	period_max_us = params->job_base_period_us * (1 << params->periods_cycle);
> +	timeout_us = params->num_workers * period_max_us * 2;
> +
> +	/*
> +	 * Init workers only after all jobs and entities have been successfully
> +	 * allocated. In this way, the cleanup logic for when an assertion fails
> +	 * can be simplified.
> +	 */
> +	for (i = 0; i < params->num_workers; i++) {
> +		workers[i].id = i;
> +		workers[i].ctx = ctx;
> +		workers[i].timeout_us = timeout_us;
> +
> +		if (i % params->periods_cycle == 0)
> +			workers[i].period_us = params->job_base_period_us;
> +		else
> +			workers[i].period_us = workers[i - 1].period_us * 2;

Are the two if statements effectively equivalent to:

  period_us = params->job_base_period_us << (i % params->periods_cycle);

?

Also, do you have an idea how to locally calculate a suitable 
periods_cycle instead of having to come up with a number in the 
drm_sched_interleaved_params array? It just strikes me as hard to know 
what to put in there if someone wanted to add a test.

> +
> +		workers[i].entity = drm_mock_sched_entity_new(test,
> +							      DRM_SCHED_PRIORITY_NORMAL,
> +							      ctx->sched);
> +
> +		ret = kunit_add_action_or_reset(test, drm_mock_sched_entity_free_wrap,
> +						workers[i].entity);
> +		KUNIT_ASSERT_EQ(test, ret, 0);
> +
> +		workers[i].jobs = kunit_kcalloc(test, params->num_jobs,
> +						sizeof(*workers[i].jobs), GFP_KERNEL);
> +		KUNIT_ASSERT_NOT_NULL(test, workers[i].jobs);
> +
> +		for (j = 0; j < params->num_jobs; j++) {
> +			workers[i].jobs[j] = drm_mock_sched_job_new(test,
> +								    workers[i].entity);
> +			done = drm_mock_sched_job_is_finished(workers[i].jobs[j]);
> +			KUNIT_ASSERT_FALSE(test, done);

Same as above, this is asserted by a basic test case so I'd just remove it.

> +		}
> +	}
> +
> +	for (i = 0; i < params->num_workers; i++) {
> +		INIT_WORK(&workers[i].work, drm_sched_interleaved_worker);
> +		queue_work(ctx->sub_wq, &workers[i].work);
> +	}
> +
> +	complete_all(&ctx->wait_go);
> +	flush_workqueue(ctx->sub_wq);
> +
> +	for (i = 0; i < params->num_workers; i++) {
> +		for (j = 0; j < params->num_jobs; j++) {
> +			done = drm_mock_sched_job_is_finished(workers[i].jobs[j]);
> +			KUNIT_ASSERT_TRUE(test, done);
> +		}
> +	}

Here as well you could wait just for the last job per worker.

On the overall submission pattern - don't you end up with very uneven 
activity between the workers? Because the job duration is doubling and 
the workers are single-shot, once the low index workers are done they 
are done, and all that remains are the higher ones, so the wave of 
fewer and fewer active workers continues. Low index workers do not end 
up racing with the job completions of the high index workers but only 
between themselves, and even that is more limited in the cycle=10, 
workers=16 case.

An alternative approach could be to set a per-test time budget and 
just keep the workers submitting until it is over. It would be simpler 
to understand and there would be more submit/complete overlap.
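
A rough sketch of that time-budget idea against the mock API in the
patch. `budget_ms` and `next_preallocated_job()` are hypothetical
names; job allocation would have to stay in the main thread (or come
from a pre-allocated pool) since, as the patch notes, KUnit assertions
should not run in the workers:

```c
	/* Hypothetical per-worker loop: keep submitting until the budget expires */
	ktime_t end = ktime_add_ms(ktime_get(), budget_ms);

	while (ktime_before(ktime_get(), end)) {
		struct drm_mock_sched_job *job = next_preallocated_job(worker);

		if (!job)
			break;

		drm_mock_sched_job_set_duration_us(job, worker->period_us);
		drm_mock_sched_job_submit(job);
		drm_mock_sched_job_wait_finished(job, HZ);
	}
```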

Regards,

Tvrtko

> +}
> +
> +static struct kunit_case drm_sched_concurrent_tests[] = {
> +	KUNIT_CASE_PARAM(drm_sched_parallel_submit_test, drm_sched_parallel_gen_params),
> +	KUNIT_CASE_PARAM(drm_sched_interleaved_submit_test, drm_sched_interleaved_gen_params),
> +	{}
> +};
> +
> +static struct kunit_suite drm_sched_concurrent = {
> +	.name = "drm_sched_concurrent_tests",
> +	.init = drm_sched_concurrent_init,
> +	.test_cases = drm_sched_concurrent_tests,
> +};
> +
>   static struct kunit_case drm_sched_cancel_tests[] = {
>   	KUNIT_CASE(drm_sched_basic_cancel),
>   	{}
> @@ -556,6 +893,7 @@ static struct kunit_suite drm_sched_credits = {
>   };
>   
>   kunit_test_suites(&drm_sched_basic,
> +		  &drm_sched_concurrent,
>   		  &drm_sched_timeout,
>   		  &drm_sched_cancel,
>   		  &drm_sched_priority,



* Re: [PATCH] drm/sched: Add new KUnit test suite for concurrent job submission
  2026-02-23 16:25 ` Tvrtko Ursulin
@ 2026-02-25  7:47   ` Philipp Stanner
  2026-02-25  9:03     ` Tvrtko Ursulin
  2026-03-18 12:43     ` Marco Pagani
  2026-03-17 17:04   ` Marco Pagani
  1 sibling, 2 replies; 13+ messages in thread
From: Philipp Stanner @ 2026-02-25  7:47 UTC (permalink / raw)
  To: Tvrtko Ursulin, Marco Pagani, Matthew Brost, Danilo Krummrich,
	Philipp Stanner, Christian König, Maarten Lankhorst,
	Maxime Ripard, Thomas Zimmermann, David Airlie, Simona Vetter
  Cc: dri-devel, linux-kernel

On Mon, 2026-02-23 at 16:25 +0000, Tvrtko Ursulin wrote:
> 
> On 19/02/2026 14:07, Marco Pagani wrote:
> > Add a new test suite to simulate concurrent job submissions to the DRM
> > scheduler. The suite includes two initial test cases: (i) a primary test
> > case for parallel job submission and (ii) a secondary test case for
> > interleaved job submission and completion. In the first test case, worker
> > threads concurrently submit jobs to the scheduler and then the timeline is
> > manually advanced. In the second test case, worker threads periodically
> > submit a sequence of jobs to the mock scheduler. Periods are chosen as
> > harmonic, starting from a base period, to allow interleaving between
> > submission and completion. To avoid impractically large execution times,
> > periods are cycled. The timeline is advanced automatically when the jobs
> > complete. This models how job submission from userspace (in process
> > context) may interleave with job completion (hrtimer callback interrupt
> > context, in the test case) by a physical device.

I still maintain the opinion expressed last time: that the commit
message should make explicit why the patch / test is added, which this
one doesn't do. It just says "We add X", but not "Currently, the
problem is that YZ, thus we need X".
is that YZ, thus we need X".
(also breaking longer commit messages into paragraphs is nice)

Also see my comments about interleaved submits below.

> > 
> > Signed-off-by: Marco Pagani <marco.pagani@linux.dev>
> > ---
> > Changes in v1:
> > - Split the original suite into two test cases (Tvrtko Ursulin).
> > - General test refactoring and variable renaming for clarity.
> > - Changed parameters to have a fairer workload distribution.
> > - Improved cleanup logic.
> > - Added kunit_info() prints for tracing workers.
> > ---
> >   drivers/gpu/drm/scheduler/tests/tests_basic.c | 338 ++++++++++++++++++
> >   1 file changed, 338 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/scheduler/tests/tests_basic.c b/drivers/gpu/drm/scheduler/tests/tests_basic.c
> > index 82a41a456b0a..391edcbaf43a 100644
> > --- a/drivers/gpu/drm/scheduler/tests/tests_basic.c
> > +++ b/drivers/gpu/drm/scheduler/tests/tests_basic.c
> > @@ -2,6 +2,8 @@
> >   /* Copyright (c) 2025 Valve Corporation */
> >   
> >   #include <linux/delay.h>
> > +#include <linux/completion.h>
> > +#include <linux/workqueue.h>
> >   
> >   #include "sched_tests.h"
> >   
> > @@ -235,6 +237,341 @@ static void drm_sched_basic_cancel(struct kunit *test)
> >   	KUNIT_ASSERT_EQ(test, job->hw_fence.error, -ECANCELED);
> >   }
> >   
> > +struct sched_concurrent_context {
> > +	struct drm_mock_scheduler *sched;
> > +	struct workqueue_struct *sub_wq;
> > +	struct kunit *test;
> > +	struct completion wait_go;
> > +};
> > +
> > +KUNIT_DEFINE_ACTION_WRAPPER(drm_mock_sched_fini_wrap, drm_mock_sched_fini,
> > +			    struct drm_mock_scheduler *);
> > +
> > +KUNIT_DEFINE_ACTION_WRAPPER(drm_mock_sched_entity_free_wrap, drm_mock_sched_entity_free,
> > +			    struct drm_mock_sched_entity *);
> > +
> > +static void complete_destroy_workqueue(void *context)
> > +{
> > +	struct sched_concurrent_context *ctx = (struct sched_concurrent_context *)context;
> > +
> > +	complete_all(&ctx->wait_go);
> > +

nit: do we need that empty line

> > +	destroy_workqueue(ctx->sub_wq);
> > +}
> > +
> > +static int drm_sched_concurrent_init(struct kunit *test)
> > +{
> > +	struct sched_concurrent_context *ctx;
> > +	int ret;
> > +
> > +	ctx = kunit_kzalloc(test, sizeof(*ctx), GFP_KERNEL);
> > +	KUNIT_ASSERT_NOT_NULL(test, ctx);
> > +
> > +	init_completion(&ctx->wait_go);
> > +
> > +	ctx->sched = drm_mock_sched_new(test, MAX_SCHEDULE_TIMEOUT);
> > +
> > +	ret = kunit_add_action_or_reset(test, drm_mock_sched_fini_wrap, ctx->sched);
> > +	KUNIT_ASSERT_EQ(test, ret, 0);
> > +
> > +	/* Use an unbounded workqueue to maximize job submission concurrency */
> > +	ctx->sub_wq = alloc_workqueue("drm-sched-submitters-wq", WQ_UNBOUND,
> > +				      WQ_UNBOUND_MAX_ACTIVE);
> > +	KUNIT_ASSERT_NOT_NULL(test, ctx->sub_wq);
> > +
> > +	ret = kunit_add_action_or_reset(test, complete_destroy_workqueue, ctx);
> > +	KUNIT_ASSERT_EQ(test, ret, 0);
> > +
> > +	ctx->test = test;
> > +	test->priv = ctx;
> > +
> > +	return 0;
> > +}
> > +
> > +struct drm_sched_parallel_params {
> > +	const char *description;
> > +	unsigned int num_jobs;
> > +	unsigned int num_workers;
> > +};
> > +
> > +static const struct drm_sched_parallel_params drm_sched_parallel_cases[] = {
> > +	{
> > +		.description = "Workers submitting multiple jobs against a single entity",
> 
> Each worker has own entity so the description is a bit confusing.

Do you have a suggestion for a better one?

> 
> > +		.num_jobs = 4,
> > +		.num_workers = 16,
> > +	},
> > +};
> > +
> > +static void
> > +drm_sched_parallel_desc(const struct drm_sched_parallel_params *params, char *desc)
> > +{
> > +	strscpy(desc, params->description, KUNIT_PARAM_DESC_SIZE);
> > +}
> > +
> > +KUNIT_ARRAY_PARAM(drm_sched_parallel, drm_sched_parallel_cases, drm_sched_parallel_desc);
> > +
> > +struct parallel_worker {
> > +	struct work_struct work;
> > +	struct sched_concurrent_context *ctx;
> > +	struct drm_mock_sched_entity *entity;
> > +	struct drm_mock_sched_job **jobs;
> > +	unsigned int id;
> > +};
> > +
> > +static void drm_sched_parallel_worker(struct work_struct *work)
> > +{
> > +	const struct drm_sched_parallel_params *params;
> > +	struct sched_concurrent_context *test_ctx;
> > +	struct parallel_worker *worker;
> > +	unsigned int i;
> > +
> > +	worker = container_of(work, struct parallel_worker, work);
> > +	test_ctx = worker->ctx;
> > +	params = test_ctx->test->param_value;
> > +
> > +	wait_for_completion(&test_ctx->wait_go);
> > +
> > +	kunit_info(test_ctx->test, "Parallel worker %u started\n", worker->id);
> > +
> > +	for (i = 0; i < params->num_jobs; i++)
> > +		drm_mock_sched_job_submit(worker->jobs[i]);
> > +}
> > +
> > +/*
> > + * Spawns workers that submit a sequence of jobs to the mock scheduler.
> > + * Once all jobs are submitted, the timeline is manually advanced.
> > + */
> > +static void drm_sched_parallel_submit_test(struct kunit *test)
> > +{
> > +	struct sched_concurrent_context *ctx = test->priv;
> > +	const struct drm_sched_parallel_params *params = test->param_value;
> > +	struct parallel_worker *workers;
> > +	unsigned int i, j, completed_jobs;
> > +	bool done;
> > +	int ret;
> > +
> > +	workers = kunit_kcalloc(test, params->num_workers, sizeof(*workers),
> > +				GFP_KERNEL);
> > +	KUNIT_ASSERT_NOT_NULL(test, workers);
> > +
> > +	/*
> > +	 * Init workers only after all jobs and entities have been successfully
> > +	 * allocated. In this way, the cleanup logic for when an assertion fails
> > +	 * can be simplified.
> > +	 */
> > +	for (i = 0; i < params->num_workers; i++) {
> > +		workers[i].id = i;
> > +		workers[i].ctx = ctx;
> > +		workers[i].entity = drm_mock_sched_entity_new(test,
> > +							      DRM_SCHED_PRIORITY_NORMAL,
> > +							      ctx->sched);
> > +
> > +		ret = kunit_add_action_or_reset(test, drm_mock_sched_entity_free_wrap,
> > +						workers[i].entity);
> > +		KUNIT_ASSERT_EQ(test, ret, 0);
> > +
> > +		workers[i].jobs = kunit_kcalloc(test, params->num_jobs,
> > +						sizeof(*workers[i].jobs), GFP_KERNEL);
> > +		KUNIT_ASSERT_NOT_NULL(test, workers[i].jobs);
> > +
> > +		for (j = 0; j < params->num_jobs; j++)
> > +			workers[i].jobs[j] = drm_mock_sched_job_new(test,
> > +								    workers[i].entity);

The kernel doesn't strictly require the 80 column limit anymore. In
some cases it can make sense to exceed it a bit to avoid an ugly line
break.

Often one can make code a bit more readable by creating a helper
variable such as

struct xy *pos = &workers[i];

You need workers[i] at many places, so that definitely seems handy.


> > +	}
> > +
> > +	for (i = 0; i < params->num_workers; i++) {
> > +		INIT_WORK(&workers[i].work, drm_sched_parallel_worker);
> > +		queue_work(ctx->sub_wq, &workers[i].work);
> > +	}
> > +
> > +	complete_all(&ctx->wait_go);
> > +	flush_workqueue(ctx->sub_wq);
> > +
> > +	for (i = 0; i < params->num_workers; i++) {
> > +		for (j = 0; j < params->num_jobs; j++) {
> > +			done = drm_mock_sched_job_wait_scheduled(workers[i].jobs[j],
> > +								 HZ);

same

> > +			KUNIT_ASSERT_TRUE(test, done);
> > +
> > +			done = drm_mock_sched_job_is_finished(workers[i].jobs[j]);
> > +			KUNIT_ASSERT_FALSE(test, done);
> 
> This second assert does not seem to be bringing much value, but instead 
> makes the reader ask themselves why it is there. Remove it?
> 
> Hmm in fact this whole loop could be removed. All that is needed 
> below is to wait for the last job to be completed.

I suppose it's being tested whether all jobs are finished. That sounds
clean and not harmful to me.

> 
> > +		}
> > +	}
> > +
> > +	completed_jobs = drm_mock_sched_advance(ctx->sched,
> > +						params->num_workers * params->num_jobs);
> > +	KUNIT_ASSERT_EQ(test, completed_jobs, params->num_workers * params->num_jobs);

`params` doesn't change here anymore, so the lengthy multiplication
here, which is needed at two places, can be made a const at the top,
making the code more readable.

> > +
> > +	for (i = 0; i < params->num_workers; i++) {
> > +		for (j = 0; j < params->num_jobs; j++) {
> > +			done = drm_mock_sched_job_is_finished(workers[i].jobs[j]);
> > +			KUNIT_ASSERT_TRUE(test, done);
> > +		}
> > +	}
> 
> So here, after advancing you just need to wait on the last job to complete.
> 
> Not a deal breaker to have it verbose but usually the less is better and 
> easier to spot the key thing being tested and what are the functional steps.

depends a bit on what Marco intended. Looks like "better safe than
sorry" to me?

> 
> > +}
> > +
> > +struct drm_sched_interleaved_params {
> > +	const char *description;
> > +	unsigned int job_base_period_us;
> > +	unsigned int periods_cycle;
> > +	unsigned int num_jobs;
> > +	unsigned int num_workers;
> > +};
> > +
> > +static const struct drm_sched_interleaved_params drm_sched_interleaved_cases[] = {
> > +	{
> > +		.description = "Workers submitting single jobs against a single entity",
> 
> As with the parallel description.

I think (see again my comment about the commit message) what the test
wants to implement is threaded submission to an entity shared between
threads. That can certainly be made more verbose.

> 
> > +		.job_base_period_us = 100,
> > +		.periods_cycle = 10,
> > +		.num_jobs = 1,
> > +		.num_workers = 32,
> > +	},
> > +	{
> > +		.description = "Workers submitting multiple jobs against a single entity",
> > +		.job_base_period_us = 100,
> > +		.periods_cycle = 10,
> > +		.num_jobs = 4,
> > +		.num_workers = 16,
> > +	},
> > +};
> > +
> > +static void
> > +drm_sched_interleaved_desc(const struct drm_sched_interleaved_params *params, char *desc)
> > +{
> > +	strscpy(desc, params->description, KUNIT_PARAM_DESC_SIZE);
> > +}
> > +
> > +KUNIT_ARRAY_PARAM(drm_sched_interleaved, drm_sched_interleaved_cases,
> > +		  drm_sched_interleaved_desc);
> > +
> > +struct interleaved_worker {
> > +	struct work_struct work;
> > +	struct sched_concurrent_context *ctx;
> > +	struct drm_mock_sched_entity *entity;
> > +	struct drm_mock_sched_job **jobs;
> > +	unsigned int id;
> > +	unsigned int period_us;
> > +	unsigned int timeout_us;
> > +};
> > +
> > +static void drm_sched_interleaved_worker(struct work_struct *work)
> > +{
> > +	const struct drm_sched_interleaved_params *params;
> > +	struct sched_concurrent_context *test_ctx;
> > +	struct interleaved_worker *worker;
> > +	unsigned int i;
> > +	bool done;
> > +
> > +	worker = container_of(work, struct interleaved_worker, work);
> > +	test_ctx = worker->ctx;
> > +	params = test_ctx->test->param_value;
> > +
> > +	wait_for_completion(&test_ctx->wait_go);
> > +
> > +	kunit_info(test_ctx->test, "Interleaved worker %u with period %u us started\n",
> > +		   worker->id, worker->period_us);
> > +
> > +	/* Release jobs as a periodic sequence */
> > +	for (i = 0; i < params->num_jobs; i++) {
> > +		drm_mock_sched_job_set_duration_us(worker->jobs[i], worker->period_us);
> > +		drm_mock_sched_job_submit(worker->jobs[i]);
> > +
> > +		done = drm_mock_sched_job_wait_finished(worker->jobs[i],

worker->jobs[i] is used at 3 places and can be made a helper variable.

> > +							usecs_to_jiffies(worker->timeout_us));
> > +		if (!done)
> > +			kunit_info(test_ctx->test, "Job %u of worker %u timed out\n",
> > +				   i, worker->id);
> > +	}
> > +}
> > +
> > +/*
> > + * Spawns workers that submit a periodic sequence of jobs to the mock scheduler.
> > + * Uses harmonic periods to allow interleaving and cycles through them to prevent
> > + * excessively large execution times.
> > 

What are "large execution times", like too large for the test
framework?

> >  Since the scheduler serializes jobs from all
> > + * workers, the timeout is set to the hyperperiod with a 2x safety factor. Entities
> > + * and jobs are pre-allocated in the main thread to avoid using KUnit assertions in
> > + * the workers.
> > + */
> > +static void drm_sched_interleaved_submit_test(struct kunit *test)

It appears to me that this is effectively a separate test, which might
want its own patch. What is the motivation behind adding it?

> > +{
> > +	struct sched_concurrent_context *ctx = test->priv;
> > +	const struct drm_sched_interleaved_params *params = test->param_value;
> > +	struct interleaved_worker *workers;
> > +	unsigned int period_max_us, timeout_us;
> > +	unsigned int i, j;
> > +	bool done;
> > +	int ret;
> > +
> > +	workers = kunit_kcalloc(test, params->num_workers, sizeof(*workers),
> > +				GFP_KERNEL);
> > +	KUNIT_ASSERT_NOT_NULL(test, workers);
> > +
> > +	period_max_us = params->job_base_period_us * (1 << params->periods_cycle);
> > +	timeout_us = params->num_workers * period_max_us * 2;
> > +
> > +	/*
> > +	 * Init workers only after all jobs and entities have been successfully
>> +	 * allocated. In this way, the cleanup logic for when an assertion fails
> > +	 * can be simplified.
> > +	 */
> > +	for (i = 0; i < params->num_workers; i++) {
> > +		workers[i].id = i;
> > +		workers[i].ctx = ctx;
> > +		workers[i].timeout_us = timeout_us;

helper variable for workers[i].


P.


> > +
> > +		if (i % params->periods_cycle == 0)
> > +			workers[i].period_us = params->job_base_period_us;
> > +		else
> > +			workers[i].period_us = workers[i - 1].period_us * 2;
> 
> Are the two if statements effectively equivalent to:
> 
>   period_us = params->job_base_period_us << (i % params->periods_cycle);
> 
> ?
> 
> Also, do you have an idea how to locally calculate a suitable 
> periods_cycle instead of having to come up with a number in the 
> drm_sched_interleaved_params array. Just strikes me as hard to know what 
> to put in there if someone would want to add a test.
> 
> > +
> > +		workers[i].entity = drm_mock_sched_entity_new(test,
> > +							      DRM_SCHED_PRIORITY_NORMAL,
> > +							      ctx->sched);
> > +
> > +		ret = kunit_add_action_or_reset(test, drm_mock_sched_entity_free_wrap,
> > +						workers[i].entity);
> > +		KUNIT_ASSERT_EQ(test, ret, 0);
> > +
> > +		workers[i].jobs = kunit_kcalloc(test, params->num_jobs,
> > +						sizeof(*workers[i].jobs), GFP_KERNEL);
> > +		KUNIT_ASSERT_NOT_NULL(test, workers[i].jobs);
> > +
> > +		for (j = 0; j < params->num_jobs; j++) {
> > +			workers[i].jobs[j] = drm_mock_sched_job_new(test,
> > +								    workers[i].entity);
> > +			done = drm_mock_sched_job_is_finished(workers[i].jobs[j]);
> > +			KUNIT_ASSERT_FALSE(test, done);
> 
> Same as above, this is asserted by a basic test case so I'd just remove it.
> 
> > +		}
> > +	}
> > +
> > +	for (i = 0; i < params->num_workers; i++) {
> > +		INIT_WORK(&workers[i].work, drm_sched_interleaved_worker);
> > +		queue_work(ctx->sub_wq, &workers[i].work);
> > +	}
> > +
> > +	complete_all(&ctx->wait_go);
> > +	flush_workqueue(ctx->sub_wq);
> > +
> > +	for (i = 0; i < params->num_workers; i++) {
> > +		for (j = 0; j < params->num_jobs; j++) {
> > +			done = drm_mock_sched_job_is_finished(workers[i].jobs[j]);
> > +			KUNIT_ASSERT_TRUE(test, done);
> > +		}
> > +	}
> 
> Here as well you could wait just for the last job per worker.
> 
> On the overal submission pattern - Don't you end up with a very uneven 
> activity between the workers? Because the job duration is doubling and 
> workers are single-shot, once the low index workers are done they are
> done, and all that remains are the higher ones, and so the wave of fewer 
> and fewer active workers continues. Low index workers do not end up 
> racing with the job completions of the high index workers but only 
> between themselves, and even that is more limited with the cycle=10 
> workers=16 case.
> 
> Alternative approach could be to set a per test time budget and just 
> keep the workers submitting until over. It would be simpler to 
> understand and there would be more submit/complete overlap.
> 
> Regards,
> 
> Tvrtko
> 
> > +}
> > +
> > +static struct kunit_case drm_sched_concurrent_tests[] = {
> > +	KUNIT_CASE_PARAM(drm_sched_parallel_submit_test, drm_sched_parallel_gen_params),
> > +	KUNIT_CASE_PARAM(drm_sched_interleaved_submit_test, drm_sched_interleaved_gen_params),
> > +	{}
> > +};
> > +
> > +static struct kunit_suite drm_sched_concurrent = {
> > +	.name = "drm_sched_concurrent_tests",
> > +	.init = drm_sched_concurrent_init,
> > +	.test_cases = drm_sched_concurrent_tests,
> > +};
> > +
> >   static struct kunit_case drm_sched_cancel_tests[] = {
> >   	KUNIT_CASE(drm_sched_basic_cancel),
> >   	{}
> > @@ -556,6 +893,7 @@ static struct kunit_suite drm_sched_credits = {
> >   };
> >   
> >   kunit_test_suites(&drm_sched_basic,
> > +		  &drm_sched_concurrent,
> >   		  &drm_sched_timeout,
> >   		  &drm_sched_cancel,
> >   		  &drm_sched_priority,
> 



* Re: [PATCH] drm/sched: Add new KUnit test suite for concurrent job submission
  2026-02-25  7:47   ` Philipp Stanner
@ 2026-02-25  9:03     ` Tvrtko Ursulin
  2026-03-17 17:27       ` Marco Pagani
  2026-03-18 12:43     ` Marco Pagani
  1 sibling, 1 reply; 13+ messages in thread
From: Tvrtko Ursulin @ 2026-02-25  9:03 UTC (permalink / raw)
  To: phasta, Marco Pagani, Matthew Brost, Danilo Krummrich,
	Christian König, Maarten Lankhorst, Maxime Ripard,
	Thomas Zimmermann, David Airlie, Simona Vetter
  Cc: dri-devel, linux-kernel


On 25/02/2026 07:47, Philipp Stanner wrote:
> On Mon, 2026-02-23 at 16:25 +0000, Tvrtko Ursulin wrote:
>>
>> On 19/02/2026 14:07, Marco Pagani wrote:
>>> Add a new test suite to simulate concurrent job submissions to the DRM
>>> scheduler. The suite includes two initial test cases: (i) a primary test
>>> case for parallel job submission and (ii) a secondary test case for
>>> interleaved job submission and completion. In the first test case, worker
>>> threads concurrently submit jobs to the scheduler and then the timeline is
>>> manually advanced. In the second test case, worker threads periodically
>>> submit a sequence of jobs to the mock scheduler. Periods are chosen as
>>> harmonic, starting from a base period, to allow interleaving between
>>> submission and completion. To avoid impractically large execution times,
>>> periods are cycled. The timeline is advanced automatically when the jobs
>>> complete. This models how job submission from userspace (in process
>>> context) may interleave with job completion (hrtimer callback interrupt
>>> context, in the test case) by a physical device.
> 
> I still maintain the opinion expressed the last time: that the commit
> message should make explicit why the patch / test is added. Which this
> doesn't do. It just says: "We add X", but not "Currently, the problem
> is that YZ, thus we need X".
> (also breaking longer commit messages into paragraphs is nice)
> 
> Also see my comments about interleaved submits below.

I'll address the ones addressed to me.

8><

>>> +struct drm_sched_parallel_params {
>>> +	const char *description;
>>> +	unsigned int num_jobs;
>>> +	unsigned int num_workers;
>>> +};
>>> +
>>> +static const struct drm_sched_parallel_params drm_sched_parallel_cases[] = {
>>> +	{
>>> +		.description = "Workers submitting multiple jobs against a single entity",
>>
>> Each worker has own entity so the description is a bit confusing.
> 
> Do you have a suggestion for a better one?

Along the lines of:

"Multiple workers submitting multiple jobs from their entity"

8><

>>> +	}
>>> +
>>> +	for (i = 0; i < params->num_workers; i++) {
>>> +		INIT_WORK(&workers[i].work, drm_sched_parallel_worker);
>>> +		queue_work(ctx->sub_wq, &workers[i].work);
>>> +	}
>>> +
>>> +	complete_all(&ctx->wait_go);
>>> +	flush_workqueue(ctx->sub_wq);
>>> +
>>> +	for (i = 0; i < params->num_workers; i++) {
>>> +		for (j = 0; j < params->num_jobs; j++) {
>>> +			done = drm_mock_sched_job_wait_scheduled(workers[i].jobs[j],
>>> +								 HZ);
> 
> same
> 
>>> +			KUNIT_ASSERT_TRUE(test, done);
>>> +
>>> +			done = drm_mock_sched_job_is_finished(workers[i].jobs[j]);
>>> +			KUNIT_ASSERT_FALSE(test, done);
>>
>> This second assert does not seem to be bringing much value, but instead
>> makes the reader ask themselves why it is there. Remove it?
>>
>> Hmm in fact this whole loop could be removed. All that is needed
>> below is to wait for the last job to be completed.
> 
> I suppose it's being tested whether all jobs are finished. That sounds
> clean and not harmful to me.

No, it is assert false. It is testing the jobs have been scheduled but 
not completed before the timeline is manually advanced. Both those 
behaviours are already covered by the existing basic test cases.

In my view the general best practice is to focus on the thing being 
tested, which in this case is the submission side of things. The rest 
can just distract the reader. And in this case that is parallel 
submission, which is all done and dusted by the time flush_workqueue 
above finishes. Everything after that point is just test cleanup.

So I see it as this:

/* Release parallel submit workers */

complete_all(&ctx->wait_go);

/* Wait for all of them to submit all their jobs */

flush_workqueue(ctx->sub_wq);

/* Testing done, cleanup all jobs before exiting */

completed_jobs = drm_mock_sched_advance(ctx->sched,
					params->num_workers * params->num_jobs);
KUNIT_ASSERT_EQ(test, completed_jobs,
		params->num_workers * params->num_jobs);

for (i = 0; i < params->num_workers; i++) {
	done = drm_mock_sched_job_wait_finished(workers[i].jobs[params->num_jobs - 1],
						HZ);
	KUNIT_ASSERT_TRUE(test, done);
}

Regards,

Tvrtko



* Re: [PATCH] drm/sched: Add new KUnit test suite for concurrent job submission
  2026-02-23 16:25 ` Tvrtko Ursulin
  2026-02-25  7:47   ` Philipp Stanner
@ 2026-03-17 17:04   ` Marco Pagani
  2026-03-18 10:51     ` Tvrtko Ursulin
  1 sibling, 1 reply; 13+ messages in thread
From: Marco Pagani @ 2026-03-17 17:04 UTC (permalink / raw)
  To: Tvrtko Ursulin, Philipp Stanner
  Cc: dri-devel, linux-kernel, Matthew Brost, Danilo Krummrich,
	Christian König, Maarten Lankhorst, Maxime Ripard,
	Thomas Zimmermann, David Airlie, Simona Vetter



On 23/02/2026 17:25, Tvrtko Ursulin wrote:
> 
> On 19/02/2026 14:07, Marco Pagani wrote:
>> Add a new test suite to simulate concurrent job submissions to the DRM
>> scheduler. The suite includes two initial test cases: (i) a primary test
>> case for parallel job submission and (ii) a secondary test case for
>> interleaved job submission and completion. In the first test case, worker
>> threads concurrently submit jobs to the scheduler and then the timeline is
>> manually advanced. In the second test case, worker threads periodically
>> submit a sequence of jobs to the mock scheduler. Periods are chosen as
>> harmonic, starting from a base period, to allow interleaving between
>> submission and completion. To avoid impractically large execution times,
>> periods are cycled. The timeline is advanced automatically when the jobs
>> complete. This models how job submission from userspace (in process
>> context) may interleave with job completion (hrtimer callback interrupt
>> context, in the test case) by a physical device.
>>
>> Signed-off-by: Marco Pagani <marco.pagani@linux.dev>
>> ---
>> Changes in v1:
>> - Split the original suite into two test cases (Tvrtko Ursulin).
>> - General test refactoring and variable renaming for clarity.
>> - Changed parameters to have a fairer workload distribution.
>> - Improved cleanup logic.
>> - Added kunit_info() prints for tracing workers.
>> ---
>>   drivers/gpu/drm/scheduler/tests/tests_basic.c | 338 ++++++++++++++++++
>>   1 file changed, 338 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/scheduler/tests/tests_basic.c b/drivers/gpu/drm/scheduler/tests/tests_basic.c
>> index 82a41a456b0a..391edcbaf43a 100644
>> --- a/drivers/gpu/drm/scheduler/tests/tests_basic.c
>> +++ b/drivers/gpu/drm/scheduler/tests/tests_basic.c
>> @@ -2,6 +2,8 @@
>>   /* Copyright (c) 2025 Valve Corporation */
>>   
>>   #include <linux/delay.h>
>> +#include <linux/completion.h>
>> +#include <linux/workqueue.h>
>>   
>>   #include "sched_tests.h"
>>   
>> @@ -235,6 +237,341 @@ static void drm_sched_basic_cancel(struct kunit *test)
>>   	KUNIT_ASSERT_EQ(test, job->hw_fence.error, -ECANCELED);
>>   }
>>   
>> +struct sched_concurrent_context {
>> +	struct drm_mock_scheduler *sched;
>> +	struct workqueue_struct *sub_wq;
>> +	struct kunit *test;
>> +	struct completion wait_go;
>> +};
>> +
>> +KUNIT_DEFINE_ACTION_WRAPPER(drm_mock_sched_fini_wrap, drm_mock_sched_fini,
>> +			    struct drm_mock_scheduler *);
>> +
>> +KUNIT_DEFINE_ACTION_WRAPPER(drm_mock_sched_entity_free_wrap, drm_mock_sched_entity_free,
>> +			    struct drm_mock_sched_entity *);
>> +
>> +static void complete_destroy_workqueue(void *context)
>> +{
>> +	struct sched_concurrent_context *ctx = (struct sched_concurrent_context *)context;
>> +
>> +	complete_all(&ctx->wait_go);
>> +
>> +	destroy_workqueue(ctx->sub_wq);
>> +}
>> +
>> +static int drm_sched_concurrent_init(struct kunit *test)
>> +{
>> +	struct sched_concurrent_context *ctx;
>> +	int ret;
>> +
>> +	ctx = kunit_kzalloc(test, sizeof(*ctx), GFP_KERNEL);
>> +	KUNIT_ASSERT_NOT_NULL(test, ctx);
>> +
>> +	init_completion(&ctx->wait_go);
>> +
>> +	ctx->sched = drm_mock_sched_new(test, MAX_SCHEDULE_TIMEOUT);
>> +
>> +	ret = kunit_add_action_or_reset(test, drm_mock_sched_fini_wrap, ctx->sched);
>> +	KUNIT_ASSERT_EQ(test, ret, 0);
>> +
>> +	/* Use an unbounded workqueue to maximize job submission concurrency */
>> +	ctx->sub_wq = alloc_workqueue("drm-sched-submitters-wq", WQ_UNBOUND,
>> +				      WQ_UNBOUND_MAX_ACTIVE);
>> +	KUNIT_ASSERT_NOT_NULL(test, ctx->sub_wq);
>> +
>> +	ret = kunit_add_action_or_reset(test, complete_destroy_workqueue, ctx);
>> +	KUNIT_ASSERT_EQ(test, ret, 0);
>> +
>> +	ctx->test = test;
>> +	test->priv = ctx;
>> +
>> +	return 0;
>> +}
>> +
>> +struct drm_sched_parallel_params {
>> +	const char *description;
>> +	unsigned int num_jobs;
>> +	unsigned int num_workers;
>> +};
>> +
>> +static const struct drm_sched_parallel_params drm_sched_parallel_cases[] = {
>> +	{
>> +		.description = "Workers submitting multiple jobs against a single entity",
> 
> Each worker has own entity so the description is a bit confusing.

Right, I'll change it "Multiple workers submitting multiple jobs from their entity"
as you suggested in the following email.

>> +		.num_jobs = 4,
>> +		.num_workers = 16,
>> +	},
>> +};
>> +
>> +static void
>> +drm_sched_parallel_desc(const struct drm_sched_parallel_params *params, char *desc)
>> +{
>> +	strscpy(desc, params->description, KUNIT_PARAM_DESC_SIZE);
>> +}
>> +
>> +KUNIT_ARRAY_PARAM(drm_sched_parallel, drm_sched_parallel_cases, drm_sched_parallel_desc);
>> +
>> +struct parallel_worker {
>> +	struct work_struct work;
>> +	struct sched_concurrent_context *ctx;
>> +	struct drm_mock_sched_entity *entity;
>> +	struct drm_mock_sched_job **jobs;
>> +	unsigned int id;
>> +};
>> +
>> +static void drm_sched_parallel_worker(struct work_struct *work)
>> +{
>> +	const struct drm_sched_parallel_params *params;
>> +	struct sched_concurrent_context *test_ctx;
>> +	struct parallel_worker *worker;
>> +	unsigned int i;
>> +
>> +	worker = container_of(work, struct parallel_worker, work);
>> +	test_ctx = worker->ctx;
>> +	params = test_ctx->test->param_value;
>> +
>> +	wait_for_completion(&test_ctx->wait_go);
>> +
>> +	kunit_info(test_ctx->test, "Parallel worker %u started\n", worker->id);
>> +
>> +	for (i = 0; i < params->num_jobs; i++)
>> +		drm_mock_sched_job_submit(worker->jobs[i]);
>> +}
>> +
>> +/*
>> + * Spawns workers that submit a sequence of jobs to the mock scheduler.
>> + * Once all jobs are submitted, the timeline is manually advanced.
>> + */
>> +static void drm_sched_parallel_submit_test(struct kunit *test)
>> +{
>> +	struct sched_concurrent_context *ctx = test->priv;
>> +	const struct drm_sched_parallel_params *params = test->param_value;
>> +	struct parallel_worker *workers;
>> +	unsigned int i, j, completed_jobs;
>> +	bool done;
>> +	int ret;
>> +
>> +	workers = kunit_kcalloc(test, params->num_workers, sizeof(*workers),
>> +				GFP_KERNEL);
>> +	KUNIT_ASSERT_NOT_NULL(test, workers);
>> +
>> +	/*
>> +	 * Init workers only after all jobs and entities have been successfully
>> +	 * allocated. In this way, the cleanup logic for when an assertion fails
>> +	 * can be simplified.
>> +	 */
>> +	for (i = 0; i < params->num_workers; i++) {
>> +		workers[i].id = i;
>> +		workers[i].ctx = ctx;
>> +		workers[i].entity = drm_mock_sched_entity_new(test,
>> +							      DRM_SCHED_PRIORITY_NORMAL,
>> +							      ctx->sched);
>> +
>> +		ret = kunit_add_action_or_reset(test, drm_mock_sched_entity_free_wrap,
>> +						workers[i].entity);
>> +		KUNIT_ASSERT_EQ(test, ret, 0);
>> +
>> +		workers[i].jobs = kunit_kcalloc(test, params->num_jobs,
>> +						sizeof(*workers[i].jobs), GFP_KERNEL);
>> +		KUNIT_ASSERT_NOT_NULL(test, workers[i].jobs);
>> +
>> +		for (j = 0; j < params->num_jobs; j++)
>> +			workers[i].jobs[j] = drm_mock_sched_job_new(test,
>> +								    workers[i].entity);
>> +	}
>> +
>> +	for (i = 0; i < params->num_workers; i++) {
>> +		INIT_WORK(&workers[i].work, drm_sched_parallel_worker);
>> +		queue_work(ctx->sub_wq, &workers[i].work);
>> +	}
>> +
>> +	complete_all(&ctx->wait_go);
>> +	flush_workqueue(ctx->sub_wq);
>> +
>> +	for (i = 0; i < params->num_workers; i++) {
>> +		for (j = 0; j < params->num_jobs; j++) {
>> +			done = drm_mock_sched_job_wait_scheduled(workers[i].jobs[j],
>> +								 HZ);
>> +			KUNIT_ASSERT_TRUE(test, done);
>> +
>> +			done = drm_mock_sched_job_is_finished(workers[i].jobs[j]);
>> +			KUNIT_ASSERT_FALSE(test, done);
> 
> This second assert does not seem to be bringing much value, but instead 
> makes the reader ask themselves why it is there. Remove it?
> 
> Hmm in fact this whole loop could be removed. All that is needed 
> below is to wait for the last job to be completed.

I'll reply to this in the following mail since Philip joined the conversation.
 
>> +		}
>> +	}
>> +
>> +	completed_jobs = drm_mock_sched_advance(ctx->sched,
>> +						params->num_workers * params->num_jobs);
>> +	KUNIT_ASSERT_EQ(test, completed_jobs, params->num_workers * params->num_jobs);
>> +
>> +	for (i = 0; i < params->num_workers; i++) {
>> +		for (j = 0; j < params->num_jobs; j++) {
>> +			done = drm_mock_sched_job_is_finished(workers[i].jobs[j]);
>> +			KUNIT_ASSERT_TRUE(test, done);
>> +		}
>> +	}
> 
> So here, after advancing you just need to wait on the last job to complete.
> 
> Not a deal breaker to have it verbose but usually the less is better and 
> easier to spot the key thing being tested and what are the functional steps.
> 
>> +}
>> +
>> +struct drm_sched_interleaved_params {
>> +	const char *description;
>> +	unsigned int job_base_period_us;
>> +	unsigned int periods_cycle;
>> +	unsigned int num_jobs;
>> +	unsigned int num_workers;
>> +};
>> +
>> +static const struct drm_sched_interleaved_params drm_sched_interleaved_cases[] = {
>> +	{
>> +		.description = "Workers submitting single jobs against a single entity",
> 
> As with the parallel description.

Agreed.
 
>> +		.job_base_period_us = 100,
>> +		.periods_cycle = 10,
>> +		.num_jobs = 1,
>> +		.num_workers = 32,
>> +	},
>> +	{
>> +		.description = "Workers submitting multiple jobs against a single entity",
>> +		.job_base_period_us = 100,
>> +		.periods_cycle = 10,
>> +		.num_jobs = 4,
>> +		.num_workers = 16,
>> +	},
>> +};
>> +
>> +static void
>> +drm_sched_interleaved_desc(const struct drm_sched_interleaved_params *params, char *desc)
>> +{
>> +	strscpy(desc, params->description, KUNIT_PARAM_DESC_SIZE);
>> +}
>> +
>> +KUNIT_ARRAY_PARAM(drm_sched_interleaved, drm_sched_interleaved_cases,
>> +		  drm_sched_interleaved_desc);
>> +
>> +struct interleaved_worker {
>> +	struct work_struct work;
>> +	struct sched_concurrent_context *ctx;
>> +	struct drm_mock_sched_entity *entity;
>> +	struct drm_mock_sched_job **jobs;
>> +	unsigned int id;
>> +	unsigned int period_us;
>> +	unsigned int timeout_us;
>> +};
>> +
>> +static void drm_sched_interleaved_worker(struct work_struct *work)
>> +{
>> +	const struct drm_sched_interleaved_params *params;
>> +	struct sched_concurrent_context *test_ctx;
>> +	struct interleaved_worker *worker;
>> +	unsigned int i;
>> +	bool done;
>> +
>> +	worker = container_of(work, struct interleaved_worker, work);
>> +	test_ctx = worker->ctx;
>> +	params = test_ctx->test->param_value;
>> +
>> +	wait_for_completion(&test_ctx->wait_go);
>> +
>> +	kunit_info(test_ctx->test, "Interleaved worker %u with period %u us started\n",
>> +		   worker->id, worker->period_us);
>> +
>> +	/* Release jobs as a periodic sequence */
>> +	for (i = 0; i < params->num_jobs; i++) {
>> +		drm_mock_sched_job_set_duration_us(worker->jobs[i], worker->period_us);
>> +		drm_mock_sched_job_submit(worker->jobs[i]);
>> +
>> +		done = drm_mock_sched_job_wait_finished(worker->jobs[i],
>> +							usecs_to_jiffies(worker->timeout_us));
>> +		if (!done)
>> +			kunit_info(test_ctx->test, "Job %u of worker %u timed out\n",
>> +				   i, worker->id);
>> +	}
>> +}
>> +
>> +/*
>> + * Spawns workers that submit a periodic sequence of jobs to the mock scheduler.
>> + * Uses harmonic periods to allow interleaving and cycles through them to prevent
>> + * excessively large execution times. Since the scheduler serializes jobs from all
>> + * workers, the timeout is set to the hyperperiod with a 2x safety factor. Entities
>> + * and jobs are pre-allocated in the main thread to avoid using KUnit assertions in
>> + * the workers.
>> + */
>> +static void drm_sched_interleaved_submit_test(struct kunit *test)
>> +{
>> +	struct sched_concurrent_context *ctx = test->priv;
>> +	const struct drm_sched_interleaved_params *params = test->param_value;
>> +	struct interleaved_worker *workers;
>> +	unsigned int period_max_us, timeout_us;
>> +	unsigned int i, j;
>> +	bool done;
>> +	int ret;
>> +
>> +	workers = kunit_kcalloc(test, params->num_workers, sizeof(*workers),
>> +				GFP_KERNEL);
>> +	KUNIT_ASSERT_NOT_NULL(test, workers);
>> +
>> +	period_max_us = params->job_base_period_us * (1 << params->periods_cycle);
>> +	timeout_us = params->num_workers * period_max_us * 2;
>> +
>> +	/*
>> +	 * Init workers only after all jobs and entities have been successfully
>> +	 * allocated. In this way, the cleanup logic for when an assertion fails
>> +	 * can be simplified.
>> +	 */
>> +	for (i = 0; i < params->num_workers; i++) {
>> +		workers[i].id = i;
>> +		workers[i].ctx = ctx;
>> +		workers[i].timeout_us = timeout_us;
>> +
>> +		if (i % params->periods_cycle == 0)
>> +			workers[i].period_us = params->job_base_period_us;
>> +		else
>> +			workers[i].period_us = workers[i - 1].period_us * 2;
> 
> Are the two if statements effectively equivalent to:
> 
>   period_us = params->job_base_period_us << (i % params->periods_cycle);
> 
> ?

Yes. Actually, I was very undecided between using the closed form and the
iterative form. In the end, I went with the iterative one because I thought
it would be easier to understand and maintain, and this code is not
performance-critical. However, I don't have a strong opinion on this.
Either way works for me.

> Also, do you have an idea how to locally calculate a suitable 
> periods_cycle instead of having to come up with a number in the 
> drm_sched_interleaved_params array. Just strikes me as hard to know what 
> to put in there if someone would want to add a test.

Please see below.
 
>> +
>> +		workers[i].entity = drm_mock_sched_entity_new(test,
>> +							      DRM_SCHED_PRIORITY_NORMAL,
>> +							      ctx->sched);
>> +
>> +		ret = kunit_add_action_or_reset(test, drm_mock_sched_entity_free_wrap,
>> +						workers[i].entity);
>> +		KUNIT_ASSERT_EQ(test, ret, 0);
>> +
>> +		workers[i].jobs = kunit_kcalloc(test, params->num_jobs,
>> +						sizeof(*workers[i].jobs), GFP_KERNEL);
>> +		KUNIT_ASSERT_NOT_NULL(test, workers[i].jobs);
>> +
>> +		for (j = 0; j < params->num_jobs; j++) {
>> +			workers[i].jobs[j] = drm_mock_sched_job_new(test,
>> +								    workers[i].entity);
>> +			done = drm_mock_sched_job_is_finished(workers[i].jobs[j]);
>> +			KUNIT_ASSERT_FALSE(test, done);
> 
> Same as above, this is asserted by a basic test case so I'd just remove it.
> 
>> +		}
>> +	}
>> +
>> +	for (i = 0; i < params->num_workers; i++) {
>> +		INIT_WORK(&workers[i].work, drm_sched_interleaved_worker);
>> +		queue_work(ctx->sub_wq, &workers[i].work);
>> +	}
>> +
>> +	complete_all(&ctx->wait_go);
>> +	flush_workqueue(ctx->sub_wq);
>> +
>> +	for (i = 0; i < params->num_workers; i++) {
>> +		for (j = 0; j < params->num_jobs; j++) {
>> +			done = drm_mock_sched_job_is_finished(workers[i].jobs[j]);
>> +			KUNIT_ASSERT_TRUE(test, done);
>> +		}
>> +	}
> 
> Here as well you could wait just for the last job per worker.
> 
> On the overall submission pattern - Don't you end up with a very uneven 
> activity between the workers? Because the job duration is doubling and 
> workers are single-shot, once the low index workers are done they are 
> done, and all that remains are the higher ones, and so the wave of fewer 
> and fewer active workers continues. Low index worker do not end up 
> racing with the job completions of the high index workers but only 
> between themselves, and even that with the cycle=10 workers=16 case is 
> even more limited.

Good point.

> Alternative approach could be to set a per test time budget and just 
> keep the workers submitting until over. It would be simpler to 
> understand and there would be more submit/complete overlap.

I agree. Using a test time budget and having workers continuously
submit jobs until it expires would make better use of the test time.
I'm thinking that the simplest and most straightforward approach would
be to cyclically distribute periods among workers until they reach
the largest possible value below the test duration, which would coincide
with the hyperperiod. This would also solve the issue of selecting a
suitable periods_cycle parameter that you mentioned earlier.
In practice, something like this:

drm_sched_interleaved_params [...]
{
        .num_workers = N,
        .test_max_time = T,
        .job_base_period_us = P,        /* Small GPU job, 100 us */
}

period_us = job_base_period_us;
for (i = 0; i < params->num_workers; i++) {
        workers[i].period_us = period_us;        
        period_us *= 2;
        if (period_us > test_max_time)
                period_us = job_base_period_us;
}


What do you think?

Thanks,
Marco

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] drm/sched: Add new KUnit test suite for concurrent job submission
  2026-02-25  9:03     ` Tvrtko Ursulin
@ 2026-03-17 17:27       ` Marco Pagani
  2026-03-18 10:44         ` Tvrtko Ursulin
  0 siblings, 1 reply; 13+ messages in thread
From: Marco Pagani @ 2026-03-17 17:27 UTC (permalink / raw)
  To: Tvrtko Ursulin, phasta
  Cc: dri-devel, linux-kernel, Matthew Brost, Danilo Krummrich,
	Christian König, Maarten Lankhorst, Maxime Ripard,
	Thomas Zimmermann, David Airlie, Simona Vetter



On 25/02/2026 10:03, Tvrtko Ursulin wrote:
> 
> On 25/02/2026 07:47, Philipp Stanner wrote:
>> On Mon, 2026-02-23 at 16:25 +0000, Tvrtko Ursulin wrote:
>>>
>>> On 19/02/2026 14:07, Marco Pagani wrote:
>>>> Add a new test suite to simulate concurrent job submissions to the DRM
>>>> scheduler. The suite includes two initial test cases: (i) a primary test
>>>> case for parallel job submission and (ii) a secondary test case for
>>>> interleaved job submission and completion. In the first test case, worker
>>>> threads concurrently submit jobs to the scheduler and then the timeline is
>>>> manually advanced. In the second test case, worker threads periodically
>>>> submit a sequence of jobs to the mock scheduler. Periods are chosen as
>>>> harmonic, starting from a base period, to allow interleaving between
>>>> submission and completion. To avoid impractically large execution times,
>>>> periods are cycled. The timeline is advanced automatically when the jobs
>>>> complete. This models how job submission from userspace (in process
>>>> context) may interleave with job completion (hrtimer callback interrupt
>>>> context, in the test case) by a physical device.
>>
>> I still maintain the opinion expressed the last time: that the commit
>> message should make explicit why the patch / test is added. Which this
>> doesn't do. It just says: "We add X", but not "Currently, the problem
>> is that YZ, thus we need X".
>> (also breaking longer commit messages into paragraphs is nice)
>>
>> Also see my comments about interleaved submits below.
> 
> I'll address the ones addressed to me.
> 
> 8><
> 
>>>> +struct drm_sched_parallel_params {
>>>> +	const char *description;
>>>> +	unsigned int num_jobs;
>>>> +	unsigned int num_workers;
>>>> +};
>>>> +
>>>> +static const struct drm_sched_parallel_params drm_sched_parallel_cases[] = {
>>>> +	{
>>>> +		.description = "Workers submitting multiple jobs against a single entity",
>>>
>>> Each worker has own entity so the description is a bit confusing.
>>
>> Do you have a suggestion for a better one?
> 
> Along the lines of:
> 
> "Multiple workers submitting multiple jobs from their entity"
> 
> 8><
> 
>>>> +	}
>>>> +
>>>> +	for (i = 0; i < params->num_workers; i++) {
>>>> +		INIT_WORK(&workers[i].work, drm_sched_parallel_worker);
>>>> +		queue_work(ctx->sub_wq, &workers[i].work);
>>>> +	}
>>>> +
>>>> +	complete_all(&ctx->wait_go);
>>>> +	flush_workqueue(ctx->sub_wq);
>>>> +
>>>> +	for (i = 0; i < params->num_workers; i++) {
>>>> +		for (j = 0; j < params->num_jobs; j++) {
>>>> +			done = drm_mock_sched_job_wait_scheduled(workers[i].jobs[j],
>>>> +								 HZ);
>>
>> same
>>
>>>> +			KUNIT_ASSERT_TRUE(test, done);
>>>> +
>>>> +			done = drm_mock_sched_job_is_finished(workers[i].jobs[j]);
>>>> +			KUNIT_ASSERT_FALSE(test, done);
>>>
>>> This second assert does not seem to be bringing much value, but instead
>>> makes the reader ask themselves why it is there. Remove it?
>>>
>>> Hmm in fact this whole loop could be removed. All that it is needed
>>> below is to wait for the last job to be completed.
>>
>> I suppose it's being tested whether all jobs are finished. That sounds
>> clean and not harmful to me.
> 
> No, it is assert false. It is testing the jobs have been scheduled but 
> not completed before the timeline is manually advanced. Both those 
> behaviours are already covered by the existing basic test cases.
> 
> In my view the general best practice is to focus on the thing being 
> tested, which in this case is the submission side of things. The rest 
> can just distract the reader. And in this case that is parallel 
> submission, which is all done and dusted by the time flush_workqueue 
> above finishes. Everything after that point is just test cleanup.

I see what you mean. By that point, concurrent submission is complete, so it
may be considered outside the scope of this test. However, my intent was to
check that the basic behaviour of the scheduler also holds after the concurrent
submissions of multiple jobs, which is not covered by the basic test case.
That said, I'm open to removing those asserts if you and Philipp believe
they are redundant.

Thanks,
Marco

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] drm/sched: Add new KUnit test suite for concurrent job submission
  2026-03-17 17:27       ` Marco Pagani
@ 2026-03-18 10:44         ` Tvrtko Ursulin
  2026-03-18 18:15           ` Marco Pagani
  0 siblings, 1 reply; 13+ messages in thread
From: Tvrtko Ursulin @ 2026-03-18 10:44 UTC (permalink / raw)
  To: Marco Pagani, phasta
  Cc: dri-devel, linux-kernel, Matthew Brost, Danilo Krummrich,
	Christian König, Maarten Lankhorst, Maxime Ripard,
	Thomas Zimmermann, David Airlie, Simona Vetter


On 17/03/2026 17:27, Marco Pagani wrote:
> On 25/02/2026 10:03, Tvrtko Ursulin wrote:
>>
>> On 25/02/2026 07:47, Philipp Stanner wrote:
>>> On Mon, 2026-02-23 at 16:25 +0000, Tvrtko Ursulin wrote:
>>>>
>>>> On 19/02/2026 14:07, Marco Pagani wrote:
>>>>> Add a new test suite to simulate concurrent job submissions to the DRM
>>>>> scheduler. The suite includes two initial test cases: (i) a primary test
>>>>> case for parallel job submission and (ii) a secondary test case for
>>>>> interleaved job submission and completion. In the first test case, worker
>>>>> threads concurrently submit jobs to the scheduler and then the timeline is
>>>>> manually advanced. In the second test case, worker threads periodically
>>>>> submit a sequence of jobs to the mock scheduler. Periods are chosen as
>>>>> harmonic, starting from a base period, to allow interleaving between
>>>>> submission and completion. To avoid impractically large execution times,
>>>>> periods are cycled. The timeline is advanced automatically when the jobs
>>>>> complete. This models how job submission from userspace (in process
>>>>> context) may interleave with job completion (hrtimer callback interrupt
>>>>> context, in the test case) by a physical device.
>>>
>>> I still maintain the opinion expressed the last time: that the commit
>>> message should make explicit why the patch / test is added. Which this
>>> doesn't do. It just says: "We add X", but not "Currently, the problem
>>> is that YZ, thus we need X".
>>> (also breaking longer commit messages into paragraphs is nice)
>>>
>>> Also see my comments about interleaved submits below.
>>
>> I'll address the ones addressed to me.
>>
>> 8><
>>
>>>>> +struct drm_sched_parallel_params {
>>>>> +	const char *description;
>>>>> +	unsigned int num_jobs;
>>>>> +	unsigned int num_workers;
>>>>> +};
>>>>> +
>>>>> +static const struct drm_sched_parallel_params drm_sched_parallel_cases[] = {
>>>>> +	{
>>>>> +		.description = "Workers submitting multiple jobs against a single entity",
>>>>
>>>> Each worker has own entity so the description is a bit confusing.
>>>
>>> Do you have a suggestion for a better one?
>>
>> Along the lines of:
>>
>> "Multiple workers submitting multiple jobs from their entity"
>>
>> 8><
>>
>>>>> +	}
>>>>> +
>>>>> +	for (i = 0; i < params->num_workers; i++) {
>>>>> +		INIT_WORK(&workers[i].work, drm_sched_parallel_worker);
>>>>> +		queue_work(ctx->sub_wq, &workers[i].work);
>>>>> +	}
>>>>> +
>>>>> +	complete_all(&ctx->wait_go);
>>>>> +	flush_workqueue(ctx->sub_wq);
>>>>> +
>>>>> +	for (i = 0; i < params->num_workers; i++) {
>>>>> +		for (j = 0; j < params->num_jobs; j++) {
>>>>> +			done = drm_mock_sched_job_wait_scheduled(workers[i].jobs[j],
>>>>> +								 HZ);
>>>
>>> same
>>>
>>>>> +			KUNIT_ASSERT_TRUE(test, done);
>>>>> +
>>>>> +			done = drm_mock_sched_job_is_finished(workers[i].jobs[j]);
>>>>> +			KUNIT_ASSERT_FALSE(test, done);
>>>>
>>>> This second assert does not seem to be bringing much value, but instead
>>>> makes the reader ask themselves why it is there. Remove it?
>>>>
>>>> Hmm in fact this whole loop could be removed. All that it is needed
>>>> below is to wait for the last job to be completed.
>>>
>>> I suppose it's being tested whether all jobs are finished. That sounds
>>> clean and not harmful to me.
>>
>> No, it is assert false. It is testing the jobs have been scheduled but
>> not completed before the timeline is manually advanced. Both those
>> behaviours are already covered by the existing basic test cases.
>>
>> In my view the general best practice is to focus on the thing being
>> tested, which in this case is the submission side of things. The rest
>> can just distract the reader. And in this case that is parallel
>> submission, which is all done and dusted by the time flush_workqueue
>> above finishes. Everything after that point is just test cleanup.
> 
> I see what you mean. By that point, concurrent submission is complete, so it
> may be considered outside the scope of this test. However, my intent was to
> check that the basic behaviour of the scheduler also holds after the concurrent
> submissions of multiple jobs, which is not covered by the basic test case.
> That said, I'm open to removing those asserts if you and Philipp believe
> they are redundant.

Some time has passed and I don't exactly remember now with the trimmed 
quote. If this part of the test is using manual timeline advancement 
then the assert is basically testing the mock scheduler (not the DRM 
scheduler). For me that is lukewarm, but if you insist feel free to keep it.

Regards,

Tvrtko


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] drm/sched: Add new KUnit test suite for concurrent job submission
  2026-03-17 17:04   ` Marco Pagani
@ 2026-03-18 10:51     ` Tvrtko Ursulin
  2026-03-18 17:23       ` Marco Pagani
  0 siblings, 1 reply; 13+ messages in thread
From: Tvrtko Ursulin @ 2026-03-18 10:51 UTC (permalink / raw)
  To: Marco Pagani, Philipp Stanner
  Cc: dri-devel, linux-kernel, Matthew Brost, Danilo Krummrich,
	Christian König, Maarten Lankhorst, Maxime Ripard,
	Thomas Zimmermann, David Airlie, Simona Vetter


On 17/03/2026 17:04, Marco Pagani wrote:
> On 23/02/2026 17:25, Tvrtko Ursulin wrote:
>>
>> On 19/02/2026 14:07, Marco Pagani wrote:
>>> Add a new test suite to simulate concurrent job submissions to the DRM
>>> scheduler. The suite includes two initial test cases: (i) a primary test
>>> case for parallel job submission and (ii) a secondary test case for
>>> interleaved job submission and completion. In the first test case, worker
>>> threads concurrently submit jobs to the scheduler and then the timeline is
>>> manually advanced. In the second test case, worker threads periodically
>>> submit a sequence of jobs to the mock scheduler. Periods are chosen as
>>> harmonic, starting from a base period, to allow interleaving between
>>> submission and completion. To avoid impractically large execution times,
>>> periods are cycled. The timeline is advanced automatically when the jobs
>>> complete. This models how job submission from userspace (in process
>>> context) may interleave with job completion (hrtimer callback interrupt
>>> context, in the test case) by a physical device.
>>>
>>> Signed-off-by: Marco Pagani <marco.pagani@linux.dev>
>>> ---
>>> Changes in v1:
>>> - Split the original suite into two test cases (Tvrtko Ursulin).
>>> - General test refactoring and variable renaming for clarity.
>>> - Changed parameters to have a fairer workload distribution.
>>> - Improved cleanup logic.
>>> - Added kunit_info() prints for tracing workers.
>>> ---
>>>    drivers/gpu/drm/scheduler/tests/tests_basic.c | 338 ++++++++++++++++++
>>>    1 file changed, 338 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/scheduler/tests/tests_basic.c b/drivers/gpu/drm/scheduler/tests/tests_basic.c
>>> index 82a41a456b0a..391edcbaf43a 100644
>>> --- a/drivers/gpu/drm/scheduler/tests/tests_basic.c
>>> +++ b/drivers/gpu/drm/scheduler/tests/tests_basic.c
>>> @@ -2,6 +2,8 @@
>>>    /* Copyright (c) 2025 Valve Corporation */
>>>    
>>>    #include <linux/delay.h>
>>> +#include <linux/completion.h>
>>> +#include <linux/workqueue.h>
>>>    
>>>    #include "sched_tests.h"
>>>    
>>> @@ -235,6 +237,341 @@ static void drm_sched_basic_cancel(struct kunit *test)
>>>    	KUNIT_ASSERT_EQ(test, job->hw_fence.error, -ECANCELED);
>>>    }
>>>    
>>> +struct sched_concurrent_context {
>>> +	struct drm_mock_scheduler *sched;
>>> +	struct workqueue_struct *sub_wq;
>>> +	struct kunit *test;
>>> +	struct completion wait_go;
>>> +};
>>> +
>>> +KUNIT_DEFINE_ACTION_WRAPPER(drm_mock_sched_fini_wrap, drm_mock_sched_fini,
>>> +			    struct drm_mock_scheduler *);
>>> +
>>> +KUNIT_DEFINE_ACTION_WRAPPER(drm_mock_sched_entity_free_wrap, drm_mock_sched_entity_free,
>>> +			    struct drm_mock_sched_entity *);
>>> +
>>> +static void complete_destroy_workqueue(void *context)
>>> +{
>>> +	struct sched_concurrent_context *ctx = (struct sched_concurrent_context *)context;
>>> +
>>> +	complete_all(&ctx->wait_go);
>>> +
>>> +	destroy_workqueue(ctx->sub_wq);
>>> +}
>>> +
>>> +static int drm_sched_concurrent_init(struct kunit *test)
>>> +{
>>> +	struct sched_concurrent_context *ctx;
>>> +	int ret;
>>> +
>>> +	ctx = kunit_kzalloc(test, sizeof(*ctx), GFP_KERNEL);
>>> +	KUNIT_ASSERT_NOT_NULL(test, ctx);
>>> +
>>> +	init_completion(&ctx->wait_go);
>>> +
>>> +	ctx->sched = drm_mock_sched_new(test, MAX_SCHEDULE_TIMEOUT);
>>> +
>>> +	ret = kunit_add_action_or_reset(test, drm_mock_sched_fini_wrap, ctx->sched);
>>> +	KUNIT_ASSERT_EQ(test, ret, 0);
>>> +
>>> +	/* Use an unbounded workqueue to maximize job submission concurrency */
>>> +	ctx->sub_wq = alloc_workqueue("drm-sched-submitters-wq", WQ_UNBOUND,
>>> +				      WQ_UNBOUND_MAX_ACTIVE);
>>> +	KUNIT_ASSERT_NOT_NULL(test, ctx->sub_wq);
>>> +
>>> +	ret = kunit_add_action_or_reset(test, complete_destroy_workqueue, ctx);
>>> +	KUNIT_ASSERT_EQ(test, ret, 0);
>>> +
>>> +	ctx->test = test;
>>> +	test->priv = ctx;
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +struct drm_sched_parallel_params {
>>> +	const char *description;
>>> +	unsigned int num_jobs;
>>> +	unsigned int num_workers;
>>> +};
>>> +
>>> +static const struct drm_sched_parallel_params drm_sched_parallel_cases[] = {
>>> +	{
>>> +		.description = "Workers submitting multiple jobs against a single entity",
>>
>> Each worker has own entity so the description is a bit confusing.
> 
> Right, I'll change it "Multiple workers submitting multiple jobs from their entity"
> as you suggested in the following email.
> 
>>> +		.num_jobs = 4,
>>> +		.num_workers = 16,
>>> +	},
>>> +};
>>> +
>>> +static void
>>> +drm_sched_parallel_desc(const struct drm_sched_parallel_params *params, char *desc)
>>> +{
>>> +	strscpy(desc, params->description, KUNIT_PARAM_DESC_SIZE);
>>> +}
>>> +
>>> +KUNIT_ARRAY_PARAM(drm_sched_parallel, drm_sched_parallel_cases, drm_sched_parallel_desc);
>>> +
>>> +struct parallel_worker {
>>> +	struct work_struct work;
>>> +	struct sched_concurrent_context *ctx;
>>> +	struct drm_mock_sched_entity *entity;
>>> +	struct drm_mock_sched_job **jobs;
>>> +	unsigned int id;
>>> +};
>>> +
>>> +static void drm_sched_parallel_worker(struct work_struct *work)
>>> +{
>>> +	const struct drm_sched_parallel_params *params;
>>> +	struct sched_concurrent_context *test_ctx;
>>> +	struct parallel_worker *worker;
>>> +	unsigned int i;
>>> +
>>> +	worker = container_of(work, struct parallel_worker, work);
>>> +	test_ctx = worker->ctx;
>>> +	params = test_ctx->test->param_value;
>>> +
>>> +	wait_for_completion(&test_ctx->wait_go);
>>> +
>>> +	kunit_info(test_ctx->test, "Parallel worker %u started\n", worker->id);
>>> +
>>> +	for (i = 0; i < params->num_jobs; i++)
>>> +		drm_mock_sched_job_submit(worker->jobs[i]);
>>> +}
>>> +
>>> +/*
>>> + * Spawns workers that submit a sequence of jobs to the mock scheduler.
>>> + * Once all jobs are submitted, the timeline is manually advanced.
>>> + */
>>> +static void drm_sched_parallel_submit_test(struct kunit *test)
>>> +{
>>> +	struct sched_concurrent_context *ctx = test->priv;
>>> +	const struct drm_sched_parallel_params *params = test->param_value;
>>> +	struct parallel_worker *workers;
>>> +	unsigned int i, j, completed_jobs;
>>> +	bool done;
>>> +	int ret;
>>> +
>>> +	workers = kunit_kcalloc(test, params->num_workers, sizeof(*workers),
>>> +				GFP_KERNEL);
>>> +	KUNIT_ASSERT_NOT_NULL(test, workers);
>>> +
>>> +	/*
>>> +	 * Init workers only after all jobs and entities have been successfully
>>> +	 * allocated. In this way, the cleanup logic for when an assertion fails
>>> +	 * can be simplified.
>>> +	 */
>>> +	for (i = 0; i < params->num_workers; i++) {
>>> +		workers[i].id = i;
>>> +		workers[i].ctx = ctx;
>>> +		workers[i].entity = drm_mock_sched_entity_new(test,
>>> +							      DRM_SCHED_PRIORITY_NORMAL,
>>> +							      ctx->sched);
>>> +
>>> +		ret = kunit_add_action_or_reset(test, drm_mock_sched_entity_free_wrap,
>>> +						workers[i].entity);
>>> +		KUNIT_ASSERT_EQ(test, ret, 0);
>>> +
>>> +		workers[i].jobs = kunit_kcalloc(test, params->num_jobs,
>>> +						sizeof(*workers[i].jobs), GFP_KERNEL);
>>> +		KUNIT_ASSERT_NOT_NULL(test, workers[i].jobs);
>>> +
>>> +		for (j = 0; j < params->num_jobs; j++)
>>> +			workers[i].jobs[j] = drm_mock_sched_job_new(test,
>>> +								    workers[i].entity);
>>> +	}
>>> +
>>> +	for (i = 0; i < params->num_workers; i++) {
>>> +		INIT_WORK(&workers[i].work, drm_sched_parallel_worker);
>>> +		queue_work(ctx->sub_wq, &workers[i].work);
>>> +	}
>>> +
>>> +	complete_all(&ctx->wait_go);
>>> +	flush_workqueue(ctx->sub_wq);
>>> +
>>> +	for (i = 0; i < params->num_workers; i++) {
>>> +		for (j = 0; j < params->num_jobs; j++) {
>>> +			done = drm_mock_sched_job_wait_scheduled(workers[i].jobs[j],
>>> +								 HZ);
>>> +			KUNIT_ASSERT_TRUE(test, done);
>>> +
>>> +			done = drm_mock_sched_job_is_finished(workers[i].jobs[j]);
>>> +			KUNIT_ASSERT_FALSE(test, done);
>>
>> This second assert does not seem to be bringing much value, but instead
>> makes the reader ask themselves why it is there. Remove it?
>>
>> Hmm in fact this whole loop could be removed. All that it is needed
>> below is to wait for the last job to be completed.
> 
> I'll reply to this in the following mail since Philipp joined the conversation.
>   
>>> +		}
>>> +	}
>>> +
>>> +	completed_jobs = drm_mock_sched_advance(ctx->sched,
>>> +						params->num_workers * params->num_jobs);
>>> +	KUNIT_ASSERT_EQ(test, completed_jobs, params->num_workers * params->num_jobs);
>>> +
>>> +	for (i = 0; i < params->num_workers; i++) {
>>> +		for (j = 0; j < params->num_jobs; j++) {
>>> +			done = drm_mock_sched_job_is_finished(workers[i].jobs[j]);
>>> +			KUNIT_ASSERT_TRUE(test, done);
>>> +		}
>>> +	}
>>
>> So here, after advancing you just need to wait on the last job to complete.
>>
>> Not a deal breaker to have it verbose but usually the less is better and
>> easier to spot the key thing being tested and what are the functional steps.
>>
>>> +}
>>> +
>>> +struct drm_sched_interleaved_params {
>>> +	const char *description;
>>> +	unsigned int job_base_period_us;
>>> +	unsigned int periods_cycle;
>>> +	unsigned int num_jobs;
>>> +	unsigned int num_workers;
>>> +};
>>> +
>>> +static const struct drm_sched_interleaved_params drm_sched_interleaved_cases[] = {
>>> +	{
>>> +		.description = "Workers submitting single jobs against a single entity",
>>
>> As with the parallel description.
> 
> Agreed.
>   
>>> +		.job_base_period_us = 100,
>>> +		.periods_cycle = 10,
>>> +		.num_jobs = 1,
>>> +		.num_workers = 32,
>>> +	},
>>> +	{
>>> +		.description = "Workers submitting multiple jobs against a single entity",
>>> +		.job_base_period_us = 100,
>>> +		.periods_cycle = 10,
>>> +		.num_jobs = 4,
>>> +		.num_workers = 16,
>>> +	},
>>> +};
>>> +
>>> +static void
>>> +drm_sched_interleaved_desc(const struct drm_sched_interleaved_params *params, char *desc)
>>> +{
>>> +	strscpy(desc, params->description, KUNIT_PARAM_DESC_SIZE);
>>> +}
>>> +
>>> +KUNIT_ARRAY_PARAM(drm_sched_interleaved, drm_sched_interleaved_cases,
>>> +		  drm_sched_interleaved_desc);
>>> +
>>> +struct interleaved_worker {
>>> +	struct work_struct work;
>>> +	struct sched_concurrent_context *ctx;
>>> +	struct drm_mock_sched_entity *entity;
>>> +	struct drm_mock_sched_job **jobs;
>>> +	unsigned int id;
>>> +	unsigned int period_us;
>>> +	unsigned int timeout_us;
>>> +};
>>> +
>>> +static void drm_sched_interleaved_worker(struct work_struct *work)
>>> +{
>>> +	const struct drm_sched_interleaved_params *params;
>>> +	struct sched_concurrent_context *test_ctx;
>>> +	struct interleaved_worker *worker;
>>> +	unsigned int i;
>>> +	bool done;
>>> +
>>> +	worker = container_of(work, struct interleaved_worker, work);
>>> +	test_ctx = worker->ctx;
>>> +	params = test_ctx->test->param_value;
>>> +
>>> +	wait_for_completion(&test_ctx->wait_go);
>>> +
>>> +	kunit_info(test_ctx->test, "Interleaved worker %u with period %u us started\n",
>>> +		   worker->id, worker->period_us);
>>> +
>>> +	/* Release jobs as a periodic sequence */
>>> +	for (i = 0; i < params->num_jobs; i++) {
>>> +		drm_mock_sched_job_set_duration_us(worker->jobs[i], worker->period_us);
>>> +		drm_mock_sched_job_submit(worker->jobs[i]);
>>> +
>>> +		done = drm_mock_sched_job_wait_finished(worker->jobs[i],
>>> +							usecs_to_jiffies(worker->timeout_us));
>>> +		if (!done)
>>> +			kunit_info(test_ctx->test, "Job %u of worker %u timed out\n",
>>> +				   i, worker->id);
>>> +	}
>>> +}
>>> +
>>> +/*
>>> + * Spawns workers that submit a periodic sequence of jobs to the mock scheduler.
>>> + * Uses harmonic periods to allow interleaving and cycles through them to prevent
>>> + * excessively large execution times. Since the scheduler serializes jobs from all
>>> + * workers, the timeout is set to the hyperperiod with a 2x safety factor. Entities
>>> + * and jobs are pre-allocated in the main thread to avoid using KUnit assertions in
>>> + * the workers.
>>> + */
>>> +static void drm_sched_interleaved_submit_test(struct kunit *test)
>>> +{
>>> +	struct sched_concurrent_context *ctx = test->priv;
>>> +	const struct drm_sched_interleaved_params *params = test->param_value;
>>> +	struct interleaved_worker *workers;
>>> +	unsigned int period_max_us, timeout_us;
>>> +	unsigned int i, j;
>>> +	bool done;
>>> +	int ret;
>>> +
>>> +	workers = kunit_kcalloc(test, params->num_workers, sizeof(*workers),
>>> +				GFP_KERNEL);
>>> +	KUNIT_ASSERT_NOT_NULL(test, workers);
>>> +
>>> +	period_max_us = params->job_base_period_us * (1 << params->periods_cycle);
>>> +	timeout_us = params->num_workers * period_max_us * 2;
>>> +
>>> +	/*
>>> +	 * Init workers only after all jobs and entities have been successfully
>>> +	 * allocated. In this way, the cleanup logic for when an assertion fails
>>> +	 * can be simplified.
>>> +	 */
>>> +	for (i = 0; i < params->num_workers; i++) {
>>> +		workers[i].id = i;
>>> +		workers[i].ctx = ctx;
>>> +		workers[i].timeout_us = timeout_us;
>>> +
>>> +		if (i % params->periods_cycle == 0)
>>> +			workers[i].period_us = params->job_base_period_us;
>>> +		else
>>> +			workers[i].period_us = workers[i - 1].period_us * 2;
>>
>> Are the two if statements effectively equivalent to:
>>
>>    period_us = params->job_base_period_us << (i % params->periods_cycle);
>>
>> ?
> 
> Yes. Actually, I was very undecided between using the closed form and the
> iterative form. In the end, I went with the iterative one because I thought
> it would be easier to understand and maintain, and this code is not
> performance-critical. However, I don't have a strong opinion on this.
> Either way works for me.

AFAIR, and maybe it is just me, the if-else version referencing the 
previous (i - 1) period took me some time to figure out. With my version 
it is obvious that it is a repeating cycle (modulo operator) of doubling 
intervals (<<).

> 
>> Also, do you have an idea how to locally calculate a suitable
>> periods_cycle instead of having to come up with a number in the
>> drm_sched_interleaved_params array. Just strikes me as hard to know what
>> to put in there if someone would want to add a test.
> 
> Please see below.
>   
>>> +
>>> +		workers[i].entity = drm_mock_sched_entity_new(test,
>>> +							      DRM_SCHED_PRIORITY_NORMAL,
>>> +							      ctx->sched);
>>> +
>>> +		ret = kunit_add_action_or_reset(test, drm_mock_sched_entity_free_wrap,
>>> +						workers[i].entity);
>>> +		KUNIT_ASSERT_EQ(test, ret, 0);
>>> +
>>> +		workers[i].jobs = kunit_kcalloc(test, params->num_jobs,
>>> +						sizeof(*workers[i].jobs), GFP_KERNEL);
>>> +		KUNIT_ASSERT_NOT_NULL(test, workers[i].jobs);
>>> +
>>> +		for (j = 0; j < params->num_jobs; j++) {
>>> +			workers[i].jobs[j] = drm_mock_sched_job_new(test,
>>> +								    workers[i].entity);
>>> +			done = drm_mock_sched_job_is_finished(workers[i].jobs[j]);
>>> +			KUNIT_ASSERT_FALSE(test, done);
>>
>> Same as above, this is asserted by a basic test case so I'd just remove it.
>>
>>> +		}
>>> +	}
>>> +
>>> +	for (i = 0; i < params->num_workers; i++) {
>>> +		INIT_WORK(&workers[i].work, drm_sched_interleaved_worker);
>>> +		queue_work(ctx->sub_wq, &workers[i].work);
>>> +	}
>>> +
>>> +	complete_all(&ctx->wait_go);
>>> +	flush_workqueue(ctx->sub_wq);
>>> +
>>> +	for (i = 0; i < params->num_workers; i++) {
>>> +		for (j = 0; j < params->num_jobs; j++) {
>>> +			done = drm_mock_sched_job_is_finished(workers[i].jobs[j]);
>>> +			KUNIT_ASSERT_TRUE(test, done);
>>> +		}
>>> +	}
>>
>> Here as well you could wait just for the last job per worker.
>>
>> On the overall submission pattern - Don't you end up with a very uneven
>> activity between the workers? Because the job duration is doubling and
>> workers are single-shot, once the low index workers are done they are
>> done, and all that remains are the higher ones, and so the wave of fewer
>> and fewer active workers continues. Low index worker do not end up
>> racing with the job completions of the high index workers but only
>> between themselves, and even that with the cycle=10 workers=16 case is
>> even more limited.
> 
> Good point.
> 
>> Alternative approach could be to set a per test time budget and just
>> keep the workers submitting until over. It would be simpler to
>> understand and there would be more submit/complete overlap.
> 
> I agree. Using a test time budget and having workers continuously
> submit jobs until it expires would make better use of the test time.
> I'm thinking that the simplest and most straightforward approach would
> be to cyclically distribute periods among workers until they reach
> the largest possible value below the test duration, which would coincide
> with the hyperperiod. This would also solve the issue of selecting a
> suitable periods_cycle parameter that you mentioned earlier.
> In practice, something like this:
> 
> drm_sched_interleaved_params [...]
> {
>          .num_workers = N
>          .test_max_time = T
>          .job_base_period_us = P         /* Small GPU job, 100 us */
> }
> 
> period_us = job_base_period_us;
> for (i = 0; i < params->num_workers; i++) {
>          workers[i].period_us = period_us;
>          period_us *= 2;
>          if (period_us > test_max_time)
>                  period_us = job_base_period_us;
> }
> 
> 
> What do you think?


Again some time has passed so rather than going to re-read your patch I 
will go from memory. IIRC I was thinking something really dumb and 100% 
time bound with no need to think when coding and reviewing. Each thread 
simply does:

ktime_t start = ktime_get();

do {
  .. thread doing its submission pattern thing ..
} while (ktime_to_ms(ktime_sub(ktime_get(), start)) < test->time_ms);

May miss the time target by a job period_us but who cares.

Regards,

Tvrtko


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] drm/sched: Add new KUnit test suite for concurrent job submission
  2026-02-25  7:47   ` Philipp Stanner
  2026-02-25  9:03     ` Tvrtko Ursulin
@ 2026-03-18 12:43     ` Marco Pagani
  1 sibling, 0 replies; 13+ messages in thread
From: Marco Pagani @ 2026-03-18 12:43 UTC (permalink / raw)
  To: phasta, Tvrtko Ursulin
  Cc: dri-devel, linux-kernel, Matthew Brost, Danilo Krummrich,
	Christian König, Maarten Lankhorst, Maxime Ripard,
	Thomas Zimmermann, David Airlie, Simona Vetter


On 25/02/2026 08:47, Philipp Stanner wrote:
> On Mon, 2026-02-23 at 16:25 +0000, Tvrtko Ursulin wrote:
>>
>> On 19/02/2026 14:07, Marco Pagani wrote:
>>> Add a new test suite to simulate concurrent job submissions to the DRM
>>> scheduler. The suite includes two initial test cases: (i) a primary test
>>> case for parallel job submission and (ii) a secondary test case for
>>> interleaved job submission and completion. In the first test case, worker
>>> threads concurrently submit jobs to the scheduler and then the timeline is
>>> manually advanced. In the second test case, worker threads periodically
>>> submit a sequence of jobs to the mock scheduler. Periods are chosen as
>>> harmonic, starting from a base period, to allow interleaving between
>>> submission and completion. To avoid impractically large execution times,
>>> periods are cycled. The timeline is advanced automatically when the jobs
> complete. This models how job submission from userspace (in process
>>> context) may interleave with job completion (hrtimer callback interrupt
>>> context, in the test case) by a physical device.
> 
> I still maintain the opinion expressed the last time: that the commit
> message should make explicit why the patch / test is added. Which this
> doesn't do. It just says: "We add X", but not "Currently, the problem
> is that YZ, thus we need X".
> (also breaking longer commit messages into paragraphs is nice)

Okay, got it. I'll rewrite it and break it up into paragraphs,
highlighting that current tests do not cover concurrent submission,
so we need some tests for that case.

> Also see my comments about interleaved submits below.
> 
>>>
>>> Signed-off-by: Marco Pagani <marco.pagani@linux.dev>
>>> ---
>>> Changes in v1:
>>> - Split the original suite into two test cases (Tvrtko Ursulin).
>>> - General test refactoring and variable renaming for clarity.
>>> - Changed parameters to have a fairer workload distribution.
>>> - Improved cleanup logic.
>>> - Added kunit_info() prints for tracing workers.
>>> ---
>>>   drivers/gpu/drm/scheduler/tests/tests_basic.c | 338 ++++++++++++++++++
>>>   1 file changed, 338 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/scheduler/tests/tests_basic.c b/drivers/gpu/drm/scheduler/tests/tests_basic.c
>>> index 82a41a456b0a..391edcbaf43a 100644
>>> --- a/drivers/gpu/drm/scheduler/tests/tests_basic.c
>>> +++ b/drivers/gpu/drm/scheduler/tests/tests_basic.c
>>> @@ -2,6 +2,8 @@
>>>   /* Copyright (c) 2025 Valve Corporation */
>>>   
>>>   #include <linux/delay.h>
>>> +#include <linux/completion.h>
>>> +#include <linux/workqueue.h>
>>>   
>>>   #include "sched_tests.h"
>>>   
>>> @@ -235,6 +237,341 @@ static void drm_sched_basic_cancel(struct kunit *test)
>>>   	KUNIT_ASSERT_EQ(test, job->hw_fence.error, -ECANCELED);
>>>   }
>>>   
>>> +struct sched_concurrent_context {
>>> +	struct drm_mock_scheduler *sched;
>>> +	struct workqueue_struct *sub_wq;
>>> +	struct kunit *test;
>>> +	struct completion wait_go;
>>> +};
>>> +
>>> +KUNIT_DEFINE_ACTION_WRAPPER(drm_mock_sched_fini_wrap, drm_mock_sched_fini,
>>> +			    struct drm_mock_scheduler *);
>>> +
>>> +KUNIT_DEFINE_ACTION_WRAPPER(drm_mock_sched_entity_free_wrap, drm_mock_sched_entity_free,
>>> +			    struct drm_mock_sched_entity *);
>>> +
>>> +static void complete_destroy_workqueue(void *context)
>>> +{
>>> +	struct sched_concurrent_context *ctx = (struct sched_concurrent_context *)context;
>>> +
>>> +	complete_all(&ctx->wait_go);
>>> +
> 
> nit: do we need that empty line

I can remove it if you prefer.

>>> +	destroy_workqueue(ctx->sub_wq);
>>> +}
>>> +
>>> +static int drm_sched_concurrent_init(struct kunit *test)
>>> +{
>>> +	struct sched_concurrent_context *ctx;
>>> +	int ret;
>>> +
>>> +	ctx = kunit_kzalloc(test, sizeof(*ctx), GFP_KERNEL);
>>> +	KUNIT_ASSERT_NOT_NULL(test, ctx);
>>> +
>>> +	init_completion(&ctx->wait_go);
>>> +
>>> +	ctx->sched = drm_mock_sched_new(test, MAX_SCHEDULE_TIMEOUT);
>>> +
>>> +	ret = kunit_add_action_or_reset(test, drm_mock_sched_fini_wrap, ctx->sched);
>>> +	KUNIT_ASSERT_EQ(test, ret, 0);
>>> +
>>> +	/* Use an unbounded workqueue to maximize job submission concurrency */
>>> +	ctx->sub_wq = alloc_workqueue("drm-sched-submitters-wq", WQ_UNBOUND,
>>> +				      WQ_UNBOUND_MAX_ACTIVE);
>>> +	KUNIT_ASSERT_NOT_NULL(test, ctx->sub_wq);
>>> +
>>> +	ret = kunit_add_action_or_reset(test, complete_destroy_workqueue, ctx);
>>> +	KUNIT_ASSERT_EQ(test, ret, 0);
>>> +
>>> +	ctx->test = test;
>>> +	test->priv = ctx;
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +struct drm_sched_parallel_params {
>>> +	const char *description;
>>> +	unsigned int num_jobs;
>>> +	unsigned int num_workers;
>>> +};
>>> +
>>> +static const struct drm_sched_parallel_params drm_sched_parallel_cases[] = {
>>> +	{
>>> +		.description = "Workers submitting multiple jobs against a single entity",
>>
>> Each worker has own entity so the description is a bit confusing.
> 
> Do you have a suggestion for a better one?

Tvrtko provided a good suggestion later in the discussion. If it is also okay
with you, I'll be using his phrasing.

>>
>>> +		.num_jobs = 4,
>>> +		.num_workers = 16,
>>> +	},
>>> +};
>>> +
>>> +static void
>>> +drm_sched_parallel_desc(const struct drm_sched_parallel_params *params, char *desc)
>>> +{
>>> +	strscpy(desc, params->description, KUNIT_PARAM_DESC_SIZE);
>>> +}
>>> +
>>> +KUNIT_ARRAY_PARAM(drm_sched_parallel, drm_sched_parallel_cases, drm_sched_parallel_desc);
>>> +
>>> +struct parallel_worker {
>>> +	struct work_struct work;
>>> +	struct sched_concurrent_context *ctx;
>>> +	struct drm_mock_sched_entity *entity;
>>> +	struct drm_mock_sched_job **jobs;
>>> +	unsigned int id;
>>> +};
>>> +
>>> +static void drm_sched_parallel_worker(struct work_struct *work)
>>> +{
>>> +	const struct drm_sched_parallel_params *params;
>>> +	struct sched_concurrent_context *test_ctx;
>>> +	struct parallel_worker *worker;
>>> +	unsigned int i;
>>> +
>>> +	worker = container_of(work, struct parallel_worker, work);
>>> +	test_ctx = worker->ctx;
>>> +	params = test_ctx->test->param_value;
>>> +
>>> +	wait_for_completion(&test_ctx->wait_go);
>>> +
>>> +	kunit_info(test_ctx->test, "Parallel worker %u started\n", worker->id);
>>> +
>>> +	for (i = 0; i < params->num_jobs; i++)
>>> +		drm_mock_sched_job_submit(worker->jobs[i]);
>>> +}
>>> +
>>> +/*
>>> + * Spawns workers that submit a sequence of jobs to the mock scheduler.
>>> + * Once all jobs are submitted, the timeline is manually advanced.
>>> + */
>>> +static void drm_sched_parallel_submit_test(struct kunit *test)
>>> +{
>>> +	struct sched_concurrent_context *ctx = test->priv;
>>> +	const struct drm_sched_parallel_params *params = test->param_value;
>>> +	struct parallel_worker *workers;
>>> +	unsigned int i, j, completed_jobs;
>>> +	bool done;
>>> +	int ret;
>>> +
>>> +	workers = kunit_kcalloc(test, params->num_workers, sizeof(*workers),
>>> +				GFP_KERNEL);
>>> +	KUNIT_ASSERT_NOT_NULL(test, workers);
>>> +
>>> +	/*
>>> +	 * Init workers only after all jobs and entities have been successfully
>>> +	 * allocated. In this way, the cleanup logic for when an assertion fails
>>> +	 * can be simplified.
>>> +	 */
>>> +	for (i = 0; i < params->num_workers; i++) {
>>> +		workers[i].id = i;
>>> +		workers[i].ctx = ctx;
>>> +		workers[i].entity = drm_mock_sched_entity_new(test,
>>> +							      DRM_SCHED_PRIORITY_NORMAL,
>>> +							      ctx->sched);
>>> +
>>> +		ret = kunit_add_action_or_reset(test, drm_mock_sched_entity_free_wrap,
>>> +						workers[i].entity);
>>> +		KUNIT_ASSERT_EQ(test, ret, 0);
>>> +
>>> +		workers[i].jobs = kunit_kcalloc(test, params->num_jobs,
>>> +						sizeof(*workers[i].jobs), GFP_KERNEL);
>>> +		KUNIT_ASSERT_NOT_NULL(test, workers[i].jobs);
>>> +
>>> +		for (j = 0; j < params->num_jobs; j++)
>>> +			workers[i].jobs[j] = drm_mock_sched_job_new(test,
>>> +								    workers[i].entity);
> 
> The kernel doesn't strictly require the 80 column limit anymore. In
> some cases it can make sense to exceed it a bit to avoid an ugly line
> break.
> 
> Often one can make code a bit more readable by creating a helper
> variable such as
> 
> struct xy *pos = &workers[i];
> 
> You need workers[i] in many places, so that definitely seems handy.

I think using the subscript notation (workers[i]) makes the code easier to
read by highlighting that we are dealing with the i-th element of a group
of workers. At the same time, I understand your point about removing ugly
line breaks. I'll rework the code to remove some clutter.
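As a standalone sketch of the helper-variable style under discussion, the
struct below is a trimmed-down, hypothetical stand-in for the real
per-worker state, not the patch's types:

```c
#include <assert.h>

/* Trimmed-down stand-in for the per-worker state in the test. */
struct worker_state {
	unsigned int id;
	unsigned int period_us;
};

/*
 * Initialize each element through a helper pointer so the loop body does
 * not have to repeat the workers[i] subscript on every line.
 */
static void init_workers(struct worker_state *workers, unsigned int n,
			 unsigned int base_period_us)
{
	unsigned int i;

	for (i = 0; i < n; i++) {
		struct worker_state *w = &workers[i];

		w->id = i;
		w->period_us = base_period_us << i;
	}
}
```

Whether this reads better than the explicit subscripts is exactly the
style question being debated; both forms behave identically.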
 
>>> +	}
>>> +
>>> +	for (i = 0; i < params->num_workers; i++) {
>>> +		INIT_WORK(&workers[i].work, drm_sched_parallel_worker);
>>> +		queue_work(ctx->sub_wq, &workers[i].work);
>>> +	}
>>> +
>>> +	complete_all(&ctx->wait_go);
>>> +	flush_workqueue(ctx->sub_wq);
>>> +
>>> +	for (i = 0; i < params->num_workers; i++) {
>>> +		for (j = 0; j < params->num_jobs; j++) {
>>> +			done = drm_mock_sched_job_wait_scheduled(workers[i].jobs[j],
>>> +								 HZ);
> 
> same
> 
>>> +			KUNIT_ASSERT_TRUE(test, done);
>>> +
>>> +			done = drm_mock_sched_job_is_finished(workers[i].jobs[j]);
>>> +			KUNIT_ASSERT_FALSE(test, done);
>>
>> This second assert does not seem to be bringing much value, but instead 
>> makes the reader ask themselves why it is there. Remove it?
>>
>> Hmm in fact this whole loop could be removed. All that it is needed 
>> below is to wait for the last job to be completed.
> 
> I suppose it's being tested whether all jobs are finished. That sounds
> clean and not harmful to me.

I addressed this earlier in my reply to Tvrtko.

>>
>>> +		}
>>> +	}
>>> +
>>> +	completed_jobs = drm_mock_sched_advance(ctx->sched,
>>> +						params->num_workers * params->num_jobs);
>>> +	KUNIT_ASSERT_EQ(test, completed_jobs, params->num_workers * params->num_jobs);
> 
> `params` doesn't change here anymore, so the lengthy multiplication
> here, which is needed at two places, can be made a const at the top,
> making the code more readable.

Agreed.

>>> +
>>> +	for (i = 0; i < params->num_workers; i++) {
>>> +		for (j = 0; j < params->num_jobs; j++) {
>>> +			done = drm_mock_sched_job_is_finished(workers[i].jobs[j]);
>>> +			KUNIT_ASSERT_TRUE(test, done);
>>> +		}
>>> +	}
>>
>> So here, after advancing you just need to wait on the last job to complete.
>>
>> Not a deal breaker to have it verbose but usually the less is better and 
>> easier to spot the key thing being tested and what are the functional steps.
> 
> depends a bit on what Marco intended. Looks like "better safe than
> sorry" to me?

Same as above.

>>
>>> +}
>>> +
>>> +struct drm_sched_interleaved_params {
>>> +	const char *description;
>>> +	unsigned int job_base_period_us;
>>> +	unsigned int periods_cycle;
>>> +	unsigned int num_jobs;
>>> +	unsigned int num_workers;
>>> +};
>>> +
>>> +static const struct drm_sched_interleaved_params drm_sched_interleaved_cases[] = {
>>> +	{
>>> +		.description = "Workers submitting single jobs against a single entity",
>>
>> As with the parallel description.
> 
> I think (see again my comment about the commit message) what the test
> wants to implement is threaded submission to an entity shared between
> threads. That can certainly be made more verbose.

Same as above.

>>
>>> +		.job_base_period_us = 100,
>>> +		.periods_cycle = 10,
>>> +		.num_jobs = 1,
>>> +		.num_workers = 32,
>>> +	},
>>> +	{
>>> +		.description = "Workers submitting multiple jobs against a single entity",
>>> +		.job_base_period_us = 100,
>>> +		.periods_cycle = 10,
>>> +		.num_jobs = 4,
>>> +		.num_workers = 16,
>>> +	},
>>> +};
>>> +
>>> +static void
>>> +drm_sched_interleaved_desc(const struct drm_sched_interleaved_params *params, char *desc)
>>> +{
>>> +	strscpy(desc, params->description, KUNIT_PARAM_DESC_SIZE);
>>> +}
>>> +
>>> +KUNIT_ARRAY_PARAM(drm_sched_interleaved, drm_sched_interleaved_cases,
>>> +		  drm_sched_interleaved_desc);
>>> +
>>> +struct interleaved_worker {
>>> +	struct work_struct work;
>>> +	struct sched_concurrent_context *ctx;
>>> +	struct drm_mock_sched_entity *entity;
>>> +	struct drm_mock_sched_job **jobs;
>>> +	unsigned int id;
>>> +	unsigned int period_us;
>>> +	unsigned int timeout_us;
>>> +};
>>> +
>>> +static void drm_sched_interleaved_worker(struct work_struct *work)
>>> +{
>>> +	const struct drm_sched_interleaved_params *params;
>>> +	struct sched_concurrent_context *test_ctx;
>>> +	struct interleaved_worker *worker;
>>> +	unsigned int i;
>>> +	bool done;
>>> +
>>> +	worker = container_of(work, struct interleaved_worker, work);
>>> +	test_ctx = worker->ctx;
>>> +	params = test_ctx->test->param_value;
>>> +
>>> +	wait_for_completion(&test_ctx->wait_go);
>>> +
>>> +	kunit_info(test_ctx->test, "Interleaved worker %u with period %u us started\n",
>>> +		   worker->id, worker->period_us);
>>> +
>>> +	/* Release jobs as a periodic sequence */
>>> +	for (i = 0; i < params->num_jobs; i++) {
>>> +		drm_mock_sched_job_set_duration_us(worker->jobs[i], worker->period_us);
>>> +		drm_mock_sched_job_submit(worker->jobs[i]);
>>> +
>>> +		done = drm_mock_sched_job_wait_finished(worker->jobs[i],
> 
> worker->jobs[i] is used at 3 places and can be made a helper variable.

Same as above. I can change it, but I think that using the subscript operator
makes the code easier to read and understand in this case. I'll try with a
helper variable and see if that works.

>>> +							usecs_to_jiffies(worker->timeout_us));
>>> +		if (!done)
>>> +			kunit_info(test_ctx->test, "Job %u of worker %u timedout\n",
>>> +				   i, worker->id);
>>> +	}
>>> +}
>>> +
>>> +/*
>>> + * Spawns workers that submit a periodic sequence of jobs to the mock scheduler.
>>> + * Uses harmonic periods to allow interleaving and cycles through them to prevent
>>> + * excessively large execution times.
>>>
> 
> What are "large execution times", like too large for the test
> framework?

Right, this is unclear. The intent is to keep the overall test execution
time within a reasonable duration of a few seconds. I'll rework the
comment accordingly.
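For what it's worth, with the parameters used here the bound already
works out to a few seconds. A hypothetical standalone check of the
arithmetic (the function names are illustrative; the formulas mirror the
period_max_us and timeout_us computations in the patch):

```c
#include <assert.h>

/*
 * Largest period after the doubling cycle, as in the patch:
 * period_max_us = job_base_period_us * (1 << periods_cycle).
 */
static unsigned long period_max_us(unsigned int base_us, unsigned int cycle)
{
	return (unsigned long)base_us * (1UL << cycle);
}

/* Worst-case serialized timeout with the 2x safety factor. */
static unsigned long long total_timeout_us(unsigned int num_workers,
					   unsigned int base_us,
					   unsigned int cycle)
{
	return (unsigned long long)num_workers *
	       period_max_us(base_us, cycle) * 2;
}
```

With the values from drm_sched_interleaved_cases (base 100 us, cycle 10,
16 workers) this gives 3,276,800 us, i.e. roughly 3.3 seconds.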

>>>  Since the scheduler serializes jobs from all
>>> + * workers, the timeout is set to the hyperperiod with a 2x safety factor. Entities
>>> + * and jobs are pre-allocated in the main thread to avoid using KUnit assertions in
>>> + * the workers.
>>> + */
>>> +static void drm_sched_interleaved_submit_test(struct kunit *test)
> 
> It appears to me that this is effectively a separate test, which might
> want its own patch. What is the motivation behind adding it?

The overall goal of this patch is to add a new test suite for testing concurrent
submission. Initially, the suite contained just one test case, but then we agreed
to split it into two separate test cases. However, since we are still dealing with
a single test suite, I believe it can be sent as a single patch, as has
been done in the past according to the git log.
>>> +{
>>> +	struct sched_concurrent_context *ctx = test->priv;
>>> +	const struct drm_sched_interleaved_params *params = test->param_value;
>>> +	struct interleaved_worker *workers;
>>> +	unsigned int period_max_us, timeout_us;
>>> +	unsigned int i, j;
>>> +	bool done;
>>> +	int ret;
>>> +
>>> +	workers = kunit_kcalloc(test, params->num_workers, sizeof(*workers),
>>> +				GFP_KERNEL);
>>> +	KUNIT_ASSERT_NOT_NULL(test, workers);
>>> +
>>> +	period_max_us = params->job_base_period_us * (1 << params->periods_cycle);
>>> +	timeout_us = params->num_workers * period_max_us * 2;
>>> +
>>> +	/*
>>> +	 * Init workers only after all jobs and entities have been successfully
>>> +	 * allocated. In this way, the cleanup logic for when an assertion fails
>>> +	 * can be simplified.
>>> +	 */
>>> +	for (i = 0; i < params->num_workers; i++) {
>>> +		workers[i].id = i;
>>> +		workers[i].ctx = ctx;
>>> +		workers[i].timeout_us = timeout_us;
> 
> helper variable for workers[i].

Same as above.

> 
> P.
> 
> 
>>> +
>>> +		if (i % params->periods_cycle == 0)
>>> +			workers[i].period_us = params->job_base_period_us;
>>> +		else
>>> +			workers[i].period_us = workers[i - 1].period_us * 2;
>>
>> Are the two if statements effectively equivalent to:
>>
>>   period_us = params->job_base_period_us << (i % params->periods_cycle);
>>
>> ?
>>
>> Also, do you have an idea how to locally calculate a suitable 
>> periods_cycle instead of having to come up with a number in the 
>> drm_sched_interleaved_params array. Just strikes me as hard to know what 
>> to put in there if someone would want to add a test.
>>
>>> +
>>> +		workers[i].entity = drm_mock_sched_entity_new(test,
>>> +							      DRM_SCHED_PRIORITY_NORMAL,
>>> +							      ctx->sched);
>>> +
>>> +		ret = kunit_add_action_or_reset(test, drm_mock_sched_entity_free_wrap,
>>> +						workers[i].entity);
>>> +		KUNIT_ASSERT_EQ(test, ret, 0);
>>> +
>>> +		workers[i].jobs = kunit_kcalloc(test, params->num_jobs,
>>> +						sizeof(*workers[i].jobs), GFP_KERNEL);
>>> +		KUNIT_ASSERT_NOT_NULL(test, workers[i].jobs);
>>> +
>>> +		for (j = 0; j < params->num_jobs; j++) {
>>> +			workers[i].jobs[j] = drm_mock_sched_job_new(test,
>>> +								    workers[i].entity);
>>> +			done = drm_mock_sched_job_is_finished(workers[i].jobs[j]);
>>> +			KUNIT_ASSERT_FALSE(test, done);
>>
>> Same as above, this is asserted by a basic test case so I'd just remove it.
>>
>>> +		}
>>> +	}
>>> +
>>> +	for (i = 0; i < params->num_workers; i++) {
>>> +		INIT_WORK(&workers[i].work, drm_sched_interleaved_worker);
>>> +		queue_work(ctx->sub_wq, &workers[i].work);
>>> +	}
>>> +
>>> +	complete_all(&ctx->wait_go);
>>> +	flush_workqueue(ctx->sub_wq);
>>> +
>>> +	for (i = 0; i < params->num_workers; i++) {
>>> +		for (j = 0; j < params->num_jobs; j++) {
>>> +			done = drm_mock_sched_job_is_finished(workers[i].jobs[j]);
>>> +			KUNIT_ASSERT_TRUE(test, done);
>>> +		}
>>> +	}
>>
>> Here as well you could wait just for the last job per worker.
>>
>> On the overall submission pattern - Don't you end up with very uneven 
>> activity between the workers? Because the job duration is doubling and 
>> workers are single-shot, once the low index workers are done they are
>> done, and all that remains are the higher ones, and so the wave of fewer 
>> and fewer active workers continues. Low index workers do not end up 
>> racing with the job completions of the high index workers but only 
>> between themselves, and even that is further limited in the cycle=10 
>> workers=16 case.
>>
>> Alternative approach could be to set a per test time budget and just 
>> keep the workers submitting until over. It would be simpler to 
>> understand and there would be more submit/complete overlap.
>>
>> Regards,
>>
>> Tvrtko
>>
>>> +}
>>> +
>>> +static struct kunit_case drm_sched_concurrent_tests[] = {
>>> +	KUNIT_CASE_PARAM(drm_sched_parallel_submit_test, drm_sched_parallel_gen_params),
>>> +	KUNIT_CASE_PARAM(drm_sched_interleaved_submit_test, drm_sched_interleaved_gen_params),
>>> +	{}
>>> +};
>>> +
>>> +static struct kunit_suite drm_sched_concurrent = {
>>> +	.name = "drm_sched_concurrent_tests",
>>> +	.init = drm_sched_concurrent_init,
>>> +	.test_cases = drm_sched_concurrent_tests,
>>> +};
>>> +
>>>   static struct kunit_case drm_sched_cancel_tests[] = {
>>>   	KUNIT_CASE(drm_sched_basic_cancel),
>>>   	{}
>>> @@ -556,6 +893,7 @@ static struct kunit_suite drm_sched_credits = {
>>>   };
>>>   
>>>   kunit_test_suites(&drm_sched_basic,
>>> +		  &drm_sched_concurrent,
>>>   		  &drm_sched_timeout,
>>>   		  &drm_sched_cancel,
>>>   		  &drm_sched_priority,
>>
> 

Thanks,
Marco

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] drm/sched: Add new KUnit test suite for concurrent job submission
  2026-03-18 10:51     ` Tvrtko Ursulin
@ 2026-03-18 17:23       ` Marco Pagani
  2026-03-19  8:50         ` Tvrtko Ursulin
  0 siblings, 1 reply; 13+ messages in thread
From: Marco Pagani @ 2026-03-18 17:23 UTC (permalink / raw)
  To: Tvrtko Ursulin, Philipp Stanner
  Cc: dri-devel, linux-kernel, Matthew Brost, Danilo Krummrich,
	Christian König, Maarten Lankhorst, Maxime Ripard,
	Thomas Zimmermann, David Airlie, Simona Vetter



On 18/03/2026 11:51, Tvrtko Ursulin wrote:
> 
> On 17/03/2026 17:04, Marco Pagani wrote:
>> On 23/02/2026 17:25, Tvrtko Ursulin wrote:
>>>
>>> On 19/02/2026 14:07, Marco Pagani wrote:
>>>> Add a new test suite to simulate concurrent job submissions to the DRM
>>>> scheduler. The suite includes two initial test cases: (i) a primary test
>>>> case for parallel job submission and (ii) a secondary test case for
>>>> interleaved job submission and completion. In the first test case, worker
>>>> threads concurrently submit jobs to the scheduler and then the timeline is
>>>> manually advanced. In the second test case, worker threads periodically
>>>> submit a sequence of jobs to the mock scheduler. Periods are chosen as
>>>> harmonic, starting from a base period, to allow interleaving between
>>>> submission and completion. To avoid impractically large execution times,
>>>> periods are cycled. The timeline is advanced automatically when the jobs
>>>> complete. This models how job submission from userspace (in process
>>>> context) may interleave with job completion (hrtimer callback interrupt
>>>> context, in the test case) by a physical device.
>>>>
>>>> Signed-off-by: Marco Pagani <marco.pagani@linux.dev>
>>>> ---
>>>> Changes in v1:
>>>> - Split the original suite into two test cases (Tvrtko Ursulin).
>>>> - General test refactoring and variable renaming for clarity.
>>>> - Changed parameters to have a fairer workload distribution.
>>>> - Improved cleanup logic.
>>>> - Added kunit_info() prints for tracing workers.
>>>> ---
>>>>    drivers/gpu/drm/scheduler/tests/tests_basic.c | 338 ++++++++++++++++++
>>>>    1 file changed, 338 insertions(+)
>>>>
>>>> diff --git a/drivers/gpu/drm/scheduler/tests/tests_basic.c b/drivers/gpu/drm/scheduler/tests/tests_basic.c
>>>> index 82a41a456b0a..391edcbaf43a 100644
>>>> --- a/drivers/gpu/drm/scheduler/tests/tests_basic.c
>>>> +++ b/drivers/gpu/drm/scheduler/tests/tests_basic.c
>>>> @@ -2,6 +2,8 @@
>>>>    /* Copyright (c) 2025 Valve Corporation */
>>>>    
>>>>    #include <linux/delay.h>
>>>> +#include <linux/completion.h>
>>>> +#include <linux/workqueue.h>
>>>>    
>>>>    #include "sched_tests.h"
>>>>    
>>>> @@ -235,6 +237,341 @@ static void drm_sched_basic_cancel(struct kunit *test)
>>>>    	KUNIT_ASSERT_EQ(test, job->hw_fence.error, -ECANCELED);
>>>>    }
>>>>    
>>>> +struct sched_concurrent_context {
>>>> +	struct drm_mock_scheduler *sched;
>>>> +	struct workqueue_struct *sub_wq;
>>>> +	struct kunit *test;
>>>> +	struct completion wait_go;
>>>> +};
>>>> +
>>>> +KUNIT_DEFINE_ACTION_WRAPPER(drm_mock_sched_fini_wrap, drm_mock_sched_fini,
>>>> +			    struct drm_mock_scheduler *);
>>>> +
>>>> +KUNIT_DEFINE_ACTION_WRAPPER(drm_mock_sched_entity_free_wrap, drm_mock_sched_entity_free,
>>>> +			    struct drm_mock_sched_entity *);
>>>> +
>>>> +static void complete_destroy_workqueue(void *context)
>>>> +{
>>>> +	struct sched_concurrent_context *ctx = (struct sched_concurrent_context *)context;
>>>> +
>>>> +	complete_all(&ctx->wait_go);
>>>> +
>>>> +	destroy_workqueue(ctx->sub_wq);
>>>> +}
>>>> +
>>>> +static int drm_sched_concurrent_init(struct kunit *test)
>>>> +{
>>>> +	struct sched_concurrent_context *ctx;
>>>> +	int ret;
>>>> +
>>>> +	ctx = kunit_kzalloc(test, sizeof(*ctx), GFP_KERNEL);
>>>> +	KUNIT_ASSERT_NOT_NULL(test, ctx);
>>>> +
>>>> +	init_completion(&ctx->wait_go);
>>>> +
>>>> +	ctx->sched = drm_mock_sched_new(test, MAX_SCHEDULE_TIMEOUT);
>>>> +
>>>> +	ret = kunit_add_action_or_reset(test, drm_mock_sched_fini_wrap, ctx->sched);
>>>> +	KUNIT_ASSERT_EQ(test, ret, 0);
>>>> +
>>>> +	/* Use an unbounded workqueue to maximize job submission concurrency */
>>>> +	ctx->sub_wq = alloc_workqueue("drm-sched-submitters-wq", WQ_UNBOUND,
>>>> +				      WQ_UNBOUND_MAX_ACTIVE);
>>>> +	KUNIT_ASSERT_NOT_NULL(test, ctx->sub_wq);
>>>> +
>>>> +	ret = kunit_add_action_or_reset(test, complete_destroy_workqueue, ctx);
>>>> +	KUNIT_ASSERT_EQ(test, ret, 0);
>>>> +
>>>> +	ctx->test = test;
>>>> +	test->priv = ctx;
>>>> +
>>>> +	return 0;
>>>> +}
>>>> +
>>>> +struct drm_sched_parallel_params {
>>>> +	const char *description;
>>>> +	unsigned int num_jobs;
>>>> +	unsigned int num_workers;
>>>> +};
>>>> +
>>>> +static const struct drm_sched_parallel_params drm_sched_parallel_cases[] = {
>>>> +	{
>>>> +		.description = "Workers submitting multiple jobs against a single entity",
>>>
>>> Each worker has own entity so the description is a bit confusing.
>>
>> Right, I'll change it to "Multiple workers submitting multiple jobs from their entity"
>> as you suggested in the following email.
>>
>>>> +		.num_jobs = 4,
>>>> +		.num_workers = 16,
>>>> +	},
>>>> +};
>>>> +
>>>> +static void
>>>> +drm_sched_parallel_desc(const struct drm_sched_parallel_params *params, char *desc)
>>>> +{
>>>> +	strscpy(desc, params->description, KUNIT_PARAM_DESC_SIZE);
>>>> +}
>>>> +
>>>> +KUNIT_ARRAY_PARAM(drm_sched_parallel, drm_sched_parallel_cases, drm_sched_parallel_desc);
>>>> +
>>>> +struct parallel_worker {
>>>> +	struct work_struct work;
>>>> +	struct sched_concurrent_context *ctx;
>>>> +	struct drm_mock_sched_entity *entity;
>>>> +	struct drm_mock_sched_job **jobs;
>>>> +	unsigned int id;
>>>> +};
>>>> +
>>>> +static void drm_sched_parallel_worker(struct work_struct *work)
>>>> +{
>>>> +	const struct drm_sched_parallel_params *params;
>>>> +	struct sched_concurrent_context *test_ctx;
>>>> +	struct parallel_worker *worker;
>>>> +	unsigned int i;
>>>> +
>>>> +	worker = container_of(work, struct parallel_worker, work);
>>>> +	test_ctx = worker->ctx;
>>>> +	params = test_ctx->test->param_value;
>>>> +
>>>> +	wait_for_completion(&test_ctx->wait_go);
>>>> +
>>>> +	kunit_info(test_ctx->test, "Parallel worker %u started\n", worker->id);
>>>> +
>>>> +	for (i = 0; i < params->num_jobs; i++)
>>>> +		drm_mock_sched_job_submit(worker->jobs[i]);
>>>> +}
>>>> +
>>>> +/*
>>>> + * Spawns workers that submit a sequence of jobs to the mock scheduler.
>>>> + * Once all jobs are submitted, the timeline is manually advanced.
>>>> + */
>>>> +static void drm_sched_parallel_submit_test(struct kunit *test)
>>>> +{
>>>> +	struct sched_concurrent_context *ctx = test->priv;
>>>> +	const struct drm_sched_parallel_params *params = test->param_value;
>>>> +	struct parallel_worker *workers;
>>>> +	unsigned int i, j, completed_jobs;
>>>> +	bool done;
>>>> +	int ret;
>>>> +
>>>> +	workers = kunit_kcalloc(test, params->num_workers, sizeof(*workers),
>>>> +				GFP_KERNEL);
>>>> +	KUNIT_ASSERT_NOT_NULL(test, workers);
>>>> +
>>>> +	/*
>>>> +	 * Init workers only after all jobs and entities have been successfully
>>>> +	 * allocated. In this way, the cleanup logic for when an assertion fails
>>>> +	 * can be simplified.
>>>> +	 */
>>>> +	for (i = 0; i < params->num_workers; i++) {
>>>> +		workers[i].id = i;
>>>> +		workers[i].ctx = ctx;
>>>> +		workers[i].entity = drm_mock_sched_entity_new(test,
>>>> +							      DRM_SCHED_PRIORITY_NORMAL,
>>>> +							      ctx->sched);
>>>> +
>>>> +		ret = kunit_add_action_or_reset(test, drm_mock_sched_entity_free_wrap,
>>>> +						workers[i].entity);
>>>> +		KUNIT_ASSERT_EQ(test, ret, 0);
>>>> +
>>>> +		workers[i].jobs = kunit_kcalloc(test, params->num_jobs,
>>>> +						sizeof(*workers[i].jobs), GFP_KERNEL);
>>>> +		KUNIT_ASSERT_NOT_NULL(test, workers[i].jobs);
>>>> +
>>>> +		for (j = 0; j < params->num_jobs; j++)
>>>> +			workers[i].jobs[j] = drm_mock_sched_job_new(test,
>>>> +								    workers[i].entity);
>>>> +	}
>>>> +
>>>> +	for (i = 0; i < params->num_workers; i++) {
>>>> +		INIT_WORK(&workers[i].work, drm_sched_parallel_worker);
>>>> +		queue_work(ctx->sub_wq, &workers[i].work);
>>>> +	}
>>>> +
>>>> +	complete_all(&ctx->wait_go);
>>>> +	flush_workqueue(ctx->sub_wq);
>>>> +
>>>> +	for (i = 0; i < params->num_workers; i++) {
>>>> +		for (j = 0; j < params->num_jobs; j++) {
>>>> +			done = drm_mock_sched_job_wait_scheduled(workers[i].jobs[j],
>>>> +								 HZ);
>>>> +			KUNIT_ASSERT_TRUE(test, done);
>>>> +
>>>> +			done = drm_mock_sched_job_is_finished(workers[i].jobs[j]);
>>>> +			KUNIT_ASSERT_FALSE(test, done);
>>>
>>> This second assert does not seem to be bringing much value, but instead
>>> makes the reader ask themselves why it is there. Remove it?
>>>
>>> Hmm in fact this whole loop could be removed. All that it is needed
>>> below is to wait for the last job to be completed.
>>
>> I'll reply to this in the following mail since Philipp joined the conversation.
>>   
>>>> +		}
>>>> +	}
>>>> +
>>>> +	completed_jobs = drm_mock_sched_advance(ctx->sched,
>>>> +						params->num_workers * params->num_jobs);
>>>> +	KUNIT_ASSERT_EQ(test, completed_jobs, params->num_workers * params->num_jobs);
>>>> +
>>>> +	for (i = 0; i < params->num_workers; i++) {
>>>> +		for (j = 0; j < params->num_jobs; j++) {
>>>> +			done = drm_mock_sched_job_is_finished(workers[i].jobs[j]);
>>>> +			KUNIT_ASSERT_TRUE(test, done);
>>>> +		}
>>>> +	}
>>>
>>> So here, after advancing you just need to wait on the last job to complete.
>>>
>>> Not a deal breaker to have it verbose but usually the less is better and
>>> easier to spot the key thing being tested and what are the functional steps.
>>>
>>>> +}
>>>> +
>>>> +struct drm_sched_interleaved_params {
>>>> +	const char *description;
>>>> +	unsigned int job_base_period_us;
>>>> +	unsigned int periods_cycle;
>>>> +	unsigned int num_jobs;
>>>> +	unsigned int num_workers;
>>>> +};
>>>> +
>>>> +static const struct drm_sched_interleaved_params drm_sched_interleaved_cases[] = {
>>>> +	{
>>>> +		.description = "Workers submitting single jobs against a single entity",
>>>
>>> As with the parallel description.
>>
>> Agreed.
>>   
>>>> +		.job_base_period_us = 100,
>>>> +		.periods_cycle = 10,
>>>> +		.num_jobs = 1,
>>>> +		.num_workers = 32,
>>>> +	},
>>>> +	{
>>>> +		.description = "Workers submitting multiple jobs against a single entity",
>>>> +		.job_base_period_us = 100,
>>>> +		.periods_cycle = 10,
>>>> +		.num_jobs = 4,
>>>> +		.num_workers = 16,
>>>> +	},
>>>> +};
>>>> +
>>>> +static void
>>>> +drm_sched_interleaved_desc(const struct drm_sched_interleaved_params *params, char *desc)
>>>> +{
>>>> +	strscpy(desc, params->description, KUNIT_PARAM_DESC_SIZE);
>>>> +}
>>>> +
>>>> +KUNIT_ARRAY_PARAM(drm_sched_interleaved, drm_sched_interleaved_cases,
>>>> +		  drm_sched_interleaved_desc);
>>>> +
>>>> +struct interleaved_worker {
>>>> +	struct work_struct work;
>>>> +	struct sched_concurrent_context *ctx;
>>>> +	struct drm_mock_sched_entity *entity;
>>>> +	struct drm_mock_sched_job **jobs;
>>>> +	unsigned int id;
>>>> +	unsigned int period_us;
>>>> +	unsigned int timeout_us;
>>>> +};
>>>> +
>>>> +static void drm_sched_interleaved_worker(struct work_struct *work)
>>>> +{
>>>> +	const struct drm_sched_interleaved_params *params;
>>>> +	struct sched_concurrent_context *test_ctx;
>>>> +	struct interleaved_worker *worker;
>>>> +	unsigned int i;
>>>> +	bool done;
>>>> +
>>>> +	worker = container_of(work, struct interleaved_worker, work);
>>>> +	test_ctx = worker->ctx;
>>>> +	params = test_ctx->test->param_value;
>>>> +
>>>> +	wait_for_completion(&test_ctx->wait_go);
>>>> +
>>>> +	kunit_info(test_ctx->test, "Interleaved worker %u with period %u us started\n",
>>>> +		   worker->id, worker->period_us);
>>>> +
>>>> +	/* Release jobs as a periodic sequence */
>>>> +	for (i = 0; i < params->num_jobs; i++) {
>>>> +		drm_mock_sched_job_set_duration_us(worker->jobs[i], worker->period_us);
>>>> +		drm_mock_sched_job_submit(worker->jobs[i]);
>>>> +
>>>> +		done = drm_mock_sched_job_wait_finished(worker->jobs[i],
>>>> +							usecs_to_jiffies(worker->timeout_us));
>>>> +		if (!done)
>>>> +			kunit_info(test_ctx->test, "Job %u of worker %u timed out\n",
>>>> +				   i, worker->id);
>>>> +	}
>>>> +}
>>>> +
>>>> +/*
>>>> + * Spawns workers that submit a periodic sequence of jobs to the mock scheduler.
>>>> + * Uses harmonic periods to allow interleaving and cycles through them to prevent
>>>> + * excessively large execution times. Since the scheduler serializes jobs from all
>>>> + * workers, the timeout is set to the hyperperiod with a 2x safety factor. Entities
>>>> + * and jobs are pre-allocated in the main thread to avoid using KUnit assertions in
>>>> + * the workers.
>>>> + */
>>>> +static void drm_sched_interleaved_submit_test(struct kunit *test)
>>>> +{
>>>> +	struct sched_concurrent_context *ctx = test->priv;
>>>> +	const struct drm_sched_interleaved_params *params = test->param_value;
>>>> +	struct interleaved_worker *workers;
>>>> +	unsigned int period_max_us, timeout_us;
>>>> +	unsigned int i, j;
>>>> +	bool done;
>>>> +	int ret;
>>>> +
>>>> +	workers = kunit_kcalloc(test, params->num_workers, sizeof(*workers),
>>>> +				GFP_KERNEL);
>>>> +	KUNIT_ASSERT_NOT_NULL(test, workers);
>>>> +
>>>> +	period_max_us = params->job_base_period_us * (1 << params->periods_cycle);
>>>> +	timeout_us = params->num_workers * period_max_us * 2;
>>>> +
>>>> +	/*
>>>> +	 * Init workers only after all jobs and entities have been successfully
>>>> +	 * allocated. In this way, the cleanup logic for when an assertion fails
>>>> +	 * can be simplified.
>>>> +	 */
>>>> +	for (i = 0; i < params->num_workers; i++) {
>>>> +		workers[i].id = i;
>>>> +		workers[i].ctx = ctx;
>>>> +		workers[i].timeout_us = timeout_us;
>>>> +
>>>> +		if (i % params->periods_cycle == 0)
>>>> +			workers[i].period_us = params->job_base_period_us;
>>>> +		else
>>>> +			workers[i].period_us = workers[i - 1].period_us * 2;
>>>
>>> Are the two if statements effectively equivalent to:
>>>
>>>    period_us = params->job_base_period_us << (i % params->periods_cycle);
>>>
>>> ?
>>
>> Yes. Actually, I was very undecided between using the closed form and the
>> iterative form. In the end, I went with the iterative one because I thought
>> it would be easier to understand and maintain, and this code is not
>> performance-critical. However, I don't have a strong opinion on this.
>> Either way works for me.
> 
> AFAIR, and maybe it is just me, but the if-else version referencing the
> previous (i - 1) period took me some time to figure out what it is
> about. With my version it is obvious that it is a repeating cycle (modulo
> operator) of doubling intervals (<<).

Right, a for loop with (i - 1) can indeed be confusing. I'll take that
into account.

>>
>>> Also, do you have an idea how to locally calculate a suitable
>>> periods_cycle instead of having to come up with a number in the
>>> drm_sched_interleaved_params array. Just strikes me as hard to know what
>>> to put in there if someone would want to add a test.
>>
>> Please see below.
>>   
>>>> +
>>>> +		workers[i].entity = drm_mock_sched_entity_new(test,
>>>> +							      DRM_SCHED_PRIORITY_NORMAL,
>>>> +							      ctx->sched);
>>>> +
>>>> +		ret = kunit_add_action_or_reset(test, drm_mock_sched_entity_free_wrap,
>>>> +						workers[i].entity);
>>>> +		KUNIT_ASSERT_EQ(test, ret, 0);
>>>> +
>>>> +		workers[i].jobs = kunit_kcalloc(test, params->num_jobs,
>>>> +						sizeof(*workers[i].jobs), GFP_KERNEL);
>>>> +		KUNIT_ASSERT_NOT_NULL(test, workers[i].jobs);
>>>> +
>>>> +		for (j = 0; j < params->num_jobs; j++) {
>>>> +			workers[i].jobs[j] = drm_mock_sched_job_new(test,
>>>> +								    workers[i].entity);
>>>> +			done = drm_mock_sched_job_is_finished(workers[i].jobs[j]);
>>>> +			KUNIT_ASSERT_FALSE(test, done);
>>>
>>> Same as above, this is asserted by a basic test case so I'd just remove it.
>>>
>>>> +		}
>>>> +	}
>>>> +
>>>> +	for (i = 0; i < params->num_workers; i++) {
>>>> +		INIT_WORK(&workers[i].work, drm_sched_interleaved_worker);
>>>> +		queue_work(ctx->sub_wq, &workers[i].work);
>>>> +	}
>>>> +
>>>> +	complete_all(&ctx->wait_go);
>>>> +	flush_workqueue(ctx->sub_wq);
>>>> +
>>>> +	for (i = 0; i < params->num_workers; i++) {
>>>> +		for (j = 0; j < params->num_jobs; j++) {
>>>> +			done = drm_mock_sched_job_is_finished(workers[i].jobs[j]);
>>>> +			KUNIT_ASSERT_TRUE(test, done);
>>>> +		}
>>>> +	}
>>>
>>> Here as well you could wait just for the last job per worker.
>>>
>>> On the overall submission pattern - don't you end up with very uneven
>>> activity between the workers? Because the job duration is doubling and
>>> workers are single-shot, once the low index workers are done they are
>>> done, and all that remains are the higher ones, so the wave of fewer
>>> and fewer active workers continues. Low index workers do not end up
>>> racing with the job completions of the high index workers but only
>>> between themselves, and even that is quite limited in the cycle=10,
>>> workers=16 case.
>>
>> Good point.
>>
>>> An alternative approach could be to set a per-test time budget and just
>>> keep the workers submitting until it is over. It would be simpler to
>>> understand and there would be more submit/complete overlap.
>>
>> I agree. Using a test time budget and having workers continuously
>> submit jobs until it expires would make better use of the test time.
>> I'm thinking that the simplest and most straightforward approach would
>> be to cyclically distribute periods among workers until they reach the
>> largest possible value below the test duration, which would coincide
>> with the hyperperiod. This would also solve the issue of selecting a
>> suitable periods_cycle parameter that you mentioned earlier.
>> In practice, something like this:
>>
>> drm_sched_interleaved_params [...]
>> {
>>          .num_workers = N
>>          .test_max_time = T
>>          .job_base_period_us = P         /* Small GPU job, 100 us */
>> }
>>
>> period_us = job_base_period_us;
>> for (i = 0; i < params->num_workers; i++) {
>>          workers[i].period_us = period_us;
>>          period_us *= 2;
>>          if (period_us > test_max_time)
>>                  period_us = job_base_period_us;
>> }
>>
>>
>> What do you think?
> 
> 
> Again some time has passed so rather than going to re-read your patch I 
> will go from memory. IIRC I was thinking something really dumb and 100% 
> time bound with no need to think when coding and reviewing. Each thread 
> simply does:
> 
> ktime_t start = ktime_get();
> 
> do {
>   .. thread doing its submission pattern thing ..
> } while (ktime_to_ms(ktime_sub(ktime_get(), start)) < test->time_ms);
> 
> May miss the time target by a job period_us but who cares.

Sorry for the delay. I got pulled into other things. I left out the worker
execution part since we already agreed on that. Instead, I've replied with
some pseudocode describing a new strategy for period assignments from test
parameters that takes into account your comments.

Thanks,
Marco




^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] drm/sched: Add new KUnit test suite for concurrent job submission
  2026-03-18 10:44         ` Tvrtko Ursulin
@ 2026-03-18 18:15           ` Marco Pagani
  0 siblings, 0 replies; 13+ messages in thread
From: Marco Pagani @ 2026-03-18 18:15 UTC (permalink / raw)
  To: Tvrtko Ursulin, phasta
  Cc: dri-devel, linux-kernel, Matthew Brost, Danilo Krummrich,
	Christian König, Maarten Lankhorst, Maxime Ripard,
	Thomas Zimmermann, David Airlie, Simona Vetter



On 18/03/2026 11:44, Tvrtko Ursulin wrote:
> 
> On 17/03/2026 17:27, Marco Pagani wrote:
>> On 25/02/2026 10:03, Tvrtko Ursulin wrote:
>>>
>>> On 25/02/2026 07:47, Philipp Stanner wrote:
>>>> On Mon, 2026-02-23 at 16:25 +0000, Tvrtko Ursulin wrote:
>>>>>
>>>>> On 19/02/2026 14:07, Marco Pagani wrote:
>>>>>> Add a new test suite to simulate concurrent job submissions to the DRM
>>>>>> scheduler. The suite includes two initial test cases: (i) a primary test
>>>>>> case for parallel job submission and (ii) a secondary test case for
>>>>>> interleaved job submission and completion. In the first test case, worker
>>>>>> threads concurrently submit jobs to the scheduler and then the timeline is
>>>>>> manually advanced. In the second test case, worker threads periodically
>>>>>> submit a sequence of jobs to the mock scheduler. Periods are chosen as
>>>>>> harmonic, starting from a base period, to allow interleaving between
>>>>>> submission and completion. To avoid impractically large execution times,
>>>>>> periods are cycled. The timeline is advanced automatically when the jobs
>>>>>> completes. This models how job submission from userspace (in process
>>>>>> context) may interleave with job completion (hrtimer callback interrupt
>>>>>> context, in the test case) by a physical device.
>>>>
>>>> I still maintain the opinion expressed the last time: that the commit
>>>> message should make explicit why the patch / test is added. Which this
>>>> doesn't do. It just says: "We add X", but not "Currently, the problem
>>>> is that YZ, thus we need X".
>>>> (also breaking longer commit messages into paragraphs is nice)
>>>>
>>>> Also see my comments about interleaved submits below.
>>>
>>> I'll address the ones addressed to me.
>>>
>>> 8><
>>>
>>>>>> +struct drm_sched_parallel_params {
>>>>>> +	const char *description;
>>>>>> +	unsigned int num_jobs;
>>>>>> +	unsigned int num_workers;
>>>>>> +};
>>>>>> +
>>>>>> +static const struct drm_sched_parallel_params drm_sched_parallel_cases[] = {
>>>>>> +	{
>>>>>> +		.description = "Workers submitting multiple jobs against a single entity",
>>>>>
>>>>> Each worker has own entity so the description is a bit confusing.
>>>>
>>>> Do you have a suggestion for a better one?
>>>
>>> Along the lines of:
>>>
>>> "Multiple workers submitting multiple jobs from their entity"
>>>
>>> 8><
>>>
>>>>>> +	}
>>>>>> +
>>>>>> +	for (i = 0; i < params->num_workers; i++) {
>>>>>> +		INIT_WORK(&workers[i].work, drm_sched_parallel_worker);
>>>>>> +		queue_work(ctx->sub_wq, &workers[i].work);
>>>>>> +	}
>>>>>> +
>>>>>> +	complete_all(&ctx->wait_go);
>>>>>> +	flush_workqueue(ctx->sub_wq);
>>>>>> +
>>>>>> +	for (i = 0; i < params->num_workers; i++) {
>>>>>> +		for (j = 0; j < params->num_jobs; j++) {
>>>>>> +			done = drm_mock_sched_job_wait_scheduled(workers[i].jobs[j],
>>>>>> +								 HZ);
>>>>
>>>> same
>>>>
>>>>>> +			KUNIT_ASSERT_TRUE(test, done);
>>>>>> +
>>>>>> +			done = drm_mock_sched_job_is_finished(workers[i].jobs[j]);
>>>>>> +			KUNIT_ASSERT_FALSE(test, done);
>>>>>
>>>>> This second assert does not seem to be bringing much value, but instead
>>>>> makes the reader ask themselves why it is there. Remove it?
>>>>>
>>>>> Hmm in fact this whole loop could be removed. All that is needed
>>>>> below is to wait for the last job to be completed.
>>>>
>>>> I suppose it's being tested whether all jobs are finished. That sounds
>>>> clean and not harmful to me.
>>>
>>> No, it is assert false. It is testing that the jobs have been scheduled
>>> but not completed before the timeline is manually advanced. Both of those
>>> behaviours are already covered by the existing basic test cases.
>>>
>>> In my view the general best practice is to focus on the thing being
>>> tested, which in this case is the submission side of things. The rest
>>> can just distract the reader. And in this case that is parallel
>>> submission, which is all done and dusted by the time flush_workqueue
>>> above finishes. Everything after that point is just test cleanup.
>>
>> I see what you mean. By that point, concurrent submission is complete, so it
>> may be considered outside the scope of this test. However, my intent was to
>> check that the basic behaviour of the scheduler also holds after the concurrent
>> submissions of multiple jobs, which is not covered by the basic test case.
>> That said, I'm open to removing those asserts if you and Philipp believe
>> they are redundant.
> 
> Some time has passed and I don't exactly remember now with the trimmed 
> quote. If this part of the test is using manual timeline advancement 
> then the assert is basically testing the mock scheduler (not the DRM 
> scheduler).

This is a good point. I'm probably going to remove these asserts.

> For me that is lukewarm, but if you insist feel free to keep it.

Thanks,
Marco



* Re: [PATCH] drm/sched: Add new KUnit test suite for concurrent job submission
  2026-03-18 17:23       ` Marco Pagani
@ 2026-03-19  8:50         ` Tvrtko Ursulin
  2026-03-20 11:25           ` Marco Pagani
  0 siblings, 1 reply; 13+ messages in thread
From: Tvrtko Ursulin @ 2026-03-19  8:50 UTC (permalink / raw)
  To: Marco Pagani, Philipp Stanner
  Cc: dri-devel, linux-kernel, Matthew Brost, Danilo Krummrich,
	Christian König, Maarten Lankhorst, Maxime Ripard,
	Thomas Zimmermann, David Airlie, Simona Vetter



On 18/03/2026 17:23, Marco Pagani wrote:

8><

>>>> Alternative approach could be to set a per test time budget and just
>>>> keep the workers submitting until over. It would be simpler to
>>>> understand and there would be more submit/complete overlap.
>>>
>>> I agree. Using a test time budget and having workers continuously
>>> submit jobs until it expires would make better use of the test time.
>>> I'm thinking that the simplest and most straightforward approach would
>>> be to cyclically distribute periods among workers until they reach the
>>> largest possible value below the test duration, which would coincide
>>> with the hyperperiod. This would also solve the issue of selecting a
>>> suitable periods_cycle parameter that you mentioned earlier.
>>> In practice, something like this:
>>>
>>> drm_sched_interleaved_params [...]
>>> {
>>>           .num_workers = N
>>>           .test_max_time = T
>>>           .job_base_period_us = P         /* Small GPU job, 100 us */
>>> }
>>>
>>> period_us = job_base_period_us;
>>> for (i = 0; i < params->num_workers; i++) {
>>>           workers[i].period_us = period_us;
>>>           period_us *= 2;
>>>           if (period_us > test_max_time)
>>>                   period_us = job_base_period_us;
>>> }
>>>
>>>
>>> What do you think?
>>
>>
>> Again some time has passed so rather than going to re-read your patch I
>> will go from memory. IIRC I was thinking something really dumb and 100%
>> time bound with no need to think when coding and reviewing. Each thread
>> simply does:
>>
>> ktime_t start = ktime_get();
>>
>> do {
>>    .. thread doing its submission pattern thing ..
>> } while (ktime_to_ms(ktime_sub(ktime_get(), start)) < test->time_ms);
>>
>> May miss the time target by a job period_us but who cares.
> 
> Sorry for the delay. I got pulled into other things. I left out the worker
> execution part since we already agreed on that. Instead, I've replied with
> some pseudocode describing a new strategy for period assignments from test
> parameters that takes into account your comments.

Sorry, I misread: when I saw the test_max_time clamping I thought it was
about runtime control. I guess it makes sense to clamp it to avoid
over-shooting by too much. You removed the cyclical nature, so I guess in
practice this will not happen? I mean, given the number of workers vs the
base period, you don't expect more than one of them to get clamped?

Regards,

Tvrtko



* Re: [PATCH] drm/sched: Add new KUnit test suite for concurrent job submission
  2026-03-19  8:50         ` Tvrtko Ursulin
@ 2026-03-20 11:25           ` Marco Pagani
  0 siblings, 0 replies; 13+ messages in thread
From: Marco Pagani @ 2026-03-20 11:25 UTC (permalink / raw)
  To: Tvrtko Ursulin, Philipp Stanner
  Cc: dri-devel, linux-kernel, Matthew Brost, Danilo Krummrich,
	Christian König, Maarten Lankhorst, Maxime Ripard,
	Thomas Zimmermann, David Airlie, Simona Vetter



On 19/03/2026 09:50, Tvrtko Ursulin wrote:
> 
> 
> On 18/03/2026 17:23, Marco Pagani wrote:
> 
> 8><
> 
>>>>> Alternative approach could be to set a per test time budget and just
>>>>> keep the workers submitting until over. It would be simpler to
>>>>> understand and there would be more submit/complete overlap.
>>>>
>>>> I agree. Using a test time budget and having workers continuously
>>>> submit jobs until it expires would make better use of the test time.
>>>> I'm thinking that the simplest and most straightforward approach would
>>>> be to cyclically distribute periods among workers until they reach the
>>>> largest possible value below the test duration, which would coincide
>>>> with the hyperperiod. This would also solve the issue of selecting a
>>>> suitable periods_cycle parameter that you mentioned earlier.
>>>> In practice, something like this:
>>>>
>>>> drm_sched_interleaved_params [...]
>>>> {
>>>>           .num_workers = N
>>>>           .test_max_time = T
>>>>           .job_base_period_us = P         /* Small GPU job, 100 us */
>>>> }
>>>>
>>>> period_us = job_base_period_us;
>>>> for (i = 0; i < params->num_workers; i++) {
>>>>           workers[i].period_us = period_us;
>>>>           period_us *= 2;
>>>>           if (period_us > test_max_time)
>>>>                   period_us = job_base_period_us;
>>>> }
>>>>
>>>>
>>>> What do you think?
>>>
>>>
>>> Again some time has passed so rather than going to re-read your patch I
>>> will go from memory. IIRC I was thinking something really dumb and 100%
>>> time bound with no need to think when coding and reviewing. Each thread
>>> simply does:
>>>
>>> ktime_t start = ktime_get();
>>>
>>> do {
>>>    .. thread doing its submission pattern thing ..
>>> } while (ktime_to_ms(ktime_sub(ktime_get(), start)) < test->time_ms);
>>>
>>> May miss the time target by a job period_us but who cares.
>>
>> Sorry for the delay. I got pulled into other things. I left out the worker
>> execution part since we already agreed on that. Instead, I've replied with
>> some pseudocode describing a new strategy for period assignments from test
>> parameters that takes into account your comments.
> 
> Sorry I misread when I saw test_max_time clamping I thought it was about 
> runtime control. I guess it makes sense to clamp it to avoid 
> over-shooting by too much. You removed the cyclical nature so I guess in 
> practice this will not happen? I mean number of workers vs base period 
> you don't expect more than one of them to get clamped?

I haven't removed the cyclical nature of workers submitting jobs. I
omitted that part because I thought we already agreed on it.

Anyway, I realized that unfortunately the strategy of using harmonic
periods to overlap submissions makes no sense given how the mock
scheduler serializes jobs into a single "execution" line. I'm now
thinking that using a narrower range of (multiple) submission periods
would be more effective at stressing concurrent submissions.

I'm also thinking that splitting the single execution time budget into
equal shares among workers, and then computing the number of jobs that
fit into that share, is simpler and better suited for a test case
compared to a time-based approach. Let me share some pseudocode for
this new approach:

/* Parameters (test_duration must be larger than base_period)  */
drm_sched_interleaved_params [...]
{
        .num_workers = ...       /* 8 to 32 */
        .test_duration = ...     /* Few seconds */
        .base_period = ...       /* 100 us, small GPU job */
}

/* Setup phase common for all workers. */
workers_share = params->test_duration / params->num_workers;

/* Worker */
drm_sched_interleaved_worker()
{
        period = (worker->id + 1) * base_period;
        num_jobs = workers_share / period;

        for (i = 0; i < num_jobs; i++) {
                drm_mock_sched_job_set_duration_us(period);
                /* submit and wait for the job to complete */
        }
}

What do you think?

Thanks,
Marco





end of thread, other threads:[~2026-03-20 11:25 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-02-19 14:07 [PATCH] drm/sched: Add new KUnit test suite for concurrent job submission Marco Pagani
2026-02-23 16:25 ` Tvrtko Ursulin
2026-02-25  7:47   ` Philipp Stanner
2026-02-25  9:03     ` Tvrtko Ursulin
2026-03-17 17:27       ` Marco Pagani
2026-03-18 10:44         ` Tvrtko Ursulin
2026-03-18 18:15           ` Marco Pagani
2026-03-18 12:43     ` Marco Pagani
2026-03-17 17:04   ` Marco Pagani
2026-03-18 10:51     ` Tvrtko Ursulin
2026-03-18 17:23       ` Marco Pagani
2026-03-19  8:50         ` Tvrtko Ursulin
2026-03-20 11:25           ` Marco Pagani

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox