Re: [PATCH v2] drm/sched: Add test suite for concurrent job submissions
From: Philipp Stanner
Date: Fri Apr 10 2026 - 05:19:56 EST
On Fri, 2026-04-10 at 11:07 +0200, Marco Pagani wrote:
>
>
> On 09/04/2026 17:35, Tvrtko Ursulin wrote:
> >
> > On 08/04/2026 16:49, Marco Pagani wrote:
> > > Add a new test suite to simulate concurrent job submissions to the DRM
> > > scheduler, as this functionality is not covered by current test suites.
> > >
> > > The new test suite includes two initial test cases: (i) a test case for
> > > parallel job submission and (ii) a test case for interleaved job
> > > submission and completion. In the first test case, worker threads
> > > concurrently submit jobs to the scheduler, and then the timeline is
> > > manually advanced to complete them in bulk. In the second test case,
> > > worker threads concurrently submit sequences of jobs of different
> > > durations to the mock scheduler using a sliding window to better model
> > > real-world workloads. The timeline is advanced automatically by the
> > > finishing jobs, interleaving submission with completion.
> > >
> > > Signed-off-by: Marco Pagani <marco.pagani@xxxxxxxxx>
> > > ---
> > > Changes in v2:
> > > - Improved test description
> > > - Use multiple job durations instead of harmonic periods/durations
> > > - Improved submission for interleaved test with a sliding window
> > > - Removed unnecessary asserts per Tvrtko's feedback, but kept wait_scheduled
> > > - Changed parameter names from period to duration for clarity
> > > - Used temp variables to reduce line breaks
> > > ---
> > > drivers/gpu/drm/scheduler/tests/tests_basic.c | 356 ++++++++++++++++++
> > > 1 file changed, 356 insertions(+)
> > >
> > > diff --git a/drivers/gpu/drm/scheduler/tests/tests_basic.c b/drivers/gpu/drm/scheduler/tests/tests_basic.c
> > > index a5a5a35a87b0..1791b157cfc8 100644
> > > --- a/drivers/gpu/drm/scheduler/tests/tests_basic.c
> > > +++ b/drivers/gpu/drm/scheduler/tests/tests_basic.c
> > > @@ -1,7 +1,11 @@
> > > // SPDX-License-Identifier: GPL-2.0
> > > /* Copyright (c) 2025 Valve Corporation */
> > >
> > > +#include <linux/completion.h>
> > > #include <linux/delay.h>
> > > +#include <linux/minmax.h>
> > > +#include <linux/time.h>
> > > +#include <linux/workqueue.h>
> > >
> > > #include "sched_tests.h"
> > >
> > > @@ -235,6 +239,357 @@ static void drm_sched_basic_cancel(struct kunit *test)
> > > KUNIT_ASSERT_EQ(test, job->hw_fence.error, -ECANCELED);
> > > }
> > >
> > > +struct sched_concurrent_context {
> > > + struct drm_mock_scheduler *sched;
> > > + struct workqueue_struct *sub_wq;
> > > + struct kunit *test;
> > > + struct completion wait_go;
> > > +};
> > > +
> > > +KUNIT_DEFINE_ACTION_WRAPPER(drm_mock_sched_fini_wrap, drm_mock_sched_fini,
> > > + struct drm_mock_scheduler *);
> > > +
> > > +KUNIT_DEFINE_ACTION_WRAPPER(drm_mock_sched_entity_free_wrap, drm_mock_sched_entity_free,
> > > + struct drm_mock_sched_entity *);
> > > +
> > > +static void complete_destroy_workqueue(void *context)
> > > +{
> > > + struct sched_concurrent_context *ctx = context;
> > > +
> > > + complete_all(&ctx->wait_go);
> > > +
> > > + destroy_workqueue(ctx->sub_wq);
> > > +}
> > > +
> > > +static int drm_sched_concurrent_init(struct kunit *test)
> > > +{
> > > + struct sched_concurrent_context *ctx;
> > > + int ret;
> > > +
> > > + ctx = kunit_kzalloc(test, sizeof(*ctx), GFP_KERNEL);
> > > + KUNIT_ASSERT_NOT_NULL(test, ctx);
> > > +
> > > + init_completion(&ctx->wait_go);
> > > +
> > > + ctx->sched = drm_mock_sched_new(test, MAX_SCHEDULE_TIMEOUT);
> > > +
> > > + ret = kunit_add_action_or_reset(test, drm_mock_sched_fini_wrap, ctx->sched);
> > > + KUNIT_ASSERT_EQ(test, ret, 0);
> > > +
> > > + /* Use an unbounded workqueue to maximize job submission concurrency */
> > > + ctx->sub_wq = alloc_workqueue("drm-sched-submitters-wq", WQ_UNBOUND,
> > > + WQ_UNBOUND_MAX_ACTIVE);
> > > + KUNIT_ASSERT_NOT_NULL(test, ctx->sub_wq);
> > > +
> > > + ret = kunit_add_action_or_reset(test, complete_destroy_workqueue, ctx);
> > > + KUNIT_ASSERT_EQ(test, ret, 0);
> > > +
> > > + ctx->test = test;
> > > + test->priv = ctx;
> > > +
> > > + return 0;
> > > +}
> > > +
> > > +struct drm_sched_parallel_params {
> > > + const char *description;
> > > + unsigned int num_jobs;
> > > + unsigned int num_workers;
> > > +};
> > > +
> > > +static const struct drm_sched_parallel_params drm_sched_parallel_cases[] = {
> > > + {
> > > + .description = "Parallel submission of multiple jobs per worker",
> > > + .num_jobs = 8,
> > > + .num_workers = 16,
> > > + },
> > > +};
> > > +
> > > +static void
> > > +drm_sched_parallel_desc(const struct drm_sched_parallel_params *params, char *desc)
> > > +{
> > > + strscpy(desc, params->description, KUNIT_PARAM_DESC_SIZE);
> > > +}
> > > +
> > > +KUNIT_ARRAY_PARAM(drm_sched_parallel, drm_sched_parallel_cases, drm_sched_parallel_desc);
> >
> > As long as you don't plan to add more test cases any time soon the
> > boiler plate could be reduced by not using KUNIT_ARRAY_PARAM but it is
> > up to you. (Same for the other test below.)
>
> Right.
>
> > > +
> > > +struct parallel_worker {
> > > + struct work_struct work;
> > > + struct sched_concurrent_context *ctx;
> > > + struct drm_mock_sched_entity *entity;
> > > + struct drm_mock_sched_job **jobs;
> > > + unsigned int id;
> > > +};
> > > +
> > > +static void drm_sched_parallel_worker(struct work_struct *work)
> > > +{
> > > + const struct drm_sched_parallel_params *params;
> > > + struct sched_concurrent_context *test_ctx;
> > > + struct parallel_worker *worker;
> > > + unsigned int i;
> > > +
> > > + worker = container_of(work, struct parallel_worker, work);
> > > + test_ctx = worker->ctx;
> > > + params = test_ctx->test->param_value;
> > > +
> > > + wait_for_completion(&test_ctx->wait_go);
> > > +
> > > + kunit_info(test_ctx->test, "Parallel worker %u submitting %u jobs started\n",
> > > + worker->id, params->num_jobs);
> > > +
> > > + for (i = 0; i < params->num_jobs; i++)
> > > + drm_mock_sched_job_submit(worker->jobs[i]);
> > > +}
> > > +
> > > +/*
> > > + * Spawns workers that submit a sequence of jobs to the mock scheduler.
> > > + * Once all jobs are submitted, the timeline is manually advanced.
> > > + */
> > > +static void drm_sched_parallel_submit_test(struct kunit *test)
> > > +{
> > > + struct sched_concurrent_context *ctx = test->priv;
> > > + const struct drm_sched_parallel_params *params = test->param_value;
> > > + struct parallel_worker *workers, *worker;
> > > + struct drm_mock_sched_job *job;
> > > + unsigned int i, j, completed_jobs, total_jobs;
> > > + bool done;
> > > + int ret;
> > > +
> > > + KUNIT_ASSERT_GT(test, params->num_workers, 0);
> > > + KUNIT_ASSERT_GT(test, params->num_jobs, 0);
> > > +
> > > + workers = kunit_kcalloc(test, params->num_workers, sizeof(*workers),
> > > + GFP_KERNEL);
> > > + KUNIT_ASSERT_NOT_NULL(test, workers);
> > > +
> > > + /*
> > > + * Init workers only after all jobs and entities have been successfully
> > > + * allocated. In this way, the cleanup logic for when an assertion fails
> > > + * can be simplified.
> > > + */
> > > + for (i = 0; i < params->num_workers; i++) {
> > > + worker = &workers[i];
> > > + worker->id = i;
> > > + worker->ctx = ctx;
> > > + worker->entity = drm_mock_sched_entity_new(test,
> > > + DRM_SCHED_PRIORITY_NORMAL,
> > > + ctx->sched);
> > > +
> > > + ret = kunit_add_action_or_reset(test, drm_mock_sched_entity_free_wrap,
> > > + worker->entity);
> > > + KUNIT_ASSERT_EQ(test, ret, 0);
> > > +
> > > + worker->jobs = kunit_kcalloc(test, params->num_jobs,
> > > + sizeof(*worker->jobs), GFP_KERNEL);
> > > + KUNIT_ASSERT_NOT_NULL(test, worker->jobs);
> > > +
> > > + for (j = 0; j < params->num_jobs; j++) {
> > > + job = drm_mock_sched_job_new(test, worker->entity);
> > > + worker->jobs[j] = job;
> > > + }
> > > + }
> > > +
> > > + for (i = 0; i < params->num_workers; i++) {
> > > + worker = &workers[i];
> > > + INIT_WORK(&worker->work, drm_sched_parallel_worker);
> > > + queue_work(ctx->sub_wq, &worker->work);
> > > + }
> > > +
> > > + complete_all(&ctx->wait_go);
> > > + flush_workqueue(ctx->sub_wq);
> > > +
> > > + for (i = 0; i < params->num_workers; i++) {
> > > + worker = &workers[i];
> > > + for (j = 0; j < params->num_jobs; j++) {
> > > + job = worker->jobs[j];
> > > + done = drm_mock_sched_job_wait_scheduled(job, HZ);
> > > + KUNIT_EXPECT_TRUE(test, done);
> > > + }
> > > + }
> > > +
> > > + total_jobs = params->num_workers * params->num_jobs;
> > > + completed_jobs = drm_mock_sched_advance(ctx->sched, total_jobs);
> > > + KUNIT_EXPECT_EQ(test, completed_jobs, total_jobs);
> > > +
> > > + for (i = 0; i < params->num_workers; i++) {
> > > + worker = &workers[i];
> > > + for (j = 0; j < params->num_jobs; j++) {
> > > + job = worker->jobs[j];
> > > + done = drm_mock_sched_job_wait_finished(job, HZ);
> > > + KUNIT_EXPECT_TRUE(test, done);
> > > + }
> > > + }
> > > +}
> > > +
> > > +struct drm_sched_interleaved_params {
> > > + const char *description;
> > > + unsigned int test_duration_ms;
> > > + unsigned int job_base_duration_us;
> > > + unsigned int num_workers;
> > > + unsigned int num_in_flight_jobs;
> > > +};
> > > +
> > > +static const struct drm_sched_interleaved_params drm_sched_interleaved_cases[] = {
> > > + {
> > > + .description = "Interleaved submission of multiple jobs per worker",
> > > + .test_duration_ms = 1000,
> > > + .job_base_duration_us = 100,
> > > + .num_workers = 16,
> > > + .num_in_flight_jobs = 8,
> > > + },
> > > +};
> > > +
> > > +static void
> > > +drm_sched_interleaved_desc(const struct drm_sched_interleaved_params *params, char *desc)
> > > +{
> > > + strscpy(desc, params->description, KUNIT_PARAM_DESC_SIZE);
> > > +}
> > > +
> > > +KUNIT_ARRAY_PARAM(drm_sched_interleaved, drm_sched_interleaved_cases,
> > > + drm_sched_interleaved_desc);
> > > +
> > > +struct interleaved_worker {
> > > + struct work_struct work;
> > > + struct sched_concurrent_context *ctx;
> > > + struct drm_mock_sched_entity *entity;
> > > + struct drm_mock_sched_job **jobs;
> > > + unsigned int id;
> > > + unsigned int job_count;
> > > + unsigned int job_duration_us;
> > > +};
> > > +
> > > +static void drm_sched_interleaved_worker(struct work_struct *work)
> > > +{
> > > + struct sched_concurrent_context *test_ctx;
> > > + const struct drm_sched_interleaved_params *params;
> > > + struct interleaved_worker *worker;
> > > + unsigned int i, j, max_in_flight_job;
> > > + unsigned long timeout;
> > > + bool done;
> > > +
> > > + worker = container_of(work, struct interleaved_worker, work);
> > > + test_ctx = worker->ctx;
> > > + params = test_ctx->test->param_value;
> > > +
> > > + wait_for_completion(&test_ctx->wait_go);
> > > +
> > > + kunit_info(test_ctx->test, "Worker %u submitting %u jobs of %u us started\n",
> > > + worker->id, worker->job_count, worker->job_duration_us);
> > > +
> > > + timeout = msecs_to_jiffies(params->test_duration_ms * 2);
> > > +
> > > + /* Fill the submission window */
> > > + max_in_flight_job = min(worker->job_count, params->num_in_flight_jobs);
> > > + for (i = 0; i < max_in_flight_job; i++)
> > > + drm_mock_sched_job_submit(worker->jobs[i]);
> > > +
> > > + /* Keep the window full by submitting a new job at once until done */
> > > + for (i = 0; i < worker->job_count; i++) {
> > > + done = drm_mock_sched_job_wait_finished(worker->jobs[i], timeout);
> > > + if (!done)
> > > + kunit_info(test_ctx->test, "Job %u of worker %u timed out\n",
> > > + i, worker->id);
> > > +
> > > + j = i + max_in_flight_job;
> > > + if (j < worker->job_count)
> > > + drm_mock_sched_job_submit(worker->jobs[j]);
> > > + }
> >
> > The loop maybe isn't the most straight-forward but eventually looked to
> > me like it will do what it wants.
>
> I will address this in my reply to your next message.
>
> > num_in_flight_jobs as a test parameter is perhaps arbitrary? I am not
> > sure what it aims to achieve versus if workers would just be submitting
> > one by one, or perhaps two by two, in both cases loops would be simpler.
>
> I think it's better to have it as a parameter rather than hardcoded in
> the test.
The tests are basically here to help us developers percuss the
scheduler. So I would say we don't need to put thaaaat much focus on
such details. Parameter sounds nice to me, but hardcoded wouldn't break
any bones either.
>
> > Or even if num_in_flight_jobs would perhaps be automatically derived
> > from the worker->id? If the goal is to simulate queue depth it sounds
> > good to vary it.
>
> That's a nice idea, but I think having a fixed number of in_flight_jobs
> expressed as a parameter is a good balance between test expressiveness
> and simplicity.
I think those are very detailed questions that one probably wants to
address in a hypothetical "test payloads more realistically"-patch.
This patch overall looks very good and tidy to me and I think we
soonish want to take it as the "provide ground layer for concurrent job
submission testing"-patch.
So unless someone sees major blockers like bugs, I would like to take
it in soonish, since it's been on-list for quite some time now and we
seem to lose ourselves in deep-ish details :]
>
> > But these are minor points and up to you. The test now looks passably
> > tidy as it is.
>
> Thanks for the tidy encouragement.
>
> > Ah wait, please also check if this one needs to be marked as "slow"
> > kunit test. AFAIR all that are expected to take more than 1s need that.
>
> I've run it with UML and QEMU (arch=x86_64) and with the current parameters
> kunit does not recommend to mark it as slow. But maybe you are right, it's
> safer to mark it as slow anyway.
+1
If you can provide that in v3, it would seem we're good to go.
Thanks Marco
P.
>
> Thanks,
> Marco
>
> >
> > > +}
> > > +
> > > +/*
> > > + * Spawns workers that submit a sequence of jobs to the mock scheduler. Job
> > > + * durations are chosen as multiples of a base duration value specified as
> > > + * a test parameter. Since the scheduler serializes jobs from all workers,
> > > + * the total test duration budget is divided into equal shares among workers.
> > > + * These shares are then used to compute the number of jobs that each worker
> > > + * can submit
> > > + */
> > > +static void drm_sched_interleaved_submit_test(struct kunit *test)
> > > +{
> > > + const struct drm_sched_interleaved_params *params = test->param_value;
> > > + struct sched_concurrent_context *ctx = test->priv;
> > > + struct interleaved_worker *workers, *worker;
> > > + struct drm_mock_sched_job *job;
> > > + unsigned int worker_share_us;
> > > + unsigned int i, j;
> > > + bool done;
> > > + int ret;
> > > +
> > > + KUNIT_ASSERT_GT(test, params->num_workers, 0);
> > > + KUNIT_ASSERT_GT(test, params->job_base_duration_us, 0);
> > > +
> > > + workers = kunit_kcalloc(test, params->num_workers, sizeof(*workers),
> > > + GFP_KERNEL);
> > > + KUNIT_ASSERT_NOT_NULL(test, workers);
> > > +
> > > + /* Divide the available test time into equal shares among the workers */
> > > + worker_share_us = (params->test_duration_ms * USEC_PER_MSEC) /
> > > + params->num_workers;
> > > +
> > > + /*
> > > + * Init workers only after all jobs and entities have been successfully
> > > + * allocated. In this way, the cleanup logic for when an assertion fails
> > > + * can be simplified.
> > > + */
> > > + for (i = 0; i < params->num_workers; i++) {
> > > + worker = &workers[i];
> > > + worker->id = i;
> > > + worker->ctx = ctx;
> > > +
> > > + worker->job_duration_us = params->job_base_duration_us * (i + 1);
> > > + worker->job_count = worker_share_us / worker->job_duration_us;
> > > + worker->job_count = max(1U, worker->job_count);
> > > +
> > > + worker->entity = drm_mock_sched_entity_new(test,
> > > + DRM_SCHED_PRIORITY_NORMAL,
> > > + ctx->sched);
> > > +
> > > + ret = kunit_add_action_or_reset(test, drm_mock_sched_entity_free_wrap,
> > > + worker->entity);
> > > + KUNIT_ASSERT_EQ(test, ret, 0);
> > > +
> > > + worker->jobs = kunit_kcalloc(test, worker->job_count,
> > > + sizeof(*worker->jobs), GFP_KERNEL);
> > > + KUNIT_ASSERT_NOT_NULL(test, worker->jobs);
> > > +
> > > + for (j = 0; j < worker->job_count; j++) {
> > > + job = drm_mock_sched_job_new(test, worker->entity);
> > > + drm_mock_sched_job_set_duration_us(job, worker->job_duration_us);
> > > +
> > > + worker->jobs[j] = job;
> > > + }
> > > + }
> > > +
> > > + for (i = 0; i < params->num_workers; i++) {
> > > + worker = &workers[i];
> > > + INIT_WORK(&worker->work, drm_sched_interleaved_worker);
> > > + queue_work(ctx->sub_wq, &worker->work);
> > > + }
> > > +
> > > + complete_all(&ctx->wait_go);
> > > + flush_workqueue(ctx->sub_wq);
> > > +
> > > + for (i = 0; i < params->num_workers; i++) {
> > > + worker = &workers[i];
> > > + for (j = 0; j < worker->job_count; j++) {
> > > + job = worker->jobs[j];
> > > + done = drm_mock_sched_job_is_finished(job);
> > > + KUNIT_EXPECT_TRUE(test, done);
> > > + }
> > > + }
> > > +}
> > > +
> > > +static struct kunit_case drm_sched_concurrent_tests[] = {
> > > + KUNIT_CASE_PARAM(drm_sched_parallel_submit_test, drm_sched_parallel_gen_params),
> > > + KUNIT_CASE_PARAM(drm_sched_interleaved_submit_test, drm_sched_interleaved_gen_params),
> > > + {}
> > > +};
> > > +
> > > +static struct kunit_suite drm_sched_concurrent = {
> > > + .name = "drm_sched_concurrent_tests",
> > > + .init = drm_sched_concurrent_init,
> > > + .test_cases = drm_sched_concurrent_tests,
> > > +};
> > > +
> > > static struct kunit_case drm_sched_cancel_tests[] = {
> > > KUNIT_CASE(drm_sched_basic_cancel),
> > > {}
> > > @@ -556,6 +911,7 @@ static struct kunit_suite drm_sched_credits = {
> > > };
> > >
> > > kunit_test_suites(&drm_sched_basic,
> > > + &drm_sched_concurrent,
> > > &drm_sched_timeout,
> > > &drm_sched_cancel,
> > > &drm_sched_priority,
> >
>