Re: [PATCH 4/5] sched_ext/selftests: Add bypass mode operational test

From: Andrea Righi

Date: Fri Mar 06 2026 - 10:02:42 EST


Hi,

On Fri, Mar 06, 2026 at 10:03:24PM +0800, zhidao su wrote:
> From: Su Zhidao <suzhidao@xxxxxxxxxx>
>
> Add a test that verifies the sched_ext bypass mechanism does not
> prevent tasks from running to completion.
>
> The test attaches a minimal global FIFO scheduler, spawns worker
> processes that complete a fixed computation, detaches the scheduler
> (which triggers bypass mode while workers are still active), and
> verifies all workers complete successfully under bypass mode.
>
> This exercises the scheduler attach/detach lifecycle and verifies
> that bypass mode (activated during unregistration to guarantee
> forward progress) does not stall running tasks.

I'm not sure this selftest adds much value. We're already testing bypass
mode implicitly in the other sched_ext kselftests, since every test that
attaches and detaches a scheduler goes through it: if a task is missed or
gets stuck due to bypass mode, we would trigger a soft lockup, a hung task
timeout, or something similar.

>
> Signed-off-by: Su Zhidao <suzhidao@xxxxxxxxxx>
> ---
> tools/testing/selftests/sched_ext/Makefile | 1 +
> .../testing/selftests/sched_ext/bypass.bpf.c | 32 ++++++
> tools/testing/selftests/sched_ext/bypass.c | 105 ++++++++++++++++++
> 3 files changed, 138 insertions(+)
> create mode 100644 tools/testing/selftests/sched_ext/bypass.bpf.c
> create mode 100644 tools/testing/selftests/sched_ext/bypass.c
>
> diff --git a/tools/testing/selftests/sched_ext/Makefile b/tools/testing/selftests/sched_ext/Makefile
> index a3bbe2c7911b..5fb6278d3f97 100644
> --- a/tools/testing/selftests/sched_ext/Makefile
> +++ b/tools/testing/selftests/sched_ext/Makefile
> @@ -162,6 +162,7 @@ endef
> all_test_bpfprogs := $(foreach prog,$(wildcard *.bpf.c),$(INCLUDE_DIR)/$(patsubst %.c,%.skel.h,$(prog)))
>
> auto-test-targets := \
> + bypass \
> create_dsq \
> dequeue \
> enq_last_no_enq_fails \
> diff --git a/tools/testing/selftests/sched_ext/bypass.bpf.c b/tools/testing/selftests/sched_ext/bypass.bpf.c
> new file mode 100644
> index 000000000000..cb37c8df6834
> --- /dev/null
> +++ b/tools/testing/selftests/sched_ext/bypass.bpf.c
> @@ -0,0 +1,32 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * BPF scheduler for bypass mode operational test.
> + *
> + * Implements a minimal global FIFO scheduler. The userspace side
> + * attaches this scheduler, runs worker tasks to completion, and
> + * verifies that tasks complete successfully.
> + *
> + * Copyright (c) 2026 Xiaomi Corporation.
> + */
> +#include <scx/common.bpf.h>
> +
> +char _license[] SEC("license") = "GPL";
> +
> +UEI_DEFINE(uei);
> +
> +void BPF_STRUCT_OPS(bypass_enqueue, struct task_struct *p, u64 enq_flags)
> +{
> + scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
> +}

We could also remove bypass_enqueue(): when ops.enqueue() is not provided,
the sched_ext core does exactly the same thing (it implicitly enqueues to
SCX_DSQ_GLOBAL).

> +
> +void BPF_STRUCT_OPS(bypass_exit, struct scx_exit_info *ei)
> +{
> + UEI_RECORD(uei, ei);
> +}
> +
> +SEC(".struct_ops.link")
> +struct sched_ext_ops bypass_ops = {
> + .enqueue = (void *)bypass_enqueue,
> + .exit = (void *)bypass_exit,
> + .name = "bypass_test",
> +};
> diff --git a/tools/testing/selftests/sched_ext/bypass.c b/tools/testing/selftests/sched_ext/bypass.c
> new file mode 100644
> index 000000000000..952f09d76bdb
> --- /dev/null
> +++ b/tools/testing/selftests/sched_ext/bypass.c
> @@ -0,0 +1,105 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Verify the sched_ext bypass mechanism: spawn worker tasks and ensure
> + * they run to completion while a BPF scheduler is active.
> + *
> + * The bypass mechanism (activated on scheduler unregistration) must
> + * guarantee forward progress. This test verifies that worker tasks
> + * complete successfully when the scheduler is detached.
> + *
> + * Copyright (c) 2026 Xiaomi Corporation.
> + */
> +#define _GNU_SOURCE
> +#include <unistd.h>
> +#include <sys/wait.h>
> +#include <bpf/bpf.h>
> +#include <scx/common.h>
> +#include "scx_test.h"
> +#include "bypass.bpf.skel.h"
> +
> +#define NUM_BYPASS_WORKERS 4
> +
> +static void worker_fn(void)
> +{
> + volatile int sum = 0;
> + int i;
> +
> + /*
> + * Do enough work to still be running when bpf_link__destroy()
> + * is called, ensuring tasks are active during bypass mode.
> + */
> + for (i = 0; i < 10000000; i++)
> + sum += i;
> +}
> +
> +static enum scx_test_status setup(void **ctx)
> +{
> + struct bypass *skel;
> +
> + skel = bypass__open();
> + SCX_FAIL_IF(!skel, "Failed to open bypass skel");
> + SCX_ENUM_INIT(skel);
> + SCX_FAIL_IF(bypass__load(skel), "Failed to load bypass skel");
> +
> + *ctx = skel;
> + return SCX_TEST_PASS;
> +}
> +
> +static enum scx_test_status run(void *ctx)
> +{
> + struct bypass *skel = ctx;
> + struct bpf_link *link;
> + pid_t pids[NUM_BYPASS_WORKERS];
> + int i, status;
> +
> + link = bpf_map__attach_struct_ops(skel->maps.bypass_ops);
> + SCX_FAIL_IF(!link, "Failed to attach bypass scheduler");
> +
> + /*
> + * Spawn worker processes. These must complete successfully
> + * even as the scheduler is active and then detached (which
> + * triggers bypass mode).
> + */
> + for (i = 0; i < NUM_BYPASS_WORKERS; i++) {
> + pids[i] = fork();
> + SCX_FAIL_IF(pids[i] < 0, "fork() failed for worker %d", i);
> +
> + if (pids[i] == 0) {
> + worker_fn();
> + _exit(0);
> + }
> + }

There's no synchronization with the parent, so on a fast system the workers
may even finish the loop before the parent ever detaches the scheduler, in
which case no worker is actually running while bypass mode is active.

> +
> + /*
> + * Detach the scheduler while workers are still running. This
> + * triggers bypass mode, which must guarantee forward progress
> + * for all active tasks.
> + */
> + bpf_link__destroy(link);
> +
> + /* Workers must complete successfully under bypass mode */
> + for (i = 0; i < NUM_BYPASS_WORKERS; i++) {
> + SCX_FAIL_IF(waitpid(pids[i], &status, 0) != pids[i],
> + "waitpid failed for worker %d", i);
> + SCX_FAIL_IF(!WIFEXITED(status) || WEXITSTATUS(status) != 0,
> + "Worker %d did not exit cleanly", i);
> + }
> +
> + SCX_EQ(skel->data->uei.kind, EXIT_KIND(SCX_EXIT_UNREG));
> +
> + return SCX_TEST_PASS;
> +}
> +
> +static void cleanup(void *ctx)
> +{
> + bypass__destroy(ctx);
> +}
> +
> +struct scx_test bypass_test = {
> + .name = "bypass",
> + .description = "Verify tasks complete during bypass mode",
> + .setup = setup,
> + .run = run,
> + .cleanup = cleanup,
> +};
> +REGISTER_SCX_TEST(&bypass_test)
> --
> 2.43.0
>

Thanks,
-Andrea