[PATCH RFC 3/4] DEBUG selftests/sched: Add tests for SCHED_DEADLINE demotion feature
From: Juri Lelli
Date: Thu Feb 19 2026 - 08:39:13 EST
Add functional and stress tests for the SCHED_FLAG_DL_DEMOTION feature.
The functional test (dl_demotion_test.c) verifies:
- Basic demotion on runtime exhaustion
- Promotion when replenishment timer fires
- Explicit parameter change clears demotion state
- No demotion without SCHED_FLAG_DL_DEMOTION
The stress test (dl_demotion_stress.c) creates multiple demoting tasks
running concurrently to verify bandwidth accounting and state machine
correctness under load.
Also include a helper script for running tests with ftrace enabled to
aid in debugging bandwidth accounting issues.
Assisted-by: Claude Code:Sonnet 4.5
Signed-off-by: Juri Lelli <juri.lelli@xxxxxxxxxx>
---
tools/testing/selftests/sched/.gitignore | 2 +
tools/testing/selftests/sched/Makefile | 4 +-
tools/testing/selftests/sched/README_dl_demotion | 83 ++++
tools/testing/selftests/sched/dl_demotion_stress.c | 208 ++++++++++
tools/testing/selftests/sched/dl_demotion_test.c | 460 +++++++++++++++++++++
.../selftests/sched/run_dl_demotion_with_trace.sh | 71 ++++
6 files changed, 826 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/sched/.gitignore b/tools/testing/selftests/sched/.gitignore
index 6996d4654d924..c8139d0067df4 100644
--- a/tools/testing/selftests/sched/.gitignore
+++ b/tools/testing/selftests/sched/.gitignore
@@ -1 +1,3 @@
cs_prctl_test
+dl_demotion_test
+dl_demotion_stress
diff --git a/tools/testing/selftests/sched/Makefile b/tools/testing/selftests/sched/Makefile
index 099ee9213557a..0938acab18700 100644
--- a/tools/testing/selftests/sched/Makefile
+++ b/tools/testing/selftests/sched/Makefile
@@ -8,7 +8,7 @@ CFLAGS += -O2 -Wall -g -I./ $(KHDR_INCLUDES) -Wl,-rpath=./ \
$(CLANG_FLAGS)
LDLIBS += -lpthread
-TEST_GEN_FILES := cs_prctl_test
-TEST_PROGS := cs_prctl_test
+TEST_GEN_FILES := cs_prctl_test dl_demotion_test dl_demotion_stress
+TEST_PROGS := cs_prctl_test dl_demotion_test
include ../lib.mk
diff --git a/tools/testing/selftests/sched/README_dl_demotion b/tools/testing/selftests/sched/README_dl_demotion
new file mode 100644
index 0000000000000..1cdd10fbbd7d1
--- /dev/null
+++ b/tools/testing/selftests/sched/README_dl_demotion
@@ -0,0 +1,83 @@
+SCHED_DEADLINE Demotion Tests
+==============================
+
+These tests verify the SCHED_FLAG_DL_DEMOTION feature, which allows DEADLINE
+tasks to be demoted to SCHED_NORMAL when they exhaust their runtime budget and
+promoted back to SCHED_DEADLINE when the replenishment timer fires.
+
+Building
+--------
+ make -C tools/testing/selftests/sched
+
+Running
+-------
+Requires root or CAP_SYS_NICE:
+
+ sudo ./tools/testing/selftests/sched/dl_demotion_test
+
+Or via kselftest framework:
+
+ sudo make -C tools/testing/selftests TARGETS=sched run_tests
+
+Tests
+-----
+
+Test 1: Basic demotion on runtime exhaustion
+ - Creates a DEADLINE task with SCHED_FLAG_DL_DEMOTION
+ - Runs until runtime is exhausted
+ - Verifies task is demoted to SCHED_NORMAL
+
+Test 2: Promotion on replenishment timer
+ - Gets demoted by exhausting runtime
+ - Waits for period to expire
+ - Verifies task is promoted back to SCHED_DEADLINE
+
+Test 3: Explicit parameter change while demoted
+ - Gets demoted
+ - Explicitly changes scheduling parameters
+ - Verifies demotion state is cleared (no automatic promotion)
+
+Test 4: No demotion without flag
+ - Creates DEADLINE task WITHOUT demotion flag
+ - Exhausts runtime
+ - Verifies task remains SCHED_DEADLINE (throttled but not demoted)
+
+Stress Test
+-----------
+The dl_demotion_stress test creates multiple threads that repeatedly go through
+demotion/promotion cycles. This is useful for stress testing the feature,
+especially migration scenarios.
+
+ sudo ./tools/testing/selftests/sched/dl_demotion_stress [threads] [duration]
+
+Arguments:
+ threads - Number of worker threads (1-32, default: 4)
+ duration - Run duration in seconds (default: 10)
+
+Example:
+ sudo ./tools/testing/selftests/sched/dl_demotion_stress 8 30
+
+This test is NOT part of the automated test suite (not in TEST_PROGS) and
+must be run manually.
+
+Debugging
+---------
+To see the demotion/promotion state machine transitions, enable ftrace (the
+run_dl_demotion_with_trace.sh helper script automates these steps):
+
+ sudo su
+ cd /sys/kernel/debug/tracing
+ echo 1 > events/sched/enable
+ echo 1 > options/trace_printk
+ echo 1 > tracing_on
+
+Then run the test and check the trace:
+
+ cat trace
+
+Look for trace_printk messages showing state transitions:
+ - dl_demote: ... state: NOT_DEMOTED->DEMOTING
+ - dl_demote: ... state: DEMOTING->DEMOTED
+ - dl_promote: ... state: DEMOTED->PROMOTING
+ - dl_promote: ... state: PROMOTING->NOT_DEMOTED
+ - dl_timer: ... migrated_while_runnable/sleeping
+ - switched_from_dl: ... skip_bw_accounting
+ - switched_to_dl: ... skip_bw_accounting
diff --git a/tools/testing/selftests/sched/dl_demotion_stress.c b/tools/testing/selftests/sched/dl_demotion_stress.c
new file mode 100644
index 0000000000000..6e404d6b56af9
--- /dev/null
+++ b/tools/testing/selftests/sched/dl_demotion_stress.c
@@ -0,0 +1,208 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * SCHED_DEADLINE demotion stress test
+ *
+ * Creates multiple DEADLINE tasks with demotion enabled and runs them
+ * to stress test the demotion/promotion state machine, especially with
+ * migration scenarios.
+ */
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <sys/types.h>
+#include <sys/syscall.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <time.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <pthread.h>
+#include <signal.h>
+
+#ifndef SCHED_FLAG_DL_DEMOTION
+#define SCHED_FLAG_DL_DEMOTION 0x80
+#endif
+
+#define NSEC_PER_SEC 1000000000ULL
+
+/*
+ * Run flag polled by the worker threads. It is cleared by main() and by the
+ * SIGINT handler; sig_atomic_t is the type that may safely be written from
+ * a signal handler.
+ */
+static volatile sig_atomic_t keep_running = 1;
+
+/*
+ * Thin wrapper that issues the sched_setattr system call through syscall(2)
+ * rather than a libc wrapper, to avoid glibc conflicts.
+ * Returns whatever syscall() returns, narrowed to int.
+ */
+static int sys_sched_setattr(pid_t pid, struct sched_attr *attr,
+			     unsigned int flags)
+{
+	long ret = syscall(__NR_sched_setattr, pid, attr, flags);
+
+	return ret;
+}
+
+/*
+ * Thin wrapper that issues the sched_getattr system call through syscall(2).
+ * Nothing in this file calls it, so it is marked unused to keep the -Wall
+ * build free of "defined but not used" warnings.
+ */
+static __attribute__((unused)) int sys_sched_getattr(pid_t pid,
+						     struct sched_attr *attr,
+						     unsigned int size,
+						     unsigned int flags)
+{
+	return syscall(__NR_sched_getattr, pid, attr, size, flags);
+}
+
+/* SIGINT handler: ask for a clean shutdown by clearing keep_running */
+static void sigint_handler(int sig)
+{
+	keep_running = 0;
+	(void)sig;	/* the signal number itself is not needed */
+}
+
+/*
+ * Spin until at least nsec nanoseconds of CLOCK_MONOTONIC time have passed.
+ * The clock is sampled after every batch of 10000 additions, and at least
+ * one batch always runs.
+ */
+static void burn_cpu(uint64_t nsec)
+{
+	struct timespec t0, t1;
+	volatile uint64_t sink = 0;
+
+	clock_gettime(CLOCK_MONOTONIC, &t0);
+	for (;;) {
+		uint64_t spent;
+		int spin;
+
+		for (spin = 0; spin < 10000; spin++)
+			sink += spin;
+
+		clock_gettime(CLOCK_MONOTONIC, &t1);
+		spent = (t1.tv_sec - t0.tv_sec) * NSEC_PER_SEC;
+		spent += t1.tv_nsec - t0.tv_nsec;
+		if (spent >= nsec)
+			break;
+	}
+}
+
+/*
+ * Worker thread body: switch to SCHED_DEADLINE with demotion enabled, then
+ * loop overrunning the runtime budget and sleeping past the period until
+ * keep_running is cleared.
+ *
+ * @arg: pointer to this worker's int id (used for logging and to pick the
+ *       nice value).
+ *
+ * Always returns NULL.
+ */
+static void *worker_thread(void *arg)
+{
+	int thread_id = *(int *)arg;
+	struct sched_attr attr = {0};
+	int cycles = 0;
+
+	/*
+	 * Keep the inherited CPU affinity. The kernel refuses SCHED_DEADLINE
+	 * with EPERM for a task whose affinity mask does not cover its whole
+	 * root domain, so pinning the thread to a subset of CPUs here would
+	 * make sched_setattr() below fail on larger machines. The default
+	 * mask already allows migration.
+	 */
+
+	/* Set DEADLINE with demotion */
+	attr.size = sizeof(attr);
+	attr.sched_policy = SCHED_DEADLINE;
+	attr.sched_runtime = 20 * 1000 * 1000;		/* 20ms */
+	attr.sched_deadline = 100 * 1000 * 1000;	/* 100ms */
+	attr.sched_period = 100 * 1000 * 1000;		/* 100ms */
+	attr.sched_flags = SCHED_FLAG_DL_DEMOTION;
+	attr.sched_nice = thread_id % 10;		/* Different nice values */
+
+	if (sys_sched_setattr(0, &attr, 0) < 0) {
+		perror("sched_setattr");
+		return NULL;
+	}
+
+	printf("Thread %d: Started with SCHED_DEADLINE (runtime=20ms, period=100ms, nice=%d)\n",
+	       thread_id, attr.sched_nice);
+
+	while (keep_running) {
+		/* Burn CPU to exhaust runtime and trigger demotion */
+		burn_cpu(25 * 1000 * 1000);	/* 25ms - exceeds 20ms runtime */
+
+		/* Now we should be demoted - do some light work as NORMAL */
+		usleep(10 * 1000);		/* 10ms */
+
+		/* Wait for promotion (period expiry) */
+		usleep(120 * 1000);		/* 120ms - exceeds 100ms period */
+
+		cycles++;
+		if (cycles % 10 == 0) {
+			printf("Thread %d: Completed %d demotion/promotion cycles\n",
+			       thread_id, cycles);
+		}
+	}
+
+	printf("Thread %d: Exiting after %d cycles\n", thread_id, cycles);
+
+	/* Reset to normal before exiting (best effort, result not checked) */
+	attr.sched_policy = SCHED_NORMAL;
+	sys_sched_setattr(0, &attr, 0);
+
+	return NULL;
+}
+
+/*
+ * Parse a decimal integer command-line argument.
+ * Returns 0 and stores the value in *out when str is entirely a number in
+ * [min, max]; returns -1 otherwise and leaves *out untouched.
+ */
+static int parse_int_arg(const char *str, long min, long max, int *out)
+{
+	char *end;
+	long val;
+
+	errno = 0;
+	val = strtol(str, &end, 10);
+	if (errno || end == str || *end != '\0' || val < min || val > max)
+		return -1;
+
+	*out = (int)val;
+	return 0;
+}
+
+/*
+ * Stress test entry point.
+ *
+ * Usage: dl_demotion_stress [threads] [duration]
+ *   threads  - number of worker threads, 1-32 (default 4)
+ *   duration - run time in seconds (default 10)
+ *
+ * Returns 0 when all workers were started and joined, 1 on bad arguments,
+ * missing privileges/kernel support, allocation failure or thread creation
+ * failure.
+ */
+int main(int argc, char *argv[])
+{
+	struct sched_attr attr = {0};
+	int num_threads = 4;
+	int duration = 10; /* seconds */
+	int created = 0;
+	int failed = 0;
+	pthread_t *threads;
+	int *thread_ids;
+
+	/* Parse arguments, rejecting anything that is not a plain number */
+	if (argc > 1 && parse_int_arg(argv[1], 1, 32, &num_threads) < 0) {
+		fprintf(stderr, "Number of threads must be 1-32\n");
+		return 1;
+	}
+	if (argc > 2 && parse_int_arg(argv[2], 0, INT_MAX, &duration) < 0) {
+		fprintf(stderr, "Duration must be a non-negative number of seconds\n");
+		return 1;
+	}
+
+	printf("SCHED_DEADLINE Demotion Stress Test\n");
+	printf("====================================\n");
+	printf("Threads: %d\n", num_threads);
+	printf("Duration: %d seconds\n", duration);
+	printf("Press Ctrl+C to stop early\n\n");
+
+	/*
+	 * Check permissions and kernel support. The demotion flag is part of
+	 * the probe so that EINVAL really reflects a missing feature.
+	 */
+	attr.size = sizeof(attr);
+	attr.sched_policy = SCHED_DEADLINE;
+	attr.sched_runtime = 10 * 1000 * 1000;
+	attr.sched_deadline = 100 * 1000 * 1000;
+	attr.sched_period = 100 * 1000 * 1000;
+	attr.sched_flags = SCHED_FLAG_DL_DEMOTION;
+
+	if (sys_sched_setattr(0, &attr, 0) < 0) {
+		if (errno == EPERM)
+			fprintf(stderr, "Need CAP_SYS_NICE or root privileges\n");
+		else if (errno == EINVAL)
+			fprintf(stderr, "SCHED_DEADLINE or SCHED_FLAG_DL_DEMOTION not supported\n");
+		else
+			perror("sched_setattr");
+		return 1;
+	}
+	attr.sched_flags = 0;
+	attr.sched_policy = SCHED_NORMAL;
+	sys_sched_setattr(0, &attr, 0);
+
+	/* Set up signal handler */
+	signal(SIGINT, sigint_handler);
+
+	/* Allocate thread arrays */
+	threads = calloc(num_threads, sizeof(*threads));
+	thread_ids = calloc(num_threads, sizeof(*thread_ids));
+	if (!threads || !thread_ids) {
+		fprintf(stderr, "Memory allocation failed\n");
+		free(threads);
+		free(thread_ids);
+		return 1;
+	}
+
+	/* Create threads, remembering how many actually started */
+	for (int i = 0; i < num_threads; i++) {
+		int err;
+
+		thread_ids[i] = i;
+		err = pthread_create(&threads[i], NULL, worker_thread,
+				     &thread_ids[i]);
+		if (err) {
+			/* pthread_create() returns the error, errno is not set */
+			fprintf(stderr, "pthread_create: %s\n", strerror(err));
+			keep_running = 0;
+			failed = 1;
+			break;
+		}
+		created++;
+	}
+
+	/* Run for specified duration (a SIGINT cuts the sleep short) */
+	if (!failed) {
+		sleep(duration);
+		keep_running = 0;
+	}
+
+	/* Wait only for the threads that were really created */
+	printf("\nWaiting for threads to finish...\n");
+	for (int i = 0; i < created; i++)
+		pthread_join(threads[i], NULL);
+
+	free(threads);
+	free(thread_ids);
+
+	if (failed) {
+		fprintf(stderr, "\nStress test aborted: could not create all threads\n");
+		return 1;
+	}
+
+	printf("\nStress test completed successfully\n");
+	return 0;
+}
diff --git a/tools/testing/selftests/sched/dl_demotion_test.c b/tools/testing/selftests/sched/dl_demotion_test.c
new file mode 100644
index 0000000000000..11ffe1c9ecbed
--- /dev/null
+++ b/tools/testing/selftests/sched/dl_demotion_test.c
@@ -0,0 +1,460 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * SCHED_DEADLINE demotion/promotion test
+ *
+ * Tests the SCHED_FLAG_DL_DEMOTION feature which allows DEADLINE tasks
+ * to be demoted to SCHED_NORMAL when they exhaust their runtime, and
+ * promoted back when the replenishment timer fires.
+ */
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <sys/types.h>
+#include <sys/syscall.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <pthread.h>
+
+#ifndef SCHED_FLAG_DL_DEMOTION
+#define SCHED_FLAG_DL_DEMOTION 0x80
+#endif
+
+#define NSEC_PER_SEC 1000000000ULL
+#define USEC_PER_SEC 1000000ULL
+
+/* File descriptor of the ftrace trace_marker file; negative while it is not open */
+static int trace_marker_fd = -1;
+
+/*
+ * Thin wrapper that issues the sched_setattr system call through syscall(2)
+ * rather than a libc wrapper, to avoid glibc conflicts.
+ * Returns whatever syscall() returns, narrowed to int.
+ */
+static int sys_sched_setattr(pid_t pid, struct sched_attr *attr,
+			     unsigned int flags)
+{
+	long ret = syscall(__NR_sched_setattr, pid, attr, flags);
+
+	return ret;
+}
+
+/*
+ * Thin wrapper that issues the sched_getattr system call through syscall(2).
+ * Returns whatever syscall() returns, narrowed to int.
+ */
+static int sys_sched_getattr(pid_t pid, struct sched_attr *attr,
+			     unsigned int size, unsigned int flags)
+{
+	long ret = syscall(__NR_sched_getattr, pid, attr, size, flags);
+
+	return ret;
+}
+
+/*
+ * Open the ftrace trace_marker file for writing so the tests can annotate
+ * the trace. The /sys/kernel/tracing path is tried first, then the
+ * /sys/kernel/debug/tracing one; trace_marker_fd stays negative when
+ * neither can be opened.
+ */
+static void trace_marker_init(void)
+{
+	static const char * const candidates[] = {
+		"/sys/kernel/tracing/trace_marker",
+		"/sys/kernel/debug/tracing/trace_marker",
+	};
+	size_t count = sizeof(candidates) / sizeof(candidates[0]);
+	size_t idx = 0;
+
+	while (idx < count) {
+		trace_marker_fd = open(candidates[idx++], O_WRONLY);
+		if (trace_marker_fd >= 0)
+			return;
+	}
+}
+
+/*
+ * Write a printf-style formatted message to the ftrace buffer.
+ *
+ * Does nothing when the trace marker is not open. Messages that do not fit
+ * in the local buffer are truncated. Write errors are ignored.
+ */
+__attribute__((format(printf, 1, 2)))
+static void trace_write(const char *fmt, ...)
+{
+	char buf[256];
+	va_list args;
+	ssize_t written;
+	int len;
+
+	if (trace_marker_fd < 0)
+		return;
+
+	va_start(args, fmt);
+	len = vsnprintf(buf, sizeof(buf), fmt, args);
+	va_end(args);
+
+	if (len <= 0)
+		return;
+
+	/*
+	 * vsnprintf() returns the length the complete message would have had,
+	 * which can exceed the buffer; clamp it so write() never reads past
+	 * the end of buf.
+	 */
+	if ((size_t)len >= sizeof(buf))
+		len = sizeof(buf) - 1;
+
+	/* Tracing is best effort: a failed write must not fail the test */
+	written = write(trace_marker_fd, buf, len);
+	(void)written;
+}
+
+/* Close the ftrace marker if it is open and mark it as closed again */
+static void trace_marker_close(void)
+{
+	if (trace_marker_fd < 0)
+		return;
+
+	close(trace_marker_fd);
+	trace_marker_fd = -1;
+}
+
+/*
+ * Spin until at least nsec nanoseconds of CLOCK_MONOTONIC time have passed.
+ * The clock is sampled after every batch of 10000 additions, and at least
+ * one batch always runs.
+ */
+static void burn_cpu(uint64_t nsec)
+{
+	struct timespec t0, t1;
+	volatile uint64_t sink = 0;
+
+	clock_gettime(CLOCK_MONOTONIC, &t0);
+	for (;;) {
+		uint64_t spent;
+		int spin;
+
+		for (spin = 0; spin < 10000; spin++)
+			sink += spin;
+
+		clock_gettime(CLOCK_MONOTONIC, &t1);
+		spent = (t1.tv_sec - t0.tv_sec) * NSEC_PER_SEC;
+		spent += t1.tv_nsec - t0.tv_nsec;
+		if (spent >= nsec)
+			break;
+	}
+}
+
+/*
+ * Query the calling thread's scheduling policy with sched_getattr.
+ * Returns the policy value, or -1 (after printing the error) when the
+ * system call fails.
+ */
+static int get_current_policy(void)
+{
+	struct sched_attr cur = { .size = sizeof(cur) };
+	int rc = sys_sched_getattr(0, &cur, sizeof(cur), 0);
+
+	if (rc < 0) {
+		perror("sys_sched_getattr");
+		return -1;
+	}
+
+	return cur.sched_policy;
+}
+
+/*
+ * Test 1: Basic demotion when runtime exhausted
+ *
+ * Switch to SCHED_DEADLINE with SCHED_FLAG_DL_DEMOTION, spin for longer
+ * than the runtime budget and check that the reported policy is
+ * SCHED_NORMAL afterwards.
+ *
+ * Returns 0 on pass or skip, -1 on failure.
+ */
+static int test_basic_demotion(void)
+{
+	struct sched_attr attr = {0};
+	int before, after, result;
+
+	printf("Test 1: Basic demotion on runtime exhaustion\n");
+	trace_write("TEST1: START - Basic demotion on runtime exhaustion");
+
+	attr.size = sizeof(attr);
+	attr.sched_policy = SCHED_DEADLINE;
+	attr.sched_runtime = 10 * 1000 * 1000;		/* 10ms */
+	attr.sched_deadline = 100 * 1000 * 1000;	/* 100ms */
+	attr.sched_period = 100 * 1000 * 1000;		/* 100ms */
+	attr.sched_flags = SCHED_FLAG_DL_DEMOTION;
+	attr.sched_nice = 0;				/* Nice value when demoted */
+
+	if (sys_sched_setattr(0, &attr, 0) < 0) {
+		switch (errno) {
+		case EPERM:
+			printf(" SKIP: Need CAP_SYS_NICE or root privileges\n");
+			return 0;
+		case EINVAL:
+			printf(" SKIP: SCHED_FLAG_DL_DEMOTION not supported\n");
+			return 0;
+		default:
+			perror(" FAIL: sys_sched_setattr");
+			return -1;
+		}
+	}
+
+	before = get_current_policy();
+	if (before != SCHED_DEADLINE) {
+		printf(" FAIL: Not SCHED_DEADLINE after setattr (got %d)\n",
+		       before);
+		return -1;
+	}
+
+	/* Spin for 15ms, which is more than the 10ms runtime */
+	printf(" Burning CPU to exhaust runtime...\n");
+	trace_write("TEST1: Burning CPU to exhaust runtime (15ms)");
+	burn_cpu(15 * 1000 * 1000);
+	trace_write("TEST1: CPU burn complete, checking policy");
+
+	/* The verdict depends only on the policy reported now */
+	after = get_current_policy();
+	if (after == SCHED_NORMAL) {
+		printf(" PASS: Demoted to SCHED_NORMAL after runtime exhaustion\n");
+		trace_write("TEST1: PASS - Task demoted to SCHED_NORMAL");
+		result = 0;
+	} else {
+		printf(" FAIL: Still policy %d after runtime exhaustion (expected SCHED_NORMAL)\n",
+		       after);
+		trace_write("TEST1: FAIL - Task not demoted (policy=%d)", after);
+		result = -1;
+	}
+
+	/* Common tail: go back to SCHED_NORMAL whatever the outcome */
+	attr.sched_policy = SCHED_NORMAL;
+	sys_sched_setattr(0, &attr, 0);
+	trace_write("TEST1: END");
+
+	return result;
+}
+
+/*
+ * Test 2: Promotion when replenishment timer fires
+ *
+ * Get demoted, then sleep until the period expires and verify
+ * we get promoted back to SCHED_DEADLINE.
+ *
+ * Returns 0 on pass or skip, -1 on failure.
+ */
+static int test_promotion_on_timer(void)
+{
+	struct sched_attr attr = {0};
+	int policy_before, policy_after;
+	int ret;
+
+	printf("\nTest 2: Promotion on replenishment timer\n");
+	trace_write("TEST2: START - Promotion on replenishment timer");
+
+	/* Reset to SCHED_NORMAL before starting */
+	attr.size = sizeof(attr);
+	attr.sched_policy = SCHED_NORMAL;
+	sys_sched_setattr(0, &attr, 0);
+
+	attr.sched_policy = SCHED_DEADLINE;
+	attr.sched_runtime = 10 * 1000 * 1000;		/* 10ms */
+	attr.sched_deadline = 200 * 1000 * 1000;	/* 200ms */
+	attr.sched_period = 200 * 1000 * 1000;		/* 200ms */
+	attr.sched_flags = SCHED_FLAG_DL_DEMOTION;
+	attr.sched_nice = 0;
+
+	if (sys_sched_setattr(0, &attr, 0) < 0) {
+		/* Skip on missing privileges too, as Test 1 does */
+		if (errno == EPERM) {
+			printf(" SKIP: Need CAP_SYS_NICE or root privileges\n");
+			return 0;
+		}
+		if (errno == EINVAL) {
+			printf(" SKIP: SCHED_FLAG_DL_DEMOTION not supported\n");
+			return 0;
+		}
+		perror(" FAIL: sys_sched_setattr");
+		return -1;
+	}
+
+	/* Exhaust runtime to get demoted */
+	printf(" Exhausting runtime...\n");
+	trace_write("TEST2: Exhausting runtime to trigger demotion");
+	burn_cpu(15 * 1000 * 1000);	/* 15ms */
+	trace_write("TEST2: CPU burn complete, checking if demoted");
+
+	policy_before = get_current_policy();
+	if (policy_before != SCHED_NORMAL) {
+		printf(" FAIL: Not demoted (policy=%d)\n", policy_before);
+		attr.sched_policy = SCHED_NORMAL;
+		sys_sched_setattr(0, &attr, 0);
+		return -1;
+	}
+	printf(" Demoted to SCHED_NORMAL\n");
+	trace_write("TEST2: Confirmed demoted to SCHED_NORMAL");
+
+	/* Wait for period to expire (timer should promote us) */
+	printf(" Waiting for replenishment timer (250ms)...\n");
+	trace_write("TEST2: Waiting for replenishment timer (250ms)");
+	usleep(250 * 1000);	/* 250ms, longer than 200ms period */
+	trace_write("TEST2: Wait complete, checking if promoted");
+
+	/* Check if promoted back */
+	policy_after = get_current_policy();
+	if (policy_after == SCHED_DEADLINE) {
+		printf(" PASS: Promoted back to SCHED_DEADLINE\n");
+		trace_write("TEST2: PASS - Promoted back to SCHED_DEADLINE");
+		ret = 0;
+	} else {
+		printf(" FAIL: Still policy %d after timer (expected SCHED_DEADLINE)\n",
+		       policy_after);
+		trace_write("TEST2: FAIL - Not promoted (policy=%d)", policy_after);
+		ret = -1;
+	}
+
+	/* Leave the task as SCHED_NORMAL for whatever runs next */
+	attr.sched_policy = SCHED_NORMAL;
+	sys_sched_setattr(0, &attr, 0);
+	trace_write("TEST2: END");
+
+	return ret;
+}
+
+/*
+ * Test 3: Explicit parameter change while demoted
+ *
+ * Get demoted, then explicitly change scheduling parameters.
+ * This should clear the demotion state and prevent automatic promotion.
+ *
+ * Returns 0 on pass or skip, -1 on failure.
+ */
+static int test_param_change_while_demoted(void)
+{
+	struct sched_attr attr = {0};
+	int policy;
+
+	printf("\nTest 3: Explicit parameter change while demoted\n");
+	trace_write("TEST3: START - Explicit parameter change while demoted");
+
+	/* Reset to SCHED_NORMAL before starting */
+	attr.size = sizeof(attr);
+	attr.sched_policy = SCHED_NORMAL;
+	sys_sched_setattr(0, &attr, 0);
+
+	attr.sched_policy = SCHED_DEADLINE;
+	attr.sched_runtime = 10 * 1000 * 1000;		/* 10ms */
+	attr.sched_deadline = 200 * 1000 * 1000;	/* 200ms */
+	attr.sched_period = 200 * 1000 * 1000;		/* 200ms */
+	attr.sched_flags = SCHED_FLAG_DL_DEMOTION;
+	attr.sched_nice = 0;
+
+	if (sys_sched_setattr(0, &attr, 0) < 0) {
+		/* Skip on missing privileges too, as Test 1 does */
+		if (errno == EPERM) {
+			printf(" SKIP: Need CAP_SYS_NICE or root privileges\n");
+			return 0;
+		}
+		if (errno == EINVAL) {
+			printf(" SKIP: SCHED_FLAG_DL_DEMOTION not supported\n");
+			return 0;
+		}
+		perror(" FAIL: sys_sched_setattr");
+		return -1;
+	}
+
+	/* Exhaust runtime to get demoted */
+	printf(" Exhausting runtime...\n");
+	trace_write("TEST3: Exhausting runtime to trigger demotion");
+	burn_cpu(15 * 1000 * 1000);
+	trace_write("TEST3: Checking if demoted");
+
+	policy = get_current_policy();
+	if (policy != SCHED_NORMAL) {
+		printf(" FAIL: Not demoted (policy=%d)\n", policy);
+		attr.sched_policy = SCHED_NORMAL;
+		sys_sched_setattr(0, &attr, 0);
+		return -1;
+	}
+	printf(" Demoted to SCHED_NORMAL\n");
+	trace_write("TEST3: Confirmed demoted to SCHED_NORMAL");
+
+	/* Explicitly change to SCHED_NORMAL (should clear demotion state) */
+	printf(" Explicitly setting SCHED_NORMAL...\n");
+	trace_write("TEST3: Explicitly calling sched_setattr(SCHED_NORMAL) to clear demotion state");
+	attr.sched_policy = SCHED_NORMAL;
+	attr.sched_nice = 5;
+	if (sys_sched_setattr(0, &attr, 0) < 0) {
+		perror(" FAIL: sys_sched_setattr to NORMAL");
+		return -1;
+	}
+
+	/* Wait past the period - should NOT be promoted */
+	printf(" Waiting past period (250ms)...\n");
+	trace_write("TEST3: Waiting past period - should NOT be promoted");
+	usleep(250 * 1000);
+	trace_write("TEST3: Wait complete, verifying still NORMAL");
+
+	policy = get_current_policy();
+	if (policy == SCHED_NORMAL) {
+		printf(" PASS: Remained SCHED_NORMAL (demotion state cleared)\n");
+		trace_write("TEST3: PASS - Remained SCHED_NORMAL");
+		trace_write("TEST3: END");
+		return 0;
+	}
+
+	printf(" FAIL: Unexpected promotion to policy %d\n", policy);
+	trace_write("TEST3: FAIL - Unexpected promotion to policy %d", policy);
+	attr.sched_policy = SCHED_NORMAL;
+	sys_sched_setattr(0, &attr, 0);
+	trace_write("TEST3: END");
+	return -1;
+}
+
+/*
+ * Test 4: Demotion disabled without flag
+ *
+ * Create DEADLINE task without demotion flag, exhaust runtime,
+ * verify task stays SCHED_DEADLINE (throttled but not demoted).
+ *
+ * Returns 0 on pass or skip, -1 on failure.
+ */
+static int test_no_demotion_without_flag(void)
+{
+	struct sched_attr attr = {0};
+	int policy;
+	int ret;
+
+	printf("\nTest 4: No demotion without SCHED_FLAG_DL_DEMOTION\n");
+	trace_write("TEST4: START - No demotion without flag");
+
+	/* Reset to SCHED_NORMAL before starting */
+	attr.size = sizeof(attr);
+	attr.sched_policy = SCHED_NORMAL;
+	sys_sched_setattr(0, &attr, 0);
+
+	attr.sched_policy = SCHED_DEADLINE;
+	attr.sched_runtime = 10 * 1000 * 1000;		/* 10ms */
+	attr.sched_deadline = 100 * 1000 * 1000;	/* 100ms */
+	attr.sched_period = 100 * 1000 * 1000;		/* 100ms */
+	attr.sched_flags = 0;				/* No demotion flag */
+	attr.sched_nice = 0;
+
+	if (sys_sched_setattr(0, &attr, 0) < 0) {
+		/* Skip on missing privileges, as Test 1 does */
+		if (errno == EPERM) {
+			printf(" SKIP: Need CAP_SYS_NICE or root privileges\n");
+			return 0;
+		}
+		perror(" FAIL: sys_sched_setattr");
+		return -1;
+	}
+
+	/* Burn CPU to exhaust runtime */
+	printf(" Exhausting runtime...\n");
+	trace_write("TEST4: Exhausting runtime (no demotion flag set)");
+	burn_cpu(15 * 1000 * 1000);
+	trace_write("TEST4: CPU burn complete, checking policy");
+
+	/* Should still be SCHED_DEADLINE (throttled, not demoted) */
+	policy = get_current_policy();
+	if (policy == SCHED_DEADLINE) {
+		printf(" PASS: Remained SCHED_DEADLINE (throttled, not demoted)\n");
+		trace_write("TEST4: PASS - Remained SCHED_DEADLINE");
+		ret = 0;
+	} else {
+		printf(" FAIL: Changed to policy %d without demotion flag\n", policy);
+		trace_write("TEST4: FAIL - Changed to policy %d", policy);
+		ret = -1;
+	}
+
+	/* Leave the task as SCHED_NORMAL whatever the outcome */
+	attr.sched_policy = SCHED_NORMAL;
+	sys_sched_setattr(0, &attr, 0);
+	trace_write("TEST4: END");
+
+	return ret;
+}
+
+/*
+ * Run the four functional tests in order, pausing 300ms between them so
+ * they are easy to tell apart in the trace, then print a summary.
+ * Returns 0 when no test reported a failure, 1 otherwise.
+ */
+int main(void)
+{
+	static int (* const tests[])(void) = {
+		test_basic_demotion,
+		test_promotion_on_timer,
+		test_param_change_while_demoted,
+		test_no_demotion_without_flag,
+	};
+	const int ntests = sizeof(tests) / sizeof(tests[0]);
+	int failures = 0;
+
+	printf("SCHED_DEADLINE Demotion Tests\n");
+	printf("==============================\n\n");
+
+	/* The marker is optional: nothing is reported when it cannot be opened */
+	trace_marker_init();
+	trace_write("=== SCHED_DEADLINE Demotion Test Suite START ===");
+
+	for (int idx = 0; idx < ntests; idx++) {
+		if (idx > 0) {
+			/* 300ms is longer than any period used by the tests */
+			printf("\n--- Pausing 300ms between tests ---\n");
+			trace_write("=== PAUSE between tests (300ms) ===");
+			usleep(300 * 1000);
+		}
+
+		if (tests[idx]() < 0)
+			failures++;
+	}
+
+	/* Summary */
+	printf("\n==============================\n");
+	if (failures != 0) {
+		printf("%d test(s) FAILED\n", failures);
+		trace_write("=== Test Suite FAILED (%d failures) ===", failures);
+		trace_marker_close();
+		return 1;
+	}
+
+	printf("All tests PASSED\n");
+	trace_write("=== Test Suite PASSED ===");
+	trace_marker_close();
+	return 0;
+}
diff --git a/tools/testing/selftests/sched/run_dl_demotion_with_trace.sh b/tools/testing/selftests/sched/run_dl_demotion_with_trace.sh
new file mode 100755
index 0000000000000..4b37864d45975
--- /dev/null
+++ b/tools/testing/selftests/sched/run_dl_demotion_with_trace.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run SCHED_DEADLINE demotion tests with ftrace enabled to see
+# state machine transitions.
+#
+# Must be run as root from the directory containing dl_demotion_test.
+# Exits with the exit status of the test binary.
+
+TEST_BIN="./dl_demotion_test"
+
+# Check privileges first: the tracing directories are not readable by
+# ordinary users, so probing them before this check would report a
+# misleading "ftrace not available" error.
+if [ "$EUID" -ne 0 ]; then
+	echo "ERROR: This script must be run as root"
+	exit 1
+fi
+
+# Use the first location that really holds the tracing control files
+# (the bare directory can exist without tracefs being mounted on it).
+TRACE_DIR=""
+for dir in /sys/kernel/tracing /sys/kernel/debug/tracing; do
+	if [ -e "$dir/tracing_on" ]; then
+		TRACE_DIR="$dir"
+		break
+	fi
+done
+
+if [ -z "$TRACE_DIR" ]; then
+	echo "ERROR: ftrace not available at /sys/kernel/tracing or /sys/kernel/debug/tracing"
+	echo "Make sure tracefs or debugfs is mounted and CONFIG_FTRACE is enabled"
+	exit 1
+fi
+
+if [ ! -x "$TEST_BIN" ]; then
+	echo "ERROR: Test binary not found: $TEST_BIN"
+	echo "Build with: make"
+	exit 1
+fi
+
+echo "Setting up ftrace..."
+
+# Clear previous trace
+echo 0 > "$TRACE_DIR/tracing_on"
+echo > "$TRACE_DIR/trace"
+
+# Enable trace_printk (optional; the group silences a failing redirection too)
+{ echo 1 > "$TRACE_DIR/options/trace_printk"; } 2>/dev/null || true
+
+# Enable sched events (optional)
+{ echo 1 > "$TRACE_DIR/events/sched/enable"; } 2>/dev/null || true
+
+# Start tracing
+echo 1 > "$TRACE_DIR/tracing_on"
+
+echo "Running deadline demotion tests..."
+echo "===================================="
+echo ""
+
+# Run the test and remember its verdict for the script's exit status
+"$TEST_BIN"
+test_rc=$?
+
+echo ""
+echo "===================================="
+echo ""
+
+# Stop tracing
+echo 0 > "$TRACE_DIR/tracing_on"
+
+# Show relevant trace entries
+echo "Trace output (demotion/promotion events):"
+echo "=========================================="
+grep -E "dl_demote|dl_promote|dl_timer|switched_from_dl|switched_to_dl|setscheduler" \
+	"$TRACE_DIR/trace" | tail -100
+
+echo ""
+echo "Full trace saved to: /tmp/dl_demotion_trace.txt"
+cat "$TRACE_DIR/trace" > /tmp/dl_demotion_trace.txt
+
+# Reset tracing
+{ echo 0 > "$TRACE_DIR/events/sched/enable"; } 2>/dev/null || true
+echo > "$TRACE_DIR/trace"
+
+echo ""
+echo "Done!"
+
+exit "$test_rc"
--
2.53.0