Re: [PATCH 04/11] sched_ext: Fix processs_ddsp_deferred_locals() by unifying DTL_INVALID handling

From: David Vernet
Date: Sun Sep 01 2024 - 11:36:05 EST


On Sat, Aug 31, 2024 at 10:03:58PM -1000, Tejun Heo wrote:
> Hello,
>
> On Sat, Aug 31, 2024 at 07:56:39PM -0500, David Vernet wrote:
> ...
> > Sorry, should have been more clear: the testcase dispatched all tasks to
> > the wrong CPU, which is why it's a kworker in the print output below. I
> > believe that ksoftiqrd hit the same path as well and just wasn't printed
> > in the output because it lost the race to scx_bpf_error(). Let me know
> > if you want the testcase to repro and I can send it, or send a separate
> > patch to add it to selftests.
>
> Yeah, please share the repro.

See the attached patch. You can run the test as follows after rebuilding
selftests:

./runner -t ddsp_local_on_invalid

You may have to run the test a few times to see the repro. Worth noting
is that the repro doesn't seem to hit if we don't explicitly set the
fallback target to 0 in ddsp_local_on_invalid_enqueue().

Thanks,
David
From 5e1a850b7db989429b94ea5d9cdf786faf3bcd4f Mon Sep 17 00:00:00 2001
From: David Vernet <void@xxxxxxxxxxxxx>
Date: Sat, 31 Aug 2024 19:16:22 -0500
Subject: [PATCH] scx: Add test validating direct dispatch with LOCAL_ON

SCX_DSQ_LOCAL_ON | cpu can now be invoked on the direct dispatch path.
Let's ensure we're gracefully handling it being dispatched to a CPU
that it's not allowed to run on.

Signed-off-by: David Vernet <void@xxxxxxxxxxxxx>
---
tools/testing/selftests/sched_ext/Makefile | 1 +
.../sched_ext/ddsp_local_on_invalid.bpf.c | 42 ++++++++++++++
.../sched_ext/ddsp_local_on_invalid.c | 58 +++++++++++++++++++
3 files changed, 101 insertions(+)
create mode 100644 tools/testing/selftests/sched_ext/ddsp_local_on_invalid.bpf.c
create mode 100644 tools/testing/selftests/sched_ext/ddsp_local_on_invalid.c

diff --git a/tools/testing/selftests/sched_ext/Makefile b/tools/testing/selftests/sched_ext/Makefile
index 0754a2c110a1..4823a67e6854 100644
--- a/tools/testing/selftests/sched_ext/Makefile
+++ b/tools/testing/selftests/sched_ext/Makefile
@@ -165,6 +165,7 @@ auto-test-targets := \
enq_last_no_enq_fails \
enq_select_cpu_fails \
ddsp_bogus_dsq_fail \
+ ddsp_local_on_invalid \
ddsp_vtimelocal_fail \
dsp_local_on \
exit \
diff --git a/tools/testing/selftests/sched_ext/ddsp_local_on_invalid.bpf.c b/tools/testing/selftests/sched_ext/ddsp_local_on_invalid.bpf.c
new file mode 100644
index 000000000000..e4512d7cc4b5
--- /dev/null
+++ b/tools/testing/selftests/sched_ext/ddsp_local_on_invalid.bpf.c
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
+ * Copyright (c) 2024 David Vernet <dvernet@xxxxxxxx>
+ */
+#include <scx/common.bpf.h>
+
+char _license[] SEC("license") = "GPL";
+const volatile s32 nr_cpus;
+
+UEI_DEFINE(uei);
+
+s32 BPF_STRUCT_OPS(ddsp_local_on_invalid_select_cpu, struct task_struct *p,
+ s32 prev_cpu, u64 wake_flags)
+{
+ return prev_cpu;
+}
+
+void BPF_STRUCT_OPS(ddsp_local_on_invalid_enqueue, struct task_struct *p,
+ u64 enq_flags)
+{
+ int target = bpf_cpumask_first_zero(p->cpus_ptr);
+
+ if (target >= nr_cpus)
+ target = 0;
+
+ scx_bpf_dispatch(p, SCX_DSQ_LOCAL_ON | target, SCX_SLICE_DFL, enq_flags);
+}
+
+void BPF_STRUCT_OPS(ddsp_local_on_invalid_exit, struct scx_exit_info *ei)
+{
+ UEI_RECORD(uei, ei);
+}
+
+SEC(".struct_ops.link")
+struct sched_ext_ops ddsp_local_on_invalid_ops = {
+ .select_cpu = ddsp_local_on_invalid_select_cpu,
+ .enqueue = ddsp_local_on_invalid_enqueue,
+ .exit = ddsp_local_on_invalid_exit,
+ .name = "ddsp_local_on_invalid",
+ .timeout_ms = 2000U,
+};
diff --git a/tools/testing/selftests/sched_ext/ddsp_local_on_invalid.c b/tools/testing/selftests/sched_ext/ddsp_local_on_invalid.c
new file mode 100644
index 000000000000..7bc49df06ee0
--- /dev/null
+++ b/tools/testing/selftests/sched_ext/ddsp_local_on_invalid.c
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
+ * Copyright (c) 2024 David Vernet <dvernet@xxxxxxxx>
+ */
+#include <bpf/bpf.h>
+#include <scx/common.h>
+#include <unistd.h>
+#include "ddsp_local_on_invalid.bpf.skel.h"
+#include "scx_test.h"
+
+static enum scx_test_status setup(void **ctx)
+{
+ struct ddsp_local_on_invalid *skel;
+
+ skel = ddsp_local_on_invalid__open();
+ SCX_FAIL_IF(!skel, "Failed to open");
+
+ skel->rodata->nr_cpus = libbpf_num_possible_cpus();
+ SCX_FAIL_IF(ddsp_local_on_invalid__load(skel), "Failed to load skel");
+ *ctx = skel;
+
+ return SCX_TEST_PASS;
+}
+
+static enum scx_test_status run(void *ctx)
+{
+ struct ddsp_local_on_invalid *skel = ctx;
+ struct bpf_link *link;
+
+ link = bpf_map__attach_struct_ops(skel->maps.ddsp_local_on_invalid_ops);
+ SCX_FAIL_IF(!link, "Failed to attach struct_ops");
+
+ /* Just sleeping is fine, plenty of scheduling events happening */
+ sleep(1);
+
+ SCX_EQ(skel->data->uei.kind, EXIT_KIND(SCX_EXIT_ERROR));
+ bpf_link__destroy(link);
+
+ return SCX_TEST_PASS;
+}
+
+static void cleanup(void *ctx)
+{
+ struct ddsp_local_on_invalid *skel = ctx;
+
+ ddsp_local_on_invalid__destroy(skel);
+}
+
+struct scx_test ddsp_local_on_invalid = {
+ .name = "ddsp_local_on_invalid",
+ .description = "Verify we can gracefully handle direct dispatch "
+ "of tasks to an invalid local DSQ from osp.dispatch()",
+ .setup = setup,
+ .run = run,
+ .cleanup = cleanup,
+};
+REGISTER_SCX_TEST(&ddsp_local_on_invalid)
--
2.45.2

Attachment: signature.asc
Description: PGP signature