Re: [REGRESSION] Re: [PATCH 00/24] Complete EEVDF

From: Peter Zijlstra
Date: Fri Nov 29 2024 - 04:09:05 EST


On Thu, Nov 28, 2024 at 12:37:14PM +0100, Marcel Ziswiler wrote:

> > Oooh, that's something. So far the few reports have not been (easily)
> > reproducible. If this is readily reproducible on arm64 that would
> > help a lot. Juri, do you have access to an arm64 test box?
>
> As mentioned above, so far our scheduler stress test is not yet open source but Codethink is eager to share
> anything which helps in resolving this.

I was hoping you could perhaps share a binary with Juri privately or
with RHT (same difference etc), such that he can poke at it too.

Anyway, if you're OK with a bit of back and forth, would you mind adding
the patch below to your kernel and doing:

(all assuming your kernel has ftrace enabled)

echo 1 > /sys/kernel/debug/tracing/options/stacktrace
echo 1 > /proc/sys/kernel/traceoff_on_warning
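(on newer kernels tracefs is mounted at /sys/kernel/tracing directly, so
if the debugfs path isn't there, this should be equivalent:

echo 1 > /sys/kernel/tracing/options/stacktrace

the traceoff_on_warning knob makes tracing stop the moment the WARN
fires, so the interesting part of the buffer doesn't get overwritten
before you can grab it)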

running your test to failure and then dumping the trace into a file
like:

cat /sys/kernel/debug/tracing/trace > ~/trace
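(if the test runs for a while before tripping the WARN, the ring buffer
may wrap and eat the interesting bits first; if so, growing it beforehand
should help, e.g.:

echo 100000 > /sys/kernel/debug/tracing/buffer_size_kb

note that's KB per CPU, so mind your memory)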

Then compress the file (bzip2 or whatever is popular these days) and
send it my way along with a dmesg dump (private is fine -- these things
tend to be large-ish).

Hopefully, this will give us a little clue as to where the double
enqueue happens.
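For reference, the assertion that trips on a double enqueue is the one in
__enqueue_dl_entity() (visible as context in the hunk below):

	WARN_ON_ONCE(!RB_EMPTY_NODE(&dl_se->rb_node));

i.e. the fair server is being inserted into the deadline rbtree while its
node is still linked, and with the stacktrace option set the trace should
show who did the first enqueue.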

---
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index d9d5a702f1a6..b9cd9b40a19f 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1203,6 +1203,11 @@ static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_
scoped_guard (rq_lock, rq) {
struct rq_flags *rf = &scope.rf;

+ if (dl_se == &rq->fair_server) {
+ trace_printk("timer fair server %d throttled %d\n",
+ cpu_of(rq), dl_se->dl_throttled);
+ }
+
if (!dl_se->dl_throttled || !dl_se->dl_runtime)
return HRTIMER_NORESTART;

@@ -1772,6 +1777,9 @@ static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
rq_lock(rq, &rf);
}

+ if (dl_se == &rq->fair_server)
+ trace_printk("inactive fair server %d\n", cpu_of(rq));
+
sched_clock_tick();
update_rq_clock(rq);

@@ -1967,6 +1975,12 @@ update_stats_dequeue_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se,
static void __enqueue_dl_entity(struct sched_dl_entity *dl_se)
{
struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
+ struct rq *rq = rq_of_dl_se(dl_se);
+
+ if (dl_se == &rq->fair_server) {
+ trace_printk("enqueue fair server %d h_nr_running %d\n",
+ cpu_of(rq), rq->cfs.h_nr_running);
+ }

WARN_ON_ONCE(!RB_EMPTY_NODE(&dl_se->rb_node));

@@ -1978,6 +1992,12 @@ static void __enqueue_dl_entity(struct sched_dl_entity *dl_se)
static void __dequeue_dl_entity(struct sched_dl_entity *dl_se)
{
struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
+ struct rq *rq = rq_of_dl_se(dl_se);
+
+ if (dl_se == &rq->fair_server) {
+ trace_printk("dequeue fair server %d h_nr_running %d\n",
+ cpu_of(rq), rq->cfs.h_nr_running);
+ }

if (RB_EMPTY_NODE(&dl_se->rb_node))
return;