Re: [PATCH 0/4] sched: Various reweight_entity() fixes
From: K Prateek Nayak
Date: Wed Feb 04 2026 - 05:18:20 EST
Hello Peter,
On 2/3/2026 5:49 PM, K Prateek Nayak wrote:
>> I'm not sure I have valuable tracing patches, I just stick random
>> trace_printk()s in.
>
> I'll plop those in and update once the I get a log for sum_shift++.
Here is one set of log:
# schbench enqueue
schbench-103551 [255] ... : place_entity: Placed se: weight(1048576) vruntime(722711379921) vlag(2140867) deadline(722714179921) curr?(0)
schbench-103551 [255] ... : place_entity: Placed on cfs_rq: depth(0) weight(4194304) nr_queued(4) sum_w_vruntime(0) sum_weight(3145728) zero_vruntime(722714445432) sum_shift(0) avg_vruntime(722714056004)
schbench-103551 [255] ... : __enqueue_entity: Enqueue cfs_rq: depth(0) weight(5242880) nr_queued(5) sum_w_vruntime(663820959744) sum_weight(4194304) zero_vruntime(722713520787) sum_shift(0) avg_vruntime(722713520787)
# Couple of reweight while running
schbench-103551 [255] ... : reweight_entity: Reweight before se: weight(3459) vruntime(701806887728588) vlag(0) deadline(701807851411101) curr?(1)
schbench-103551 [255] ... : reweight_entity: Before cfs_rq: depth(-1) weight(3459) nr_queued(1) sum_w_vruntime(0) sum_weight(0) zero_vruntime(701164930256050) sum_shift(0) avg_vruntime(701806887728588)
schbench-103551 [255] ... : reweight_entity: Reweight after se: weight(3505) vruntime(701806939248075) vlag(0) deadline(701807839439774) curr?(1)
schbench-103551 [255] ... : reweight_entity: After cfs_rq: depth(-1) weight(3505) nr_queued(1) sum_w_vruntime(0) sum_weight(0) zero_vruntime(701164930256050) sum_shift(0) avg_vruntime(701806939248075)
schbench-103551 [255] ... : reweight_entity: Reweight before se: weight(3505) vruntime(701808246440736) vlag(0) deadline(701809202174069) curr?(1)
schbench-103551 [255] ... : reweight_entity: Before cfs_rq: depth(-1) weight(3505) nr_queued(1) sum_w_vruntime(0) sum_weight(0) zero_vruntime(701164930256050) sum_shift(0) avg_vruntime(701808246440736)
schbench-103551 [255] ... : reweight_entity: Reweight after se: weight(3513) vruntime(701808246440736) vlag(0) deadline(701809199997619) curr?(1)
schbench-103551 [255] ... : reweight_entity: After cfs_rq: depth(-1) weight(3513) nr_queued(1) sum_w_vruntime(0) sum_weight(0) zero_vruntime(701164930256050) sum_shift(0) avg_vruntime(701808246440736)
# put_prev_entity?
schbench-103551 [255] ... : __enqueue_entity: Enqueue cfs_rq: depth(0) weight(5242880) nr_queued(5) sum_w_vruntime(-2130969624576) sum_weight(5242880) zero_vruntime(722714695180) sum_shift(0) avg_vruntime(722714695180)
# set_next_entity?
schbench-103551 [255] ... : __dequeue_entity: Dequeue cfs_rq: depth(0) weight(5242880) nr_queued(5) sum_w_vruntime(0) sum_weight(4194304) zero_vruntime(722715015932) sum_shift(0) avg_vruntime(722715015932)
# More reweight
<...>-102371 [255] ... : reweight_entity: Reweight before se: weight(3513) vruntime(701809611552543) vlag(0) deadline(701810567285876) curr?(1)
<...>-102371 [255] ... : reweight_entity: Before cfs_rq: depth(-1) weight(3513) nr_queued(1) sum_w_vruntime(0) sum_weight(0) zero_vruntime(701164930256050) sum_shift(0) avg_vruntime(701809611552543)
<...>-102371 [255] ... : reweight_entity: Reweight after se: weight(3508) vruntime(701809611552543) vlag(0) deadline(701810568648095) curr?(1)
<...>-102371 [255] ... : reweight_entity: After cfs_rq: depth(-1) weight(3508) nr_queued(1) sum_w_vruntime(0) sum_weight(0) zero_vruntime(701164930256050) sum_shift(0) avg_vruntime(701809611552543)
<...>-102371 [255] ... : place_entity: Placed se: weight(90891264) vruntime(701808975077099) vlag(24732) deadline(701808975109401) curr?(0)
<...>-102371 [255] ... : place_entity: Placed on cfs_rq: depth(-1) weight(3508) nr_queued(1) sum_w_vruntime(0) sum_weight(0) zero_vruntime(701164930256050) sum_shift(0) avg_vruntime(701809615900788)
# Overflow on enqueue
<...>-102371 [255] ... : __enqueue_entity: Overflowed cfs_rq:
<...>-102371 [255] ... : dump_h_overflow_cfs_rq: cfs_rq: depth(0) weight(90894772) nr_queued(2) sum_w_vruntime(0) sum_weight(0) zero_vruntime(701164930256050) sum_shift(0) avg_vruntime(701809615900788)
<...>-102371 [255] ... : dump_h_overflow_entity: se: weight(3508) vruntime(701809615900788) slice(2800000) deadline(701810568648095) curr?(1) task?(1) <-------- cfs_rq->curr
<...>-102371 [255] ... : __enqueue_entity: Overflowed se:
<...>-102371 [255] ... : dump_h_overflow_entity: se: weight(90891264) vruntime(701808975077099) slice(2800000) deadline(701808975109401) curr?(0) task?(0) <-------- new se
# Botched attempt at dumping the whole hierarchy
<...>-102371 [255] ... : __enqueue_entity: Overflowed hierarchy from root:
<...>-102371 [255] ... : dump_h_overflow_cfs_rq: cfs_rq: depth(0) weight(90894772) nr_queued(2) sum_w_vruntime(0) sum_weight(0) zero_vruntime(701164930256050) sum_shift(0) avg_vruntime(701809615900788)
<...>-102371 [255] ... : dump_h_overflow_entity: se: weight(3508) vruntime(701809615900788) slice(2800000) deadline(701810568648095) curr?(1) task?(1)
<...>-102371 [255] ... : dump_h_overflow_cfs_rq: cfs_rq: depth(1) weight(5242880) nr_queued(5) sum_w_vruntime(0) sum_weight(4194304) zero_vruntime(722715015932) sum_shift(0) avg_vruntime(722715086591)
<...>-102371 [255] ... : dump_h_overflow_entity: se: weight(1048576) vruntime(722715369227) slice(2800000) deadline(722718169227) curr?(1) task?(0)
<...>-102371 [255] ... : dump_h_overflow_entity: se: weight(1048576) vruntime(722713453675) slice(2800000) deadline(722716247576) curr?(1) task?(0)
<...>-102371 [255] ... : dump_h_overflow_entity: se: weight(1048576) vruntime(722713498238) slice(2800000) deadline(722716290797) curr?(1) task?(0)
<...>-102371 [255] ... : dump_h_overflow_entity: se: weight(1048576) vruntime(722716384383) slice(2800000) deadline(722719172114) curr?(1) task?(0)
<...>-102371 [255] ... : dump_h_overflow_entity: se: weight(1048576) vruntime(722716727432) slice(2800000) deadline(722719517387) curr?(1) task?(0)
Attached is the debug patch. that can be used to interpret this data more.
per-CPU padding is just a variable to add padding for higher depths when
printing the hierarchy.
--
Thanks and Regards,
Prateek
From 8fe3036b04a3a529dc500a9c880e23bcfd1daa42 Mon Sep 17 00:00:00 2001
From: "Gautham R. Shenoy" <gautham.shenoy@xxxxxxx>
Date: Wed, 4 Feb 2026 10:08:07 +0000
Subject: [PATCH] sched/fair: Debug multiplication overflow
Read the trace buffer whn dmesg says:
EEVDF: Overflow (mul)!
EEVDF: Overflow CPU/<X>
per_cpu/cpu<X>/trace should have all the information.
Not-signed-off-by: K Prateek Nayak <kprateek.nayak@xxxxxxx>
---
kernel/sched/fair.c | 83 +++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 81 insertions(+), 2 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6f1a86f7969a..15d521f795ff 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -670,6 +670,53 @@ __sum_w_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
cfs_rq->sum_weight += weight;
}
+static DEFINE_PER_CPU(char[100], dump_padding);
+u64 avg_vruntime(struct cfs_rq *cfs_rq);
+
+static void dump_h_overflow_entity(struct sched_entity *se, int depth, bool curr)
+{
+ char *padding = *this_cpu_ptr(&dump_padding);
+
+ padding[depth] = '\0';
+ trace_printk("%sse: weight(%lu) vruntime(%llu) slice(%llu) deadline(%llu) curr?(%d) task?(%d)\n",
+ padding, se->load.weight, se->vruntime, se->slice, se->deadline, curr, !entity_is_task(se));
+}
+
+static void dump_h_overflow_cfs_rq(struct cfs_rq *cfs_rq, int depth, bool rec)
+{
+ struct rb_node *left = rb_first_cached(&cfs_rq->tasks_timeline);
+ char *padding = *this_cpu_ptr(&dump_padding);
+
+ padding[depth] = '\0';
+ trace_printk("%scfs_rq: depth(%d) weight(%lu) nr_queued(%u) sum_w_vruntime(%lld) sum_weight(%llu) zero_vruntime(%llu) sum_shift(%u) avg_vruntime(%llu)\n",
+ padding, depth, cfs_rq->load.weight, cfs_rq->nr_queued, cfs_rq->sum_w_vruntime, cfs_rq->sum_weight, cfs_rq->zero_vruntime, cfs_rq->sum_shift, avg_vruntime(cfs_rq));
+
+ if (cfs_rq->curr)
+ dump_h_overflow_entity(cfs_rq->curr, depth, true);
+
+ while (left) {
+ dump_h_overflow_entity(__node_2_se(left), depth, true);
+ left = rb_next(left);
+ }
+
+ if (rec) {
+ padding[depth] = ' ';
+ if (cfs_rq->curr && !entity_is_task(cfs_rq->curr))
+ dump_h_overflow_cfs_rq(group_cfs_rq(cfs_rq->curr), depth + 1, true);
+
+ left = rb_first_cached(&cfs_rq->tasks_timeline);
+ while (left) {
+ struct sched_entity *se = __node_2_se(left);
+
+ if (!entity_is_task(se))
+ dump_h_overflow_cfs_rq(group_cfs_rq(se), depth + 1, true);
+ left = rb_next(left);
+ }
+ }
+
+ padding[depth] = '\0';
+}
+
static void
sum_w_vruntime_add_paranoid(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
@@ -680,17 +727,32 @@ sum_w_vruntime_add_paranoid(struct cfs_rq *cfs_rq, struct sched_entity *se)
weight = avg_vruntime_weight(cfs_rq, se->load.weight);
key = entity_key(cfs_rq, se);
- if (check_mul_overflow(key, weight, &key))
+ if (check_mul_overflow(key, weight, &key)) {
+ pr_warn("EEVDF: Overflow (mul)!\n");
+ pr_warn("EEVDF: Overflow CPU/%d\n", smp_processor_id());
goto overflow;
+ }
- if (check_add_overflow(cfs_rq->sum_w_vruntime, key, &tmp))
+ if (check_add_overflow(cfs_rq->sum_w_vruntime, key, &tmp)) {
+ pr_warn("EEVDF: Overflow (add)!\n");
+ pr_warn("EEVDF: Overflow CPU/%d\n", smp_processor_id());
goto overflow;
+ }
cfs_rq->sum_w_vruntime = tmp;
cfs_rq->sum_weight += weight;
return;
overflow:
+ trace_printk("Overflowed cfs_rq:\n");
+ dump_h_overflow_cfs_rq(cfs_rq, 0, false);
+
+ trace_printk("Overflowed se:\n");
+ dump_h_overflow_entity(se, 0, se == cfs_rq->curr);
+
+ trace_printk("Overflowed hierarchy from root:\n");
+ dump_h_overflow_cfs_rq(&rq_of(cfs_rq)->cfs, 0, true);
+
/*
* There's gotta be a limit -- if we're still failing at this point
* there's really nothing much to be done about things.
@@ -921,6 +983,8 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
se->min_slice = se->slice;
rb_add_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
__entity_less, &min_vruntime_cb);
+ trace_printk("Enqueue cfs_rq: depth(%d) weight(%lu) nr_queued(%u) sum_w_vruntime(%lld) sum_weight(%llu) zero_vruntime(%llu) sum_shift(%u) avg_vruntime(%llu)\n",
+ se->depth - 1, cfs_rq->load.weight, cfs_rq->nr_queued, cfs_rq->sum_w_vruntime, cfs_rq->sum_weight, cfs_rq->zero_vruntime, cfs_rq->sum_shift, avg_vruntime(cfs_rq));
}
static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -929,6 +993,8 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
&min_vruntime_cb);
sum_w_vruntime_sub(cfs_rq, se);
update_zero_vruntime(cfs_rq);
+ trace_printk("Dequeue cfs_rq: depth(%d) weight(%lu) nr_queued(%u) sum_w_vruntime(%lld) sum_weight(%llu) zero_vruntime(%llu) sum_shift(%u) avg_vruntime(%llu)\n",
+ se->depth - 1, cfs_rq->load.weight, cfs_rq->nr_queued, cfs_rq->sum_w_vruntime, cfs_rq->sum_weight, cfs_rq->zero_vruntime, cfs_rq->sum_shift, avg_vruntime(cfs_rq));
}
struct sched_entity *__pick_root_entity(struct cfs_rq *cfs_rq)
@@ -3963,6 +4029,10 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
bool rel_vprot = false;
u64 avruntime = 0;
+ trace_printk("Reweight before se: weight(%lu) vruntime(%llu) vlag(%lld) deadline(%llu) curr?(%d)\n",
+ se->load.weight, se->vruntime, se->vlag, se->deadline, se == cfs_rq->curr);
+ trace_printk("Before cfs_rq: depth(%d) weight(%lu) nr_queued(%u) sum_w_vruntime(%lld) sum_weight(%llu) zero_vruntime(%llu) sum_shift(%u) avg_vruntime(%llu)\n",
+ se->depth - 1, cfs_rq->load.weight, cfs_rq->nr_queued, cfs_rq->sum_w_vruntime, cfs_rq->sum_weight, cfs_rq->zero_vruntime, cfs_rq->sum_shift, avg_vruntime(cfs_rq));
if (se->on_rq) {
/* commit outstanding execution time */
update_curr(cfs_rq);
@@ -3998,6 +4068,10 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
__enqueue_entity(cfs_rq, se);
cfs_rq->nr_queued++;
}
+ trace_printk("Reweight after se: weight(%lu) vruntime(%llu) vlag(%lld) deadline(%llu) curr?(%d)\n",
+ se->load.weight, se->vruntime, se->vlag, se->deadline, se == cfs_rq->curr);
+ trace_printk("After cfs_rq: depth(%d) weight(%lu) nr_queued(%u) sum_w_vruntime(%lld) sum_weight(%llu) zero_vruntime(%llu) sum_shift(%u) avg_vruntime(%llu)\n",
+ se->depth - 1, cfs_rq->load.weight, cfs_rq->nr_queued, cfs_rq->sum_w_vruntime, cfs_rq->sum_weight, cfs_rq->zero_vruntime, cfs_rq->sum_shift, avg_vruntime(cfs_rq));
}
static void reweight_task_fair(struct rq *rq, struct task_struct *p,
@@ -5367,6 +5441,11 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
* EEVDF: vd_i = ve_i + r_i/w_i
*/
se->deadline = se->vruntime + vslice;
+
+ trace_printk("Placed se: weight(%lu) vruntime(%llu) vlag(%lld) deadline(%llu) curr?(%d)\n",
+ se->load.weight, se->vruntime, se->vlag, se->deadline, se == cfs_rq->curr);
+ trace_printk("Placed on cfs_rq: depth(%d) weight(%lu) nr_queued(%u) sum_w_vruntime(%lld) sum_weight(%llu) zero_vruntime(%llu) sum_shift(%u) avg_vruntime(%llu)\n",
+ se->depth - 1, cfs_rq->load.weight, cfs_rq->nr_queued, cfs_rq->sum_w_vruntime, cfs_rq->sum_weight, cfs_rq->zero_vruntime, cfs_rq->sum_shift, avg_vruntime(cfs_rq));
}
static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
--
2.34.1