Re: [PATCH 6/6 v3] sched/eevdf: Speedup short slice task scheduling
From: Peter Zijlstra
Date: Thu Jun 25 2026 - 18:30:18 EST
On Thu, Jun 25, 2026 at 02:59:16PM +0200, Vincent Guittot wrote:
> On Thu, 25 Jun 2026 at 14:57, Vincent Guittot
> <vincent.guittot@xxxxxxxxxx> wrote:
> >
> > On Thu, 25 Jun 2026 at 12:10, Peter Zijlstra <peterz@xxxxxxxxxxxxx> wrote:
> > >
> > > On Thu, Jun 25, 2026 at 10:37:20AM +0200, Peter Zijlstra wrote:
> > > > On Thu, Jun 25, 2026 at 01:07:43PM +0530, K Prateek Nayak wrote:
> > > >
> > > > > > +static u64 eligible_vruntime(struct cfs_rq *cfs_rq, struct sched_entity *se)
> > > > > > +{
> > > > > > + struct sched_entity *curr = cfs_rq->curr;
> > > > >
> > > > > curr seems to be unused here and is NULL anyways when
> > > > > set_protect_slice() is called ;-)
> > > >
> > > > Ah, but it is not with the flat patches on, which is why I was a little
> > > > confused ;-)
> > > >
> > > > That said; I now see se == curr. So let me go have another look at all
> > > > that.
> > >
> > > I might be slow -- it is definitely waay too warm already -- but I'm not
> > > seeing how you don't want avg_vruntime() here.
> >
> > It is somehow related to avg_vruntime() except that I don't want the
> > current avg_vruntime but the avg_vruntime when entity_key(se) will be
> > null and se will become ineligible
> >
> > If I use the current avg_vruntime(), once se has run enough to get
> > its vruntime == (now old) avg_vruntime, the new avg_vruntime will have
> > moved forward and the se's vruntime will still be eligible
>
> And I should name it ineligible_vruntime because of the +1
I've ended up with something like so.
---
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -777,6 +777,67 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq)
return cfs_rq->zero_vruntime;
}
+/*
+ * ineligible_vruntime() - vruntime just past the point where curr's own
+ * vruntime meets the weighted average.
+ *
+ * The current avg_vruntime() is not a usable limit for this: by the time an
+ * entity has run up to that (by then stale) average, the average itself has
+ * moved forward and the entity is still eligible. So instead solve for the
+ * point where the entity's vruntime and the average coincide.
+ *
+ * The weighted average V is:
+ *
+ *       \Sum (v_i - v0)*w_i
+ *   V = ------------------- + v0
+ *            \Sum w_i
+ *
+ * Let W = \Sum w_i, and move v_j such that 'v_j == V', thus:
+ *
+ * V = 1/W * {(v_j - v0)*w_j + \Sum_i!=j (v_i - v0)*w_i} + v0
+ *
+ * v_j = 1/W * {(v_j - v0)*w_j + \Sum_i!=j (v_i - v0)*w_i} + v0
+ *
+ * v_j = 1/W * (v_j - v0)*w_j + 1/W * \Sum_i!=j (v_i - v0)*w_i + v0
+ *
+ * v_j - 1/W * (v_j - v0)*w_j = 1/W * \Sum_i!=j (v_i - v0)*w_i + v0
+ *
+ * v_j*W - (v_j - v0)*w_j = \Sum_i!=j (v_i - v0)*w_i + v0*W
+ *
+ * v_j*(W - w_j) + v0*w_j = \Sum_i!=j (v_i - v0)*w_i + v0*W
+ *
+ * v_j*(W - w_j) = \Sum_i!=j (v_i - v0)*w_i + v0*(W - w_j)
+ *
+ *         \Sum_i!=j (v_i - v0)*w_i
+ *   v_j = ------------------------ + v0
+ *                 W - w_j
+ *
+ * When v_j happens to be curr, then '\Sum_i!=j (v_i - v0)*w_i'
+ * is cfs_rq->sum_w_vruntime, and 'W - w_j' is cfs_rq->sum_weight, since curr
+ * is not included in the sum.
+ *
+ * Returns cfs_rq->zero_vruntime plus the quotient
+ * sum_w_vruntime / sum_weight (rounded down, see below), plus 1. The '+ 1'
+ * steps one unit past the point of equality, which is why this is named
+ * "ineligible" rather than "eligible". If sum_weight is 0 the division is
+ * skipped and the result is zero_vruntime + 1.
+ */
+static u64 ineligible_vruntime(struct cfs_rq *cfs_rq)
+{
+ struct sched_entity *curr = cfs_rq->curr;
+ long weight = cfs_rq->sum_weight;
+ s64 delta = 0;
+
+ if (curr && !curr->on_rq)
+ curr = NULL;
+
+ /*
+ * This is called from set_next_task_fair(.first=true) /
+ * set_protect_slice() so curr had better be set and on_rq.
+ *
+ * Note that curr is only consulted for this sanity check; it does
+ * not enter the computation below.
+ */
+ WARN_ON_ONCE(!curr);
+
+ if (weight) {
+ s64 runtime = cfs_rq->sum_w_vruntime;
+
+ /*
+ * Do not add @curr to obtain the effective '- w_j' terms.
+ *
+ * That is: curr's contribution is deliberately left out of both
+ * runtime and weight, which yields the '\Sum_i!=j' numerator and
+ * the 'W - w_j' denominator from the derivation above.
+ */
+
+ /*
+ * sign flips effective floor / ceiling
+ *
+ * Bias a negative numerator by (weight - 1) so the quotient
+ * rounds toward negative infinity for both signs -- assuming
+ * div64_long() truncates toward zero like plain C division
+ * (NOTE(review): confirm).
+ */
+ if (runtime < 0)
+ runtime -= (weight - 1);
+
+ delta = div64_long(runtime, weight);
+ }
+
+ return cfs_rq->zero_vruntime + delta + 1;
+}
+
static inline u64 cfs_rq_max_slice(struct cfs_rq *cfs_rq);
/*
@@ -1058,8 +1119,14 @@ static inline void set_protect_slice(str
slice = cfs_rq_min_slice(cfs_rq);
slice = min(slice, se->slice);
- if (slice != se->slice)
- vprot = min_vruntime(vprot, se->vruntime + calc_delta_fair(slice, se));
+
+ /* If there are shorter slices than se's one */
+ if (slice != se->slice) {
+ if (sched_feat(PREEMPT_SHORT))
+ vprot = min_vruntime(vprot, ineligible_vruntime(cfs_rq));
+ else
+ vprot = min_vruntime(vprot, se->vruntime + calc_delta_fair(slice, se));
+ }
se->vprot = vprot;
}