[PATCH 06/15] sched: Commit to lag based placement

From: Peter Zijlstra
Date: Wed May 31 2023 - 08:49:16 EST


Removes the FAIR_SLEEPERS code in favour of the new LAG based
placement.

Specifically, the whole FAIR_SLEEPER thing was a very crud
approximation to make up for the lack of lag based placement,
specifically the 'service owed' part. This is important for things
like 'starve' and 'hackbench'.

One side effect of FAIR_SLEEPER is that is caused 'small' unfairness,
specifically, by always ignoring up-to 'thresh' sleeptime it would
have a 50%/50% time distribution for a 50% sleeper vs a 100% runner,
while strictly speaking this should (of course) result in a 33%/67%
split (as CFS will also do if the sleep period exceeds 'thresh').

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
kernel/sched/fair.c | 59 ------------------------------------------------
kernel/sched/features.h | 8 ------
2 files changed, 1 insertion(+), 66 deletions(-)

--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5068,29 +5068,6 @@ static void check_spread(struct cfs_rq *
#endif
}

-static inline bool entity_is_long_sleeper(struct sched_entity *se)
-{
- struct cfs_rq *cfs_rq;
- u64 sleep_time;
-
- if (se->exec_start == 0)
- return false;
-
- cfs_rq = cfs_rq_of(se);
-
- sleep_time = rq_clock_task(rq_of(cfs_rq));
-
- /* Happen while migrating because of clock task divergence */
- if (sleep_time <= se->exec_start)
- return false;
-
- sleep_time -= se->exec_start;
- if (sleep_time > ((1ULL << 63) / scale_load_down(NICE_0_LOAD)))
- return true;
-
- return false;
-}
-
static void
place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
{
@@ -5172,43 +5149,9 @@ place_entity(struct cfs_rq *cfs_rq, stru
if (WARN_ON_ONCE(!load))
load = 1;
lag = div_s64(lag, load);
-
- vruntime -= lag;
- }
-
- if (sched_feat(FAIR_SLEEPERS)) {
-
- /* sleeps up to a single latency don't count. */
- if (!initial) {
- unsigned long thresh;
-
- if (se_is_idle(se))
- thresh = sysctl_sched_min_granularity;
- else
- thresh = sysctl_sched_latency;
-
- /*
- * Halve their sleep time's effect, to allow
- * for a gentler effect of sleepers:
- */
- if (sched_feat(GENTLE_FAIR_SLEEPERS))
- thresh >>= 1;
-
- vruntime -= thresh;
- }
-
- /*
- * Pull vruntime of the entity being placed to the base level of
- * cfs_rq, to prevent boosting it if placed backwards. If the entity
- * slept for a long time, don't even try to compare its vruntime with
- * the base as it may be too far off and the comparison may get
- * inversed due to s64 overflow.
- */
- if (!entity_is_long_sleeper(se))
- vruntime = max_vruntime(se->vruntime, vruntime);
}

- se->vruntime = vruntime;
+ se->vruntime = vruntime - lag;

/*
* When joining the competition; the exisiting tasks will be,
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -1,14 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */

/*
- * Only give sleepers 50% of their service deficit. This allows
- * them to run sooner, but does not allow tons of sleepers to
- * rip the spread apart.
- */
-SCHED_FEAT(FAIR_SLEEPERS, false)
-SCHED_FEAT(GENTLE_FAIR_SLEEPERS, true)
-
-/*
* Using the avg_vruntime, do the right thing and preserve lag across
* sleep+wake cycles. EEVDF placement strategy #1, #2 if disabled.
*/