[RFC][PATCH 13/15] sched/fair: Implement latency-nice

From: Peter Zijlstra
Date: Wed May 31 2023 - 08:49:41 EST


Implement latency-nice as a modulation of the EEVDF r_i parameter,
specifically apply the inverse sched_prio_to_weight[] relation on
base_slice.

Given a base slice of 3 [ms], this gives a range of:

latency-nice 19: 3*1024 / 15 ~= 204.8 [ms]
latency-nice -20: 3*1024 / 88761 ~= 0.034 [ms]

(which might not make sense)

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Tested-by: K Prateek Nayak <kprateek.nayak@xxxxxxx>
---
kernel/sched/core.c | 14 ++++++++++----
kernel/sched/fair.c | 22 +++++++++++++++-------
kernel/sched/sched.h | 2 ++
3 files changed, 27 insertions(+), 11 deletions(-)

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1305,6 +1305,12 @@ static void set_load_weight(struct task_
}
}

+static inline void set_latency_prio(struct task_struct *p, int prio)
+{
+ p->latency_prio = prio;
+ set_latency_fair(&p->se, prio - MAX_RT_PRIO);
+}
+
#ifdef CONFIG_UCLAMP_TASK
/*
* Serializes updates of utilization clamp values
@@ -4464,9 +4470,10 @@ static void __sched_fork(unsigned long c
p->se.nr_migrations = 0;
p->se.vruntime = 0;
p->se.vlag = 0;
- p->se.slice = sysctl_sched_base_slice;
INIT_LIST_HEAD(&p->se.group_node);

+ set_latency_prio(p, p->latency_prio);
+
#ifdef CONFIG_FAIR_GROUP_SCHED
p->se.cfs_rq = NULL;
#endif
@@ -4718,8 +4725,7 @@ int sched_fork(unsigned long clone_flags

p->prio = p->normal_prio = p->static_prio;
set_load_weight(p, false);
-
- p->latency_prio = NICE_TO_PRIO(0);
+ set_latency_prio(p, NICE_TO_PRIO(0));

/*
* We don't need the reset flag anymore after the fork. It has
@@ -7507,7 +7513,7 @@ static void __setscheduler_latency(struc
const struct sched_attr *attr)
{
if (attr->sched_flags & SCHED_FLAG_LATENCY_NICE)
- p->latency_prio = NICE_TO_PRIO(attr->sched_latency_nice);
+ set_latency_prio(p, NICE_TO_PRIO(attr->sched_latency_nice));
}

/*
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -952,6 +952,21 @@ int sched_update_scaling(void)
}
#endif

+void set_latency_fair(struct sched_entity *se, int prio)
+{
+ u32 weight = sched_prio_to_weight[prio];
+ u64 base = sysctl_sched_base_slice;
+
+ /*
+ * For EEVDF the virtual time slope is determined by w_i (iow.
+ * nice) while the request time r_i is determined by
+ * latency-nice.
+ *
+ * Smaller request gets better latency.
+ */
+ se->slice = div_u64(base << SCHED_FIXEDPOINT_SHIFT, weight);
+}
+
static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se);

/*
@@ -964,13 +979,6 @@ static void update_deadline(struct cfs_r
return;

/*
- * For EEVDF the virtual time slope is determined by w_i (iow.
- * nice) while the request time r_i is determined by
- * sysctl_sched_base_slice.
- */
- se->slice = sysctl_sched_base_slice;
-
- /*
* EEVDF: vd_i = ve_i + r_i / w_i
*/
se->deadline = se->vruntime + calc_delta_fair(se->slice, se);
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2495,6 +2495,8 @@ extern unsigned int sysctl_numa_balancin
extern unsigned int sysctl_numa_balancing_hot_threshold;
#endif

+extern void set_latency_fair(struct sched_entity *se, int prio);
+
#ifdef CONFIG_SCHED_HRTICK

/*