Re: [RFC][PATCH 13/15] sched/fair: Implement latency-nice

From: Benjamin Segall
Date: Wed Oct 11 2023 - 19:24:45 EST


Peter Zijlstra <peterz@xxxxxxxxxxxxx> writes:

> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -952,6 +952,21 @@ int sched_update_scaling(void)
> }
> #endif
>
> +void set_latency_fair(struct sched_entity *se, int prio)
> +{
> + u32 weight = sched_prio_to_weight[prio];
> + u64 base = sysctl_sched_base_slice;
> +
> + /*
> + * For EEVDF the virtual time slope is determined by w_i (iow.
> + * nice) while the request time r_i is determined by
> + * latency-nice.
> + *
> + * Smaller request gets better latency.
> + */
> + se->slice = div_u64(base << SCHED_FIXEDPOINT_SHIFT, weight);
> +}
> +
> static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se);
>
> /*


This seems questionable in combination with the earlier changes that
make things use se->slice by itself as the expected time slice:


> @@ -6396,13 +6629,12 @@ static inline void unthrottle_offline_cf
> static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
> {
> struct sched_entity *se = &p->se;
> - struct cfs_rq *cfs_rq = cfs_rq_of(se);
>
> SCHED_WARN_ON(task_rq(p) != rq);
>
> if (rq->cfs.h_nr_running > 1) {
> - u64 slice = sched_slice(cfs_rq, se);
> u64 ran = se->sum_exec_runtime - se->prev_sum_exec_runtime;
> + u64 slice = se->slice;
> s64 delta = slice - ran;
>
> if (delta < 0) {
> @@ -12136,8 +12382,8 @@ static void rq_offline_fair(struct rq *r
> static inline bool
> __entity_slice_used(struct sched_entity *se, int min_nr_tasks)
> {
> - u64 slice = sched_slice(cfs_rq_of(se), se);
> u64 rtime = se->sum_exec_runtime - se->prev_sum_exec_runtime;
> + u64 slice = se->slice;
>
> return (rtime * min_nr_tasks > slice);
> }
> @@ -12832,7 +13078,7 @@ static unsigned int get_rr_interval_fair
> * idle runqueue:
> */
> if (rq->cfs.load.weight)
> - rr_interval = NS_TO_JIFFIES(sched_slice(cfs_rq_of(se), se));
> + rr_interval = NS_TO_JIFFIES(se->slice);
>
> return rr_interval;
> }

We probably do not want a task with normal weight and low latency-weight
(aka high latency / latency-nice value) to be expected to have a very
very high slice value for some of these. get_rr_interval_fair is
whatever, it's not really a number that exists, and CONFIG_SCHED_CORE
isn't updated for EEVDF at all, but HRTICK at least probably should be
updated. Having such a task run for 68 times normal seems likely to have
far worse latency effects than any gains from other parts.