[RFC][PATCH 03/14] sched/fair: Remove se->load.weight from se->avg.load_sum

From: Peter Zijlstra
Date: Fri May 12 2017 - 13:29:46 EST


Remove the load from the load_sum for sched_entities, basically
turning load_sum into runnable_sum. This prepares for better
reweighting of group entities.

Since we now have different rules for computing load_avg, split
___update_load_avg() into two parts, ___update_load_sum() and
___update_load_avg().

So for se:

___update_load_sum(.weight = 1)
___upate_load_avg(.weight = se->load.weight)

and for cfs_rq:

___update_load_sum(.weight = cfs_rq->load.weight)
___upate_load_avg(.weight = 1)

Since the primary consumable is load_avg, most things will not be
affected. Only those few sites that initialize/modify load_sum need
attention.

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
kernel/sched/fair.c | 91 ++++++++++++++++++++++++++++++++++++----------------
1 file changed, 64 insertions(+), 27 deletions(-)

--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -744,7 +744,7 @@ void init_entity_runnable_average(struct
*/
if (entity_is_task(se))
sa->load_avg = scale_load_down(se->load.weight);
- sa->load_sum = sa->load_avg * LOAD_AVG_MAX;
+ sa->load_sum = LOAD_AVG_MAX;
/*
* At this point, util_avg won't be used in select_task_rq_fair anyway
*/
@@ -1967,7 +1967,7 @@ static u64 numa_get_avg_runtime(struct t
delta = runtime - p->last_sum_exec_runtime;
*period = now - p->last_task_numa_placement;
} else {
- delta = p->se.avg.load_sum / p->se.load.weight;
+ delta = p->se.avg.load_sum;
*period = LOAD_AVG_MAX;
}

@@ -2872,8 +2872,8 @@ accumulate_sum(u64 delta, int cpu, struc
* = u_0 + u_1*y + u_2*y^2 + ... [re-labeling u_i --> u_{i+1}]
*/
static __always_inline int
-___update_load_avg(u64 now, int cpu, struct sched_avg *sa,
- unsigned long weight, int running, struct cfs_rq *cfs_rq)
+___update_load_sum(u64 now, int cpu, struct sched_avg *sa,
+ unsigned long weight, int running, struct cfs_rq *cfs_rq)
{
u64 delta;

@@ -2907,39 +2907,80 @@ ___update_load_avg(u64 now, int cpu, str
if (!accumulate_sum(delta, cpu, sa, weight, running, cfs_rq))
return 0;

+ return 1;
+}
+
+static __always_inline void
+___update_load_avg(struct sched_avg *sa, unsigned long weight, struct cfs_rq *cfs_rq)
+{
+ u32 divider = LOAD_AVG_MAX - 1024 + sa->period_contrib;
+
/*
* Step 2: update *_avg.
*/
- sa->load_avg = div_u64(sa->load_sum, LOAD_AVG_MAX - 1024 + sa->period_contrib);
+ sa->load_avg = div_u64(weight * sa->load_sum, divider);
if (cfs_rq) {
cfs_rq->runnable_load_avg =
- div_u64(cfs_rq->runnable_load_sum, LOAD_AVG_MAX - 1024 + sa->period_contrib);
+ div_u64(cfs_rq->runnable_load_sum, divider);
}
- sa->util_avg = sa->util_sum / (LOAD_AVG_MAX - 1024 + sa->period_contrib);
+ sa->util_avg = sa->util_sum / divider;
+}

- return 1;
+/*
+ * XXX we want to get rid of this helper and use the full load resolution.
+ */
+static inline long se_weight(struct sched_entity *se)
+{
+ return scale_load_down(se->load.weight);
}

+/*
+ * sched_entity:
+ *
+ * load_sum := runnable_sum
+ * load_avg = se_weight(se) * runnable_avg
+ *
+ * cfq_rs:
+ *
+ * load_sum = \Sum se_weight(se) * se->avg.load_sum
+ * load_avg = \Sum se->avg.load_avg
+ */
+
static int
__update_load_avg_blocked_se(u64 now, int cpu, struct sched_entity *se)
{
- return ___update_load_avg(now, cpu, &se->avg, 0, 0, NULL);
+ if (___update_load_sum(now, cpu, &se->avg, 0, 0, NULL)) {
+ ___update_load_avg(&se->avg, se_weight(se), NULL);
+ return 1;
+ }
+
+ return 0;
}

static int
__update_load_avg_se(u64 now, int cpu, struct cfs_rq *cfs_rq, struct sched_entity *se)
{
- return ___update_load_avg(now, cpu, &se->avg,
- se->on_rq * scale_load_down(se->load.weight),
- cfs_rq->curr == se, NULL);
+ if (___update_load_sum(now, cpu, &se->avg, !!se->on_rq,
+ cfs_rq->curr == se, NULL)) {
+
+ ___update_load_avg(&se->avg, se_weight(se), NULL);
+ return 1;
+ }
+
+ return 0;
}

static int
__update_load_avg_cfs_rq(u64 now, int cpu, struct cfs_rq *cfs_rq)
{
- return ___update_load_avg(now, cpu, &cfs_rq->avg,
- scale_load_down(cfs_rq->load.weight),
- cfs_rq->curr != NULL, cfs_rq);
+ if (___update_load_sum(now, cpu, &cfs_rq->avg,
+ scale_load_down(cfs_rq->load.weight),
+ cfs_rq->curr != NULL, cfs_rq)) {
+ ___update_load_avg(&cfs_rq->avg, 1, cfs_rq);
+ return 1;
+ }
+
+ return 0;
}

/*
@@ -3110,7 +3151,7 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq

/* Set new sched_entity's load */
se->avg.load_avg = load;
- se->avg.load_sum = se->avg.load_avg * LOAD_AVG_MAX;
+ se->avg.load_sum = LOAD_AVG_MAX;

/* Update parent cfs_rq load */
add_positive(&cfs_rq->avg.load_avg, delta);
@@ -3340,7 +3381,7 @@ static void attach_entity_load_avg(struc
{
se->avg.last_update_time = cfs_rq->avg.last_update_time;
cfs_rq->avg.load_avg += se->avg.load_avg;
- cfs_rq->avg.load_sum += se->avg.load_sum;
+ cfs_rq->avg.load_sum += se_weight(se) * se->avg.load_sum;
cfs_rq->avg.util_avg += se->avg.util_avg;
cfs_rq->avg.util_sum += se->avg.util_sum;
set_tg_cfs_propagate(cfs_rq);
@@ -3360,7 +3401,7 @@ static void detach_entity_load_avg(struc
{

sub_positive(&cfs_rq->avg.load_avg, se->avg.load_avg);
- sub_positive(&cfs_rq->avg.load_sum, se->avg.load_sum);
+ sub_positive(&cfs_rq->avg.load_sum, se_weight(se) * se->avg.load_sum);
sub_positive(&cfs_rq->avg.util_avg, se->avg.util_avg);
sub_positive(&cfs_rq->avg.util_sum, se->avg.util_sum);
set_tg_cfs_propagate(cfs_rq);
@@ -3372,12 +3413,10 @@ static void detach_entity_load_avg(struc
static inline void
enqueue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
- struct sched_avg *sa = &se->avg;
-
- cfs_rq->runnable_load_avg += sa->load_avg;
- cfs_rq->runnable_load_sum += sa->load_sum;
+ cfs_rq->runnable_load_avg += se->avg.load_avg;
+ cfs_rq->runnable_load_sum += se_weight(se) * se->avg.load_sum;

- if (!sa->last_update_time) {
+ if (!se->avg.last_update_time) {
attach_entity_load_avg(cfs_rq, se);
update_tg_load_avg(cfs_rq, 0);
}
@@ -3387,10 +3426,8 @@ enqueue_entity_load_avg(struct cfs_rq *c
static inline void
dequeue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
- cfs_rq->runnable_load_avg =
- max_t(long, cfs_rq->runnable_load_avg - se->avg.load_avg, 0);
- cfs_rq->runnable_load_sum =
- max_t(s64, cfs_rq->runnable_load_sum - se->avg.load_sum, 0);
+ sub_positive(&cfs_rq->runnable_load_avg, se->avg.load_avg);
+ sub_positive(&cfs_rq->runnable_load_sum, se_weight(se) * se->avg.load_sum);
}

#ifndef CONFIG_64BIT