[PATCH -v2 06/18] sched/fair: Move enqueue migrate handling

From: Peter Zijlstra
Date: Fri Sep 01 2017 - 09:35:36 EST


Move the entity migrate handling from enqueue_entity_load_avg() to
update_load_avg(). This has two benefits:

- {en,de}queue_entity_load_avg() will become purely about managing
runnable_load

- we can avoid a double update_tg_load_avg() and reduce pressure on
the global tg->shares cacheline

The reason we do this is so that we can change update_cfs_shares() to
change both weight and (future) runnable_weight. For this to work we
need to have the cfs_rq averages up-to-date (which means having done
the attach), but we need the cfs_rq->avg.runnable_avg to not yet
include the se's contribution (since se->on_rq == 0).

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
kernel/sched/fair.c | 70 ++++++++++++++++++++++++++--------------------------
1 file changed, 36 insertions(+), 34 deletions(-)

--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3491,34 +3491,6 @@ update_cfs_rq_load_avg(u64 now, struct c
return decayed || removed_load;
}

-/*
- * Optional action to be done while updating the load average
- */
-#define UPDATE_TG 0x1
-#define SKIP_AGE_LOAD 0x2
-
-/* Update task and its cfs_rq load average */
-static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
-{
- u64 now = cfs_rq_clock_task(cfs_rq);
- struct rq *rq = rq_of(cfs_rq);
- int cpu = cpu_of(rq);
- int decayed;
-
- /*
- * Track task load average for carrying it to new CPU after migrated, and
- * track group sched_entity load average for task_h_load calc in migration
- */
- if (se->avg.last_update_time && !(flags & SKIP_AGE_LOAD))
- __update_load_avg_se(now, cpu, cfs_rq, se);
-
- decayed = update_cfs_rq_load_avg(now, cfs_rq);
- decayed |= propagate_entity_load_avg(se);
-
- if (decayed && (flags & UPDATE_TG))
- update_tg_load_avg(cfs_rq, 0);
-}
-
/**
* attach_entity_load_avg - attach this entity to its cfs_rq load avg
* @cfs_rq: cfs_rq to attach to
@@ -3559,17 +3531,46 @@ static void detach_entity_load_avg(struc
cfs_rq_util_change(cfs_rq);
}

+/*
+ * Optional action to be done while updating the load average
+ */
+#define UPDATE_TG 0x1
+#define SKIP_AGE_LOAD 0x2
+#define DO_ATTACH 0x4
+
+/* Update task and its cfs_rq load average */
+static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+{
+ u64 now = cfs_rq_clock_task(cfs_rq);
+ struct rq *rq = rq_of(cfs_rq);
+ int cpu = cpu_of(rq);
+ int decayed;
+
+ /*
+ * Track task load average for carrying it to new CPU after migrated, and
+ * track group sched_entity load average for task_h_load calc in migration
+ */
+ if (se->avg.last_update_time && !(flags & SKIP_AGE_LOAD))
+ __update_load_avg_se(now, cpu, cfs_rq, se);
+
+ decayed = update_cfs_rq_load_avg(now, cfs_rq);
+ decayed |= propagate_entity_load_avg(se);
+
+ if (!se->avg.last_update_time && (flags & DO_ATTACH)) {
+
+ attach_entity_load_avg(cfs_rq, se);
+ update_tg_load_avg(cfs_rq, 0);
+
+ } else if (decayed && (flags & UPDATE_TG))
+ update_tg_load_avg(cfs_rq, 0);
+}
+
/* Add the load generated by se into cfs_rq's load average */
static inline void
enqueue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
cfs_rq->runnable_load_avg += se->avg.load_avg;
cfs_rq->runnable_load_sum += se_weight(se) * se->avg.load_sum;
-
- if (!se->avg.last_update_time) {
- attach_entity_load_avg(cfs_rq, se);
- update_tg_load_avg(cfs_rq, 0);
- }
}

/* Remove the runnable load generated by se from cfs_rq's runnable load average */
@@ -3659,6 +3660,7 @@ update_cfs_rq_load_avg(u64 now, struct c

#define UPDATE_TG 0x0
#define SKIP_AGE_LOAD 0x0
+#define DO_ATTACH 0x0

static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se, int not_used1)
{
@@ -3813,7 +3815,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, st
* its group cfs_rq
* - Add its new weight to cfs_rq->load.weight
*/
- update_load_avg(cfs_rq, se, UPDATE_TG);
+ update_load_avg(cfs_rq, se, UPDATE_TG | DO_ATTACH);
enqueue_entity_load_avg(cfs_rq, se);
update_cfs_shares(se);
account_entity_enqueue(cfs_rq, se);