[PATCH 3/7 v4] sched: factorize PELT update

From: Vincent Guittot
Date: Mon Sep 26 2016 - 08:20:31 EST


Every time, we modify load/utilization of sched_entity, we start to sync
it with its cfs_rq. This update is done is different ways:
-when attaching/detaching a sched_entity, we update cfs_rq and then we
sync the entity with the cfs_rq.
-when enqueueing/dequeuing the sched_entity, we update both sched_entity
and cfs_rq metrics to now.

Use update_load_avg everytime we have to update and sync cfs_rq and
sched_entity before changing the state of a sched_enity

Signed-off-by: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
---
kernel/sched/fair.c | 75 +++++++++++++++++------------------------------------
1 file changed, 24 insertions(+), 51 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3d29492..625e7f7 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3088,8 +3088,14 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
return decayed || removed_load;
}

+/*
+ * Optional action to be done while updating the load average
+ */
+#define UPDATE_TG 0x1
+#define SKIP_AGE_LOAD 0x2
+
/* Update task and its cfs_rq load average */
-static inline void update_load_avg(struct sched_entity *se, int update_tg)
+static inline void update_load_avg(struct sched_entity *se, int flags)
{
struct cfs_rq *cfs_rq = cfs_rq_of(se);
u64 now = cfs_rq_clock_task(cfs_rq);
@@ -3100,11 +3106,12 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg)
* Track task load average for carrying it to new CPU after migrated, and
* track group sched_entity load average for task_h_load calc in migration
*/
- __update_load_avg(now, cpu, &se->avg,
+ if (se->avg.last_update_time && !(flags & SKIP_AGE_LOAD))
+ __update_load_avg(now, cpu, &se->avg,
se->on_rq * scale_load_down(se->load.weight),
cfs_rq->curr == se, NULL);

- if (update_cfs_rq_load_avg(now, cfs_rq, true) && update_tg)
+ if (update_cfs_rq_load_avg(now, cfs_rq, true) && (flags & UPDATE_TG))
update_tg_load_avg(cfs_rq, 0);
}

@@ -3118,26 +3125,6 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg)
*/
static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
- if (!sched_feat(ATTACH_AGE_LOAD))
- goto skip_aging;
-
- /*
- * If we got migrated (either between CPUs or between cgroups) we'll
- * have aged the average right before clearing @last_update_time.
- *
- * Or we're fresh through post_init_entity_util_avg().
- */
- if (se->avg.last_update_time) {
- __update_load_avg(cfs_rq->avg.last_update_time, cpu_of(rq_of(cfs_rq)),
- &se->avg, 0, 0, NULL);
-
- /*
- * XXX: we could have just aged the entire load away if we've been
- * absent from the fair class for too long.
- */
- }
-
-skip_aging:
se->avg.last_update_time = cfs_rq->avg.last_update_time;
cfs_rq->avg.load_avg += se->avg.load_avg;
cfs_rq->avg.load_sum += se->avg.load_sum;
@@ -3157,9 +3144,6 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
*/
static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
- __update_load_avg(cfs_rq->avg.last_update_time, cpu_of(rq_of(cfs_rq)),
- &se->avg, se->on_rq * scale_load_down(se->load.weight),
- cfs_rq->curr == se, NULL);

sub_positive(&cfs_rq->avg.load_avg, se->avg.load_avg);
sub_positive(&cfs_rq->avg.load_sum, se->avg.load_sum);
@@ -3174,34 +3158,20 @@ static inline void
enqueue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
struct sched_avg *sa = &se->avg;
- u64 now = cfs_rq_clock_task(cfs_rq);
- int migrated, decayed;
-
- migrated = !sa->last_update_time;
- if (!migrated) {
- __update_load_avg(now, cpu_of(rq_of(cfs_rq)), sa,
- se->on_rq * scale_load_down(se->load.weight),
- cfs_rq->curr == se, NULL);
- }
-
- decayed = update_cfs_rq_load_avg(now, cfs_rq, !migrated);

cfs_rq->runnable_load_avg += sa->load_avg;
cfs_rq->runnable_load_sum += sa->load_sum;

- if (migrated)
+ if (!sa->last_update_time) {
attach_entity_load_avg(cfs_rq, se);
-
- if (decayed || migrated)
update_tg_load_avg(cfs_rq, 0);
+ }
}

/* Remove the runnable load generated by se from cfs_rq's runnable load average */
static inline void
dequeue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
- update_load_avg(se, 1);
-
cfs_rq->runnable_load_avg =
max_t(long, cfs_rq->runnable_load_avg - se->avg.load_avg, 0);
cfs_rq->runnable_load_sum =
@@ -3275,7 +3245,10 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
return 0;
}

-static inline void update_load_avg(struct sched_entity *se, int not_used)
+#define UPDATE_TG 0x0
+#define SKIP_AGE_LOAD 0x0
+
+static inline void update_load_avg(struct sched_entity *se, int not_used1)
{
struct cfs_rq *cfs_rq = cfs_rq_of(se);
struct rq *rq = rq_of(cfs_rq);
@@ -3423,6 +3396,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
if (renorm && !curr)
se->vruntime += cfs_rq->min_vruntime;

+ update_load_avg(se, UPDATE_TG);
enqueue_entity_load_avg(cfs_rq, se);
account_entity_enqueue(cfs_rq, se);
update_cfs_shares(cfs_rq);
@@ -3497,6 +3471,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
* Update run-time statistics of the 'current'.
*/
update_curr(cfs_rq);
+ update_load_avg(se, UPDATE_TG);
dequeue_entity_load_avg(cfs_rq, se);

update_stats_dequeue(cfs_rq, se, flags);
@@ -3575,7 +3550,7 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
*/
update_stats_wait_end(cfs_rq, se);
__dequeue_entity(cfs_rq, se);
- update_load_avg(se, 1);
+ update_load_avg(se, UPDATE_TG);
}

update_stats_curr_start(cfs_rq, se);
@@ -3693,7 +3668,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
/*
* Ensure that runnable average is periodically updated.
*/
- update_load_avg(curr, 1);
+ update_load_avg(curr, UPDATE_TG);
update_cfs_shares(cfs_rq);

#ifdef CONFIG_SCHED_HRTICK
@@ -4582,7 +4557,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
if (cfs_rq_throttled(cfs_rq))
break;

- update_load_avg(se, 1);
+ update_load_avg(se, UPDATE_TG);
update_cfs_shares(cfs_rq);
}

@@ -4641,7 +4616,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
if (cfs_rq_throttled(cfs_rq))
break;

- update_load_avg(se, 1);
+ update_load_avg(se, UPDATE_TG);
update_cfs_shares(cfs_rq);
}

@@ -8520,7 +8495,6 @@ static void detach_task_cfs_rq(struct task_struct *p)
{
struct sched_entity *se = &p->se;
struct cfs_rq *cfs_rq = cfs_rq_of(se);
- u64 now = cfs_rq_clock_task(cfs_rq);

if (!vruntime_normalized(p)) {
/*
@@ -8532,7 +8506,7 @@ static void detach_task_cfs_rq(struct task_struct *p)
}

/* Catch up with the cfs_rq and remove our load when we leave */
- update_cfs_rq_load_avg(now, cfs_rq, false);
+ update_load_avg(se, 0);
detach_entity_load_avg(cfs_rq, se);
update_tg_load_avg(cfs_rq, false);
}
@@ -8540,7 +8514,6 @@ static void detach_task_cfs_rq(struct task_struct *p)
static void attach_entity_cfs_rq(struct sched_entity *se)
{
struct cfs_rq *cfs_rq = cfs_rq_of(se);
- u64 now = cfs_rq_clock_task(cfs_rq);

#ifdef CONFIG_FAIR_GROUP_SCHED
/*
@@ -8551,7 +8524,7 @@ static void attach_entity_cfs_rq(struct sched_entity *se)
#endif

/* Synchronize task with its cfs_rq */
- update_cfs_rq_load_avg(now, cfs_rq, false);
+ update_load_avg(se, sched_feat(ATTACH_AGE_LOAD) ? 0 : SKIP_AGE_LOAD);
attach_entity_load_avg(cfs_rq, se);
update_tg_load_avg(cfs_rq, false);
}
--
1.9.1