Re: [RFC PATCH 2/5] sched/events: Introduce cfs_rq load tracking trace event

From: Peter Zijlstra
Date: Thu Mar 30 2017 - 03:04:33 EST


On Wed, Mar 29, 2017 at 11:03:45PM +0200, Dietmar Eggemann wrote:

> > +static int
> > +__update_load_avg_blocked_se(u64 now, int cpu, struct sched_avg *sa)
> > +{
> > + return ___update_load_avg(now, cpu, sa, 0, 0, NULL);
> > +}
> > +
> > +static int
> > +__update_load_avg_se(u64 now, int cpu, struct sched_avg *sa,
> > + unsigned long weight, int running)
> > +{
> > + return ___update_load_avg(now, cpu, sa, weight, running, NULL);
> > +}
> > +
> > +static int
> > +__update_load_avg(u64 now, int cpu, struct sched_avg *sa,
> > + unsigned long weight, int running, struct cfs_rq *cfs_rq)
> > +{
> > + return ___update_load_avg(now, cpu, sa, weight, running, cfs_rq);
> > +}
>
> Why not reduce the parameter list of these 3 incarnations to 'now, cpu,
> object'?
>
> static int
> __update_load_avg_blocked_se(u64 now, int cpu, struct sched_entity *se)
>
> static int
> __update_load_avg_se(u64 now, int cpu, struct sched_entity *se)
>
> static int
> __update_load_avg_cfs_rq(u64 now, int cpu, struct cfs_rq *cfs_rq)
>
> [...]

Doesn't quite work with just the se; the se wrapper also needs the cfs_rq for the weight and the running check. But yes, good idea.

And this way we don't need the nonnull attribute either; since the wrapper
itself dereferences the pointer, it clearly cannot be NULL by the time the
worker sees it.
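
For what it's worth, a standalone sketch of that wrapper pattern (made-up,
simplified stand-in types and trivial bodies, nothing like the real
___update_load_avg()): the _cfs_rq wrapper loads from its cfs_rq argument
before the generic worker ever runs, so a NULL cfs_rq cannot reach the worker
from that path, while the worker itself keeps accepting NULL for the se cases.

#include <stdio.h>

struct sched_avg {
        unsigned long load_avg;
};

struct cfs_rq {
        struct sched_avg avg;
        unsigned long load_weight;
        unsigned long nr_updates;
};

struct sched_entity {
        struct sched_avg avg;
};

/* Generic worker: cfs_rq may legitimately be NULL for the se cases. */
static int ___update_load_avg(struct sched_avg *sa, unsigned long weight,
                              struct cfs_rq *cfs_rq)
{
        sa->load_avg += weight;
        if (cfs_rq)
                cfs_rq->nr_updates++;
        return 1;
}

/* Entity wrapper: takes the concrete se and passes its embedded avg. */
static int __update_load_avg_blocked_se(struct sched_entity *se)
{
        return ___update_load_avg(&se->avg, 0, NULL);
}

/*
 * Runqueue wrapper: cfs_rq is dereferenced right here, so by the time
 * the worker runs it obviously cannot be NULL; no nonnull attribute
 * is needed to say so.
 */
static int __update_load_avg_cfs_rq(struct cfs_rq *cfs_rq)
{
        return ___update_load_avg(&cfs_rq->avg, cfs_rq->load_weight, cfs_rq);
}

int main(void)
{
        struct sched_entity se = { .avg = { 0 } };
        struct cfs_rq rq = { .avg = { 0 }, .load_weight = 1024 };

        __update_load_avg_blocked_se(&se);
        __update_load_avg_cfs_rq(&rq);

        printf("se load_avg=%lu rq load_avg=%lu rq updates=%lu\n",
               se.avg.load_avg, rq.avg.load_avg, rq.nr_updates);
        return 0;
}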

---
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2849,7 +2849,7 @@ static u32 __compute_runnable_contrib(u6
* = u_0 + u_1*y + u_2*y^2 + ... [re-labeling u_i --> u_{i+1}]
*/
static __always_inline int
-__update_load_avg(u64 now, int cpu, struct sched_avg *sa,
+___update_load_avg(u64 now, int cpu, struct sched_avg *sa,
unsigned long weight, int running, struct cfs_rq *cfs_rq)
{
u64 delta, scaled_delta, periods;
@@ -2953,6 +2953,28 @@ __update_load_avg(u64 now, int cpu, stru
return decayed;
}

+static int
+__update_load_avg_blocked_se(u64 now, int cpu, struct sched_entity *se)
+{
+ return ___update_load_avg(now, cpu, &se->avg, 0, 0, NULL);
+}
+
+static int
+__update_load_avg_se(u64 now, int cpu, struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+ return ___update_load_avg(now, cpu, &se->avg,
+ se->on_rq * scale_load_down(se->load.weight),
+ cfs_rq->curr == se, NULL);
+}
+
+static int
+__update_load_avg_cfs_rq(u64 now, int cpu, struct cfs_rq *cfs_rq)
+{
+ return ___update_load_avg(now, cpu, &cfs_rq->avg,
+ scale_load_down(cfs_rq->load.weight),
+ cfs_rq->curr != NULL, cfs_rq);
+}
+
/*
* Signed add and clamp on underflow.
*
@@ -3014,6 +3036,9 @@ static inline void update_tg_load_avg(st
void set_task_rq_fair(struct sched_entity *se,
struct cfs_rq *prev, struct cfs_rq *next)
{
+ u64 p_last_update_time;
+ u64 n_last_update_time;
+
if (!sched_feat(ATTACH_AGE_LOAD))
return;

@@ -3024,11 +3049,11 @@ void set_task_rq_fair(struct sched_entit
* time. This will result in the wakee task is less decayed, but giving
* the wakee more load sounds not bad.
*/
- if (se->avg.last_update_time && prev) {
- u64 p_last_update_time;
- u64 n_last_update_time;
+ if (!(se->avg.last_update_time && prev))
+ return;

#ifndef CONFIG_64BIT
+ {
u64 p_last_update_time_copy;
u64 n_last_update_time_copy;

@@ -3043,14 +3068,13 @@ void set_task_rq_fair(struct sched_entit

} while (p_last_update_time != p_last_update_time_copy ||
n_last_update_time != n_last_update_time_copy);
+ }
#else
- p_last_update_time = prev->avg.last_update_time;
- n_last_update_time = next->avg.last_update_time;
+ p_last_update_time = prev->avg.last_update_time;
+ n_last_update_time = next->avg.last_update_time;
#endif
- __update_load_avg(p_last_update_time, cpu_of(rq_of(prev)),
- &se->avg, 0, 0, NULL);
- se->avg.last_update_time = n_last_update_time;
- }
+ __update_load_avg_blocked_se(p_last_update_time, cpu_of(rq_of(prev)), se);
+ se->avg.last_update_time = n_last_update_time;
}

/* Take into account change of utilization of a child task group */
@@ -3295,8 +3319,7 @@ update_cfs_rq_load_avg(u64 now, struct c
set_tg_cfs_propagate(cfs_rq);
}

- decayed = __update_load_avg(now, cpu_of(rq_of(cfs_rq)), sa,
- scale_load_down(cfs_rq->load.weight), cfs_rq->curr != NULL, cfs_rq);
+ decayed = __update_load_avg_cfs_rq(now, cpu_of(rq_of(cfs_rq)), cfs_rq);

#ifndef CONFIG_64BIT
smp_wmb();
@@ -3328,11 +3351,8 @@ static inline void update_load_avg(struc
* Track task load average for carrying it to new CPU after migrated, and
* track group sched_entity load average for task_h_load calc in migration
*/
- if (se->avg.last_update_time && !(flags & SKIP_AGE_LOAD)) {
- __update_load_avg(now, cpu, &se->avg,
- se->on_rq * scale_load_down(se->load.weight),
- cfs_rq->curr == se, NULL);
- }
+ if (se->avg.last_update_time && !(flags & SKIP_AGE_LOAD))
+ __update_load_avg_se(now, cpu, cfs_rq, se);

decayed = update_cfs_rq_load_avg(now, cfs_rq, true);
decayed |= propagate_entity_load_avg(se);
@@ -3437,7 +3457,7 @@ void sync_entity_load_avg(struct sched_e
u64 last_update_time;

last_update_time = cfs_rq_last_update_time(cfs_rq);
- __update_load_avg(last_update_time, cpu_of(rq_of(cfs_rq)), &se->avg, 0, 0, NULL);
+ __update_load_avg_blocked_se(last_update_time, cpu_of(rq_of(cfs_rq)), se);
}

/*