[RFCv3 PATCH 18/48] sched: Track blocked utilization contributions

From: Morten Rasmussen
Date: Wed Feb 04 2015 - 13:40:38 EST

Next message: Jonathan Cameron: "Re: [PATCH 1/3] iio: vf610_adc: cleanup wait_for_completion return handling"
Previous message: Morten Rasmussen: "[RFCv3 PATCH 15/48] arm: vexpress: Add CPU clock-frequencies to TC2 device-tree"
In reply to: Morten Rasmussen: "[RFCv3 PATCH 15/48] arm: vexpress: Add CPU clock-frequencies to TC2 device-tree"
Next in thread: Morten Rasmussen: "[RFCv3 PATCH 17/48] sched: Get rid of scaling usage by cpu_capacity_orig"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]

Introduces the blocked utilization, the utilization counter-part to
cfs_rq->utilization_load_avg. It is the sum of sched_entity utilization
contributions of entities that were recently on the cfs_rq that are
currently blocked. Combined with sum of contributions of entities
currently on the cfs_rq or currently running
(cfs_rq->utilization_load_avg) this can provide a more stable average
view of the cpu usage.

cc: Ingo Molnar <mingo@xxxxxxxxxx>
cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>

Signed-off-by: Morten Rasmussen <morten.rasmussen@xxxxxxx>
---
kernel/sched/fair.c | 30 +++++++++++++++++++++++++++++-
kernel/sched/sched.h | 8 ++++++--
2 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index a85c34b..0fc8963 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2775,6 +2775,15 @@ static inline void subtract_blocked_load_contrib(struct cfs_rq *cfs_rq,
cfs_rq->blocked_load_avg = 0;
}

+static inline void subtract_utilization_blocked_contrib(struct cfs_rq *cfs_rq,
+ long utilization_contrib)
+{
+ if (likely(utilization_contrib < cfs_rq->utilization_blocked_avg))
+ cfs_rq->utilization_blocked_avg -= utilization_contrib;
+ else
+ cfs_rq->utilization_blocked_avg = 0;
+}
+
static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq);

/* Update a sched_entity's runnable average */
@@ -2810,6 +2819,8 @@ static inline void update_entity_load_avg(struct sched_entity *se,
cfs_rq->utilization_load_avg += utilization_delta;
} else {
subtract_blocked_load_contrib(cfs_rq, -contrib_delta);
+ subtract_utilization_blocked_contrib(cfs_rq,
+ -utilization_delta);
}
}

@@ -2827,14 +2838,20 @@ static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq, int force_update)
return;

if (atomic_long_read(&cfs_rq->removed_load)) {
- unsigned long removed_load;
+ unsigned long removed_load, removed_utilization;
removed_load = atomic_long_xchg(&cfs_rq->removed_load, 0);
+ removed_utilization =
+ atomic_long_xchg(&cfs_rq->removed_utilization, 0);
subtract_blocked_load_contrib(cfs_rq, removed_load);
+ subtract_utilization_blocked_contrib(cfs_rq,
+ removed_utilization);
}

if (decays) {
cfs_rq->blocked_load_avg = decay_load(cfs_rq->blocked_load_avg,
decays);
+ cfs_rq->utilization_blocked_avg =
+ decay_load(cfs_rq->utilization_blocked_avg, decays);
atomic64_add(decays, &cfs_rq->decay_counter);
cfs_rq->last_decay = now;
}
@@ -2881,6 +2898,8 @@ static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq,
/* migrated tasks did not contribute to our blocked load */
if (wakeup) {
subtract_blocked_load_contrib(cfs_rq, se->avg.load_avg_contrib);
+ subtract_utilization_blocked_contrib(cfs_rq,
+ se->avg.utilization_avg_contrib);
update_entity_load_avg(se, 0);
}

@@ -2907,6 +2926,8 @@ static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq,
cfs_rq->utilization_load_avg -= se->avg.utilization_avg_contrib;
if (sleep) {
cfs_rq->blocked_load_avg += se->avg.load_avg_contrib;
+ cfs_rq->utilization_blocked_avg +=
+ se->avg.utilization_avg_contrib;
se->avg.decay_count = atomic64_read(&cfs_rq->decay_counter);
} /* migrations, e.g. sleep=0 leave decay_count == 0 */
}
@@ -4927,6 +4948,8 @@ migrate_task_rq_fair(struct task_struct *p, int next_cpu)
se->avg.decay_count = -__synchronize_entity_decay(se);
atomic_long_add(se->avg.load_avg_contrib,
&cfs_rq->removed_load);
+ atomic_long_add(se->avg.utilization_avg_contrib,
+ &cfs_rq->removed_utilization);
}

/* We have migrated, no longer consider this task hot */
@@ -7942,6 +7965,8 @@ static void switched_from_fair(struct rq *rq, struct task_struct *p)
if (se->avg.decay_count) {
__synchronize_entity_decay(se);
subtract_blocked_load_contrib(cfs_rq, se->avg.load_avg_contrib);
+ subtract_utilization_blocked_contrib(cfs_rq,
+ se->avg.utilization_avg_contrib);
}
#endif
}
@@ -8001,6 +8026,7 @@ void init_cfs_rq(struct cfs_rq *cfs_rq)
#ifdef CONFIG_SMP
atomic64_set(&cfs_rq->decay_counter, 1);
atomic_long_set(&cfs_rq->removed_load, 0);
+ atomic_long_set(&cfs_rq->removed_utilization, 0);
#endif
}

@@ -8053,6 +8079,8 @@ static void task_move_group_fair(struct task_struct *p, int queued)
*/
se->avg.decay_count = atomic64_read(&cfs_rq->decay_counter);
cfs_rq->blocked_load_avg += se->avg.load_avg_contrib;
+ cfs_rq->utilization_blocked_avg +=
+ se->avg.utilization_avg_contrib;
#endif
}
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e402133..208237f 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -368,11 +368,15 @@ struct cfs_rq {
* the blocked sched_entities on the rq.
* utilization_load_avg is the sum of the average running time of the
* sched_entities on the rq.
+ * utilization_blocked_avg is the utilization equivalent of
+ * blocked_load_avg, i.e. the sum of running contributions of blocked
+ * sched_entities associated with the rq.
*/
- unsigned long runnable_load_avg, blocked_load_avg, utilization_load_avg;
+ unsigned long runnable_load_avg, blocked_load_avg;
+ unsigned long utilization_load_avg, utilization_blocked_avg;
atomic64_t decay_counter;
u64 last_decay;
- atomic_long_t removed_load;
+ atomic_long_t removed_load, removed_utilization;

#ifdef CONFIG_FAIR_GROUP_SCHED
/* Required to track per-cpu representation of a task_group */
--
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

Next message: Jonathan Cameron: "Re: [PATCH 1/3] iio: vf610_adc: cleanup wait_for_completion return handling"
Previous message: Morten Rasmussen: "[RFCv3 PATCH 15/48] arm: vexpress: Add CPU clock-frequencies to TC2 device-tree"
In reply to: Morten Rasmussen: "[RFCv3 PATCH 15/48] arm: vexpress: Add CPU clock-frequencies to TC2 device-tree"
Next in thread: Morten Rasmussen: "[RFCv3 PATCH 17/48] sched: Get rid of scaling usage by cpu_capacity_orig"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]