[RFCv4 PATCH 23/34] sched: Extend sched_group_energy to test load-balancing decisions
From: Morten Rasmussen
Date: Tue May 12 2015 - 15:38:54 EST
Extend sched_group_energy() to support energy prediction when usage
(tasks) is added to or removed from a specific cpu, or migrated between a
pair of cpus. This is useful for load-balancing decision making.
cc: Ingo Molnar <mingo@xxxxxxxxxx>
cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Signed-off-by: Morten Rasmussen <morten.rasmussen@xxxxxxx>
---
kernel/sched/fair.c | 86 +++++++++++++++++++++++++++++++++++++----------------
1 file changed, 60 insertions(+), 26 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2677ca6..52403e9 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4810,15 +4810,17 @@ static unsigned long capacity_curr_of(int cpu)
* capacity_curr (but not capacity_orig) as it useful for predicting the
* capacity required after task migrations (scheduler-driven DVFS).
*/
-
-static unsigned long get_cpu_usage(int cpu)
+static unsigned long __get_cpu_usage(int cpu, int delta)
{
int sum;
unsigned long usage = cpu_rq(cpu)->cfs.utilization_load_avg;
unsigned long blocked = cpu_rq(cpu)->cfs.utilization_blocked_avg;
unsigned long capacity_orig = capacity_orig_of(cpu);
- sum = usage + blocked;
+ sum = usage + blocked + delta;
+
+ if (sum < 0)
+ return 0;
if (sum >= capacity_orig)
return capacity_orig;
@@ -4826,13 +4828,28 @@ static unsigned long get_cpu_usage(int cpu)
return sum;
}
+static unsigned long get_cpu_usage(int cpu)
+{
+ return __get_cpu_usage(cpu, 0);
+}
+
static inline bool energy_aware(void)
{
return sched_feat(ENERGY_AWARE);
}
+struct energy_env {
+ struct sched_group *sg_top;
+ struct sched_group *sg_cap;
+ int cap_idx;
+ int usage_delta;
+ int src_cpu;
+ int dst_cpu;
+ int energy;
+};
+
/*
- * cpu_norm_usage() returns the cpu usage relative to a specific capacity,
+ * __cpu_norm_usage() returns the cpu usage relative to a specific capacity,
* i.e. it's busy ratio, in the range [0..SCHED_LOAD_SCALE] which is useful for
* energy calculations. Using the scale-invariant usage returned by
* get_cpu_usage() and approximating scale-invariant usage by:
@@ -4845,9 +4862,9 @@ static inline bool energy_aware(void)
*
* norm_usage = running_time/time ~ usage/capacity
*/
-static unsigned long cpu_norm_usage(int cpu, unsigned long capacity)
+static unsigned long __cpu_norm_usage(int cpu, unsigned long capacity, int delta)
{
- int usage = __get_cpu_usage(cpu);
+ int usage = __get_cpu_usage(cpu, delta);
if (usage >= capacity)
return SCHED_CAPACITY_SCALE;
@@ -4855,13 +4872,25 @@ static unsigned long cpu_norm_usage(int cpu, unsigned long capacity)
return (usage << SCHED_CAPACITY_SHIFT)/capacity;
}
-static unsigned long group_max_usage(struct sched_group *sg)
+static int calc_usage_delta(struct energy_env *eenv, int cpu)
{
- int i;
+ if (cpu == eenv->src_cpu)
+ return -eenv->usage_delta;
+ if (cpu == eenv->dst_cpu)
+ return eenv->usage_delta;
+ return 0;
+}
+
+static
+unsigned long group_max_usage(struct energy_env *eenv, struct sched_group *sg)
+{
+ int i, delta;
unsigned long max_usage = 0;
- for_each_cpu(i, sched_group_cpus(sg))
- max_usage = max(max_usage, get_cpu_usage(i));
+ for_each_cpu(i, sched_group_cpus(sg)) {
+ delta = calc_usage_delta(eenv, i);
+ max_usage = max(max_usage, __get_cpu_usage(i, delta));
+ }
return max_usage;
}
@@ -4875,31 +4904,36 @@ static unsigned long group_max_usage(struct sched_group *sg)
* latter is used as the estimate as it leads to a more pessimistic energy
* estimate (more busy).
*/
-static unsigned long group_norm_usage(struct sched_group *sg, int cap_idx)
+static unsigned
+long group_norm_usage(struct energy_env *eenv, struct sched_group *sg)
{
- int i;
+ int i, delta;
unsigned long usage_sum = 0;
- unsigned long capacity = sg->sge->cap_states[cap_idx].cap;
+ unsigned long capacity = sg->sge->cap_states[eenv->cap_idx].cap;
- for_each_cpu(i, sched_group_cpus(sg))
- usage_sum += cpu_norm_usage(i, capacity);
+ for_each_cpu(i, sched_group_cpus(sg)) {
+ delta = calc_usage_delta(eenv, i);
+ usage_sum += __cpu_norm_usage(i, capacity, delta);
+ }
if (usage_sum > SCHED_CAPACITY_SCALE)
return SCHED_CAPACITY_SCALE;
return usage_sum;
}
-static int find_new_capacity(struct sched_group *sg,
+static int find_new_capacity(struct energy_env *eenv,
struct sched_group_energy *sge)
{
int idx;
- unsigned long util = group_max_usage(sg);
+ unsigned long util = group_max_usage(eenv, eenv->sg_cap);
for (idx = 0; idx < sge->nr_cap_states; idx++) {
if (sge->cap_states[idx].cap >= util)
return idx;
}
+ eenv->cap_idx = idx;
+
return idx;
}
@@ -4912,16 +4946,16 @@ static int find_new_capacity(struct sched_group *sg,
* gather the same usage statistics multiple times. This can probably be done in
* a faster but more complex way.
*/
-static unsigned int sched_group_energy(struct sched_group *sg_top)
+static unsigned int sched_group_energy(struct energy_env *eenv)
{
struct sched_domain *sd;
int cpu, total_energy = 0;
struct cpumask visit_cpus;
struct sched_group *sg;
- WARN_ON(!sg_top->sge);
+ WARN_ON(!eenv->sg_top->sge);
- cpumask_copy(&visit_cpus, sched_group_cpus(sg_top));
+ cpumask_copy(&visit_cpus, sched_group_cpus(eenv->sg_top));
while (!cpumask_empty(&visit_cpus)) {
struct sched_group *sg_shared_cap = NULL;
@@ -4944,17 +4978,16 @@ static unsigned int sched_group_energy(struct sched_group *sg_top)
break;
do {
- struct sched_group *sg_cap_util;
unsigned long group_util;
int sg_busy_energy, sg_idle_energy, cap_idx;
if (sg_shared_cap && sg_shared_cap->group_weight >= sg->group_weight)
- sg_cap_util = sg_shared_cap;
+ eenv->sg_cap = sg_shared_cap;
else
- sg_cap_util = sg;
+ eenv->sg_cap = sg;
- cap_idx = find_new_capacity(sg_cap_util, sg->sge);
- group_util = group_norm_usage(sg, cap_idx);
+ cap_idx = find_new_capacity(eenv, sg->sge);
+ group_util = group_norm_usage(eenv, sg);
sg_busy_energy = (group_util * sg->sge->cap_states[cap_idx].power)
>> SCHED_CAPACITY_SHIFT;
sg_idle_energy = ((SCHED_LOAD_SCALE-group_util) * sg->sge->idle_states[0].power)
@@ -4965,7 +4998,7 @@ static unsigned int sched_group_energy(struct sched_group *sg_top)
if (!sd->child)
cpumask_xor(&visit_cpus, &visit_cpus, sched_group_cpus(sg));
- if (cpumask_equal(sched_group_cpus(sg), sched_group_cpus(sg_top)))
+ if (cpumask_equal(sched_group_cpus(sg), sched_group_cpus(eenv->sg_top)))
goto next_cpu;
} while (sg = sg->next, sg != sd->groups);
@@ -4974,6 +5007,7 @@ static unsigned int sched_group_energy(struct sched_group *sg_top)
continue;
}
+ eenv->energy = total_energy;
return total_energy;
}
--
1.9.1
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/