[RFC PATCH V2 13/19] sched: packing transitory tasks in wakeup power balancing

From: Preeti U Murthy
Date: Mon Aug 11 2014 - 07:40:26 EST


From: Alex Shi <alex.shi@xxxxxxxxx>

If the woken task is transitory enough, it has a chance to be packed
onto a cpu which is busy but still has time to take care of it.

For the powersaving policy, only a task whose historical util is < 25%
has a chance to be packed. If there is no cpu eligible to handle it, an
idlest cpu in the leader group will be used.

Morten Rasmussen caught a type bug, and PeterZ reminded me to consider
rt_util. Thank you!

Inspired-by: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
Signed-off-by: Alex Shi <alex.shi@xxxxxxxxx>
[Added CONFIG_SCHED_POWER switch to enable this patch]
Signed-off-by: Preeti U Murthy <preeti@xxxxxxxxxxxxxxxxxx>
---

kernel/sched/fair.c | 56 +++++++++++++++++++++++++++++++++++++++++++++------
1 file changed, 49 insertions(+), 7 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3db77e8..e7a677e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4693,24 +4693,65 @@ static inline int get_sd_sched_balance_policy(struct sched_domain *sd,
}

/*
+ * find_leader_cpu - find the busiest cpu among the cpus in group that
+ * still has enough free time to take the task.
+ */
+static int
+find_leader_cpu(struct sched_group *group, struct task_struct *p, int this_cpu,
+		int policy)
+{
+	int vacancy, min_vacancy = INT_MAX;
+	int leader_cpu = -1;
+	int i;
+	/* task util scaled by 1 << SCHED_CAPACITY_SHIFT; widen before shifting */
+	unsigned putil = ((u64)p->se.avg.runnable_avg_sum << SCHED_CAPACITY_SHIFT)
+				/ (p->se.avg.runnable_avg_period + 1);
+
+	/* bias toward local cpu; cast so the test is signed, like vacancy below */
+	if (cpumask_test_cpu(this_cpu, tsk_cpus_allowed(p)) &&
+	    (int)(FULL_UTIL - max_rq_util(this_cpu) - (putil << 2)) > 0)
+		return this_cpu;
+
+	/* Traverse only the allowed CPUs; @policy is not consulted here */
+	for_each_cpu_and(i, sched_group_cpus(group), tsk_cpus_allowed(p)) {
+		if (i == this_cpu)
+			continue;
+
+		/* reserve 4x putil: only a light task (putil < 25%) can fit */
+		vacancy = FULL_UTIL - max_rq_util(i) - (putil << 2);
+		/* keep the cpu with the smallest positive vacancy (busiest fit) */
+		if (vacancy > 0 && vacancy < min_vacancy) {
+			min_vacancy = vacancy;
+			leader_cpu = i;
+		}
+	}
+	return leader_cpu;
+}
+
+/*
* If power policy is eligible for this domain, and it has task allowed cpu.
* we will select CPU from this domain.
*/
static int get_cpu_for_power_policy(struct sched_domain *sd, int cpu,
- struct task_struct *p, struct sd_lb_stats *sds)
+ struct task_struct *p, struct sd_lb_stats *sds, int wakeup)
{
int policy;
int new_cpu = -1;

policy = get_sd_sched_balance_policy(sd, cpu, p, sds);
- if (policy != SCHED_POLICY_PERFORMANCE && sds->group_leader)
- new_cpu = find_idlest_cpu(sds->group_leader, p, cpu);
-
+ if (policy != SCHED_POLICY_PERFORMANCE && sds->group_leader) {
+ if (wakeup)
+ new_cpu = find_leader_cpu(sds->group_leader,
+ p, cpu, policy);
+ /* for fork balancing, or when no leader cpu can take the task */
+ if (new_cpu == -1)
+ new_cpu = find_idlest_cpu(sds->group_leader, p, cpu);
+ }
return new_cpu;
}
#else
static int get_cpu_for_power_policy(struct sched_domain *sd, int cpu,
- struct task_struct *p, struct sd_lb_stats *sds)
+ struct task_struct *p, struct sd_lb_stats *sds, int wakeup)
{
return -1;
}
@@ -4768,13 +4809,14 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
if (tmp->flags & sd_flag) {
sd = tmp;

- new_cpu = get_cpu_for_power_policy(sd, cpu, p, &sds);
+ new_cpu = get_cpu_for_power_policy(sd, cpu, p, &sds,
+ sd_flag & SD_BALANCE_WAKE);
if (new_cpu != -1)
goto unlock;
}
}
if (affine_sd) {
- new_cpu = get_cpu_for_power_policy(affine_sd, cpu, p, &sds);
+ new_cpu = get_cpu_for_power_policy(affine_sd, cpu, p, &sds, 1);
if (new_cpu != -1)
goto unlock;


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/