[RFC PATCH V2 12/19] sched: using avg_idle to detect bursty wakeup

From: Preeti U Murthy
Date: Mon Aug 11 2014 - 07:39:06 EST


From: Alex Shi <alex.shi@xxxxxxxxx>

Sleeping tasks have no utilization; when they are woken up in a burst,
the zero utilization throws the scheduler out of balance, as seen with
the aim7 benchmark.

rq->avg_idle is 'used to accommodate bursty loads in a dirt simple
dirt cheap manner' -- Mike Galbraith.

With this cheap and smart burst indicator, we can detect a wakeup
burst and use nr_running as the instant utilization in this scenario.

For other scenarios, we still use the precise CPU utilization to
judge whether a domain is eligible for power scheduling.

Thanks to Mike Galbraith for the idea!

Signed-off-by: Alex Shi <alex.shi@xxxxxxxxx>
[Added CONFIG_SCHED_POWER switch to enable this patch]
Signed-off-by: Preeti U Murthy <preeti@xxxxxxxxxxxxxxxxxx>
---

kernel/sched/fair.c | 33 ++++++++++++++++++++++++++-------
1 file changed, 26 insertions(+), 7 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e993f1c..3db77e8 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4605,12 +4605,19 @@ struct sd_lb_stats {
* Try to collect the task running number and capacity of the group.
*/
static void get_sg_power_stats(struct sched_group *group,
- struct sched_domain *sd, struct sg_lb_stats *sgs)
+ struct sched_domain *sd, struct sg_lb_stats *sgs, int burst)
{
int i;

- for_each_cpu(i, sched_group_cpus(group))
- sgs->group_util += max_rq_util(i);
+ for_each_cpu(i, sched_group_cpus(group)) {
+ struct rq *rq = cpu_rq(i);
+
+ if (burst && rq->nr_running > 1)
+ /* use nr_running as instant utilization */
+ sgs->group_util += rq->nr_running;
+ else
+ sgs->group_util += max_rq_util(i);
+ }

sgs->group_weight = group->group_weight;
}
@@ -4624,6 +4631,8 @@ static int is_sd_full(struct sched_domain *sd,
struct sched_group *group;
struct sg_lb_stats sgs;
long sd_min_delta = LONG_MAX;
+ int cpu = task_cpu(p);
+ int burst = 0;
unsigned int putil;

if (p->se.load.weight == p->se.avg.load_avg_contrib)
@@ -4633,15 +4642,21 @@ static int is_sd_full(struct sched_domain *sd,
putil = (u64)(p->se.avg.runnable_avg_sum << SCHED_CAPACITY_SHIFT)
/ (p->se.avg.runnable_avg_period + 1);

+ if (cpu_rq(cpu)->avg_idle < sysctl_sched_burst_threshold)
+ burst = 1;
+
/* Try to collect the domain's utilization */
group = sd->groups;
do {
long g_delta;

memset(&sgs, 0, sizeof(sgs));
- get_sg_power_stats(group, sd, &sgs);
+ get_sg_power_stats(group, sd, &sgs, burst);

- g_delta = sgs.group_weight * FULL_UTIL - sgs.group_util;
+ if (burst)
+ g_delta = sgs.group_weight - sgs.group_util;
+ else
+ g_delta = sgs.group_weight * FULL_UTIL - sgs.group_util;

if (g_delta > 0 && g_delta < sd_min_delta) {
sd_min_delta = g_delta;
@@ -4651,8 +4666,12 @@ static int is_sd_full(struct sched_domain *sd,
sds->sd_util += sgs.group_util;
} while (group = group->next, group != sd->groups);

- if (sds->sd_util + putil < sd->span_weight * FULL_UTIL)
- return 0;
+ if (burst) {
+ if (sds->sd_util < sd->span_weight)
+ return 0;
+ } else
+ if (sds->sd_util + putil < sd->span_weight * FULL_UTIL)
+ return 0;

/* can not hold one more task in this domain */
return 1;

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/