[PATCH v2 8/8] sched/fair: use utilization to select misfit task

From: Vincent Guittot
Date: Thu Aug 01 2019 - 10:40:54 EST


Utilization is used to detect a misfit task, but the load is then used to
select the task on the CPU, which can lead to selecting a small task with
a high weight instead of the task that triggered the misfit migration.

Signed-off-by: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
---
kernel/sched/fair.c | 28 ++++++++++++++--------------
kernel/sched/sched.h | 2 +-
2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 53e64a7..d08cc12 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3817,16 +3817,16 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
return;

if (!p) {
- rq->misfit_task_load = 0;
+ rq->misfit_task_util = 0;
return;
}

if (task_fits_capacity(p, capacity_of(cpu_of(rq)))) {
- rq->misfit_task_load = 0;
+ rq->misfit_task_util = 0;
return;
}

- rq->misfit_task_load = task_h_load(p);
+ rq->misfit_task_util = task_util_est(p);
}

#else /* CONFIG_SMP */
@@ -7487,14 +7487,14 @@ static int detach_tasks(struct lb_env *env)
break;

case migrate_misfit:
- load = task_h_load(p);
+ util = task_util_est(p);

/*
* utilization of misfit task might decrease a bit
* since it has been recorded. Be conservative in the
* condition.
*/
- if (load < env->imbalance)
+ if (2*util < env->imbalance)
goto next;

env->imbalance = 0;
@@ -7785,7 +7785,7 @@ struct sg_lb_stats {
unsigned int group_weight;
enum group_type group_type;
unsigned int group_asym_capacity; /* tasks should be move to preferred cpu */
- unsigned long group_misfit_task_load; /* A CPU has a task too big for its capacity */
+ unsigned long group_misfit_task_util; /* A CPU has a task too big for its capacity */
#ifdef CONFIG_NUMA_BALANCING
unsigned int nr_numa_running;
unsigned int nr_preferred_running;
@@ -7959,7 +7959,7 @@ check_cpu_capacity(struct rq *rq, struct sched_domain *sd)
*/
static inline int check_misfit_status(struct rq *rq, struct sched_domain *sd)
{
- return rq->misfit_task_load &&
+ return rq->misfit_task_util &&
(rq->cpu_capacity_orig < rq->rd->max_cpu_capacity ||
check_cpu_capacity(rq, sd));
}
@@ -8078,7 +8078,7 @@ group_type group_classify(struct lb_env *env,
if (sgs->group_asym_capacity)
return group_asym_capacity;

- if (sgs->group_misfit_task_load)
+ if (sgs->group_misfit_task_util)
return group_misfit_task;

if (!group_has_capacity(env, sgs))
@@ -8164,8 +8164,8 @@ static inline void update_sg_lb_stats(struct lb_env *env,

/* Check for a misfit task on the cpu */
if (env->sd->flags & SD_ASYM_CPUCAPACITY &&
- sgs->group_misfit_task_load < rq->misfit_task_load) {
- sgs->group_misfit_task_load = rq->misfit_task_load;
+ sgs->group_misfit_task_util < rq->misfit_task_util) {
+ sgs->group_misfit_task_util = rq->misfit_task_util;
*sg_status |= SG_OVERLOAD;
}
}
@@ -8261,7 +8261,7 @@ static bool update_sd_pick_busiest(struct lb_env *env,
* If we have more than one misfit sg go with the
* biggest misfit.
*/
- if (sgs->group_misfit_task_load < busiest->group_misfit_task_load)
+ if (sgs->group_misfit_task_util < busiest->group_misfit_task_util)
return false;
break;

@@ -8458,7 +8458,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
if (busiest->group_type == group_misfit_task) {
/* Set imbalance to allow misfit task to be balanced. */
env->balance_type = migrate_misfit;
- env->imbalance = busiest->group_misfit_task_load;
+ env->imbalance = busiest->group_misfit_task_util;
return;
}

@@ -8801,8 +8801,8 @@ static struct rq *find_busiest_queue(struct lb_env *env,
* For ASYM_CPUCAPACITY domains with misfit tasks we simply
* seek the "biggest" misfit task.
*/
- if (rq->misfit_task_load > busiest_load) {
- busiest_load = rq->misfit_task_load;
+ if (rq->misfit_task_util > busiest_util) {
+ busiest_util = rq->misfit_task_util;
busiest = rq;
}

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 7583fad..ef6e1b2 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -916,7 +916,7 @@ struct rq {

unsigned char idle_balance;

- unsigned long misfit_task_load;
+ unsigned long misfit_task_util;

/* For active balancing */
int active_balance;
--
2.7.4