[PATCH v2 3/8] sched/fair: remove meaningless imbalance calculation

From: Vincent Guittot
Date: Thu Aug 01 2019 - 10:40:43 EST


Clean up load_balance(): remove the meaningless load_per_task calculation
along with its helpers cpu_avg_load_per_task() and fix_small_imbalance(),
before the new algorithm is added.

Signed-off-by: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
---
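[Note below the cut line, not part of the commit: for reviewers following the
series, here is a minimal standalone sketch of the heuristic this patch
deletes. The struct, names and numbers are illustrative only and are not
kernel code: load_per_task was the group's load divided by its runnable
tasks, and fix_small_imbalance() could bump a too-small imbalance up to that
value so that at least one task would be moved.]

#include <stdio.h>

/* Illustrative stand-in for the group statistics used by the removed code. */
struct fake_group_stats {
	unsigned long group_load;	/* summed runnable load of the group */
	unsigned int sum_h_nr_running;	/* runnable tasks in the group */
};

/* What the removed sg_lb_stats::load_per_task field held. */
static unsigned long load_per_task(const struct fake_group_stats *g)
{
	return g->sum_h_nr_running ? g->group_load / g->sum_h_nr_running : 0;
}

int main(void)
{
	struct fake_group_stats busiest = {
		.group_load = 3072,
		.sum_h_nr_running = 3,
	};
	/* Pretend calculate_imbalance() came up with a small value. */
	unsigned long imbalance = 512;

	/*
	 * The removed check in calculate_imbalance(): when the imbalance is
	 * smaller than one "average task", fix_small_imbalance() could raise
	 * it to busiest->load_per_task so at least one task gets migrated.
	 */
	if (imbalance < load_per_task(&busiest))
		imbalance = load_per_task(&busiest);

	printf("load_per_task=%lu imbalance=%lu\n",
	       load_per_task(&busiest), imbalance);
	return 0;
}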
kernel/sched/fair.c | 105 +---------------------------------------------------
1 file changed, 1 insertion(+), 104 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d7f76b0..d7f4a7e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5450,18 +5450,6 @@ static unsigned long capacity_of(int cpu)
return cpu_rq(cpu)->cpu_capacity;
}

-static unsigned long cpu_avg_load_per_task(int cpu)
-{
- struct rq *rq = cpu_rq(cpu);
- unsigned long nr_running = READ_ONCE(rq->cfs.h_nr_running);
- unsigned long load_avg = cpu_runnable_load(rq);
-
- if (nr_running)
- return load_avg / nr_running;
-
- return 0;
-}
-
static void record_wakee(struct task_struct *p)
{
/*
@@ -7735,7 +7723,6 @@ static unsigned long task_h_load(struct task_struct *p)
struct sg_lb_stats {
unsigned long avg_load; /*Avg load across the CPUs of the group */
unsigned long group_load; /* Total load over the CPUs of the group */
- unsigned long load_per_task;
unsigned long group_capacity;
unsigned long group_util; /* Total utilization of the group */
unsigned int sum_h_nr_running; /* Nr tasks running in the group */
@@ -8117,9 +8104,6 @@ static inline void update_sg_lb_stats(struct lb_env *env,
sgs->group_capacity = group->sgc->capacity;
sgs->avg_load = (sgs->group_load*SCHED_CAPACITY_SCALE) / sgs->group_capacity;

- if (sgs->sum_h_nr_running)
- sgs->load_per_task = sgs->group_load / sgs->sum_h_nr_running;
-
sgs->group_weight = group->group_weight;

sgs->group_no_capacity = group_is_overloaded(env, sgs);
@@ -8350,76 +8334,6 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
}

/**
- * fix_small_imbalance - Calculate the minor imbalance that exists
- * amongst the groups of a sched_domain, during
- * load balancing.
- * @env: The load balancing environment.
- * @sds: Statistics of the sched_domain whose imbalance is to be calculated.
- */
-static inline
-void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds)
-{
- unsigned long tmp, capa_now = 0, capa_move = 0;
- unsigned int imbn = 2;
- unsigned long scaled_busy_load_per_task;
- struct sg_lb_stats *local, *busiest;
-
- local = &sds->local_stat;
- busiest = &sds->busiest_stat;
-
- if (!local->sum_h_nr_running)
- local->load_per_task = cpu_avg_load_per_task(env->dst_cpu);
- else if (busiest->load_per_task > local->load_per_task)
- imbn = 1;
-
- scaled_busy_load_per_task =
- (busiest->load_per_task * SCHED_CAPACITY_SCALE) /
- busiest->group_capacity;
-
- if (busiest->avg_load + scaled_busy_load_per_task >=
- local->avg_load + (scaled_busy_load_per_task * imbn)) {
- env->imbalance = busiest->load_per_task;
- return;
- }
-
- /*
- * OK, we don't have enough imbalance to justify moving tasks,
- * however we may be able to increase total CPU capacity used by
- * moving them.
- */
-
- capa_now += busiest->group_capacity *
- min(busiest->load_per_task, busiest->avg_load);
- capa_now += local->group_capacity *
- min(local->load_per_task, local->avg_load);
- capa_now /= SCHED_CAPACITY_SCALE;
-
- /* Amount of load we'd subtract */
- if (busiest->avg_load > scaled_busy_load_per_task) {
- capa_move += busiest->group_capacity *
- min(busiest->load_per_task,
- busiest->avg_load - scaled_busy_load_per_task);
- }
-
- /* Amount of load we'd add */
- if (busiest->avg_load * busiest->group_capacity <
- busiest->load_per_task * SCHED_CAPACITY_SCALE) {
- tmp = (busiest->avg_load * busiest->group_capacity) /
- local->group_capacity;
- } else {
- tmp = (busiest->load_per_task * SCHED_CAPACITY_SCALE) /
- local->group_capacity;
- }
- capa_move += local->group_capacity *
- min(local->load_per_task, local->avg_load + tmp);
- capa_move /= SCHED_CAPACITY_SCALE;
-
- /* Move if we gain throughput */
- if (capa_move > capa_now)
- env->imbalance = busiest->load_per_task;
-}
-
-/**
* calculate_imbalance - Calculate the amount of imbalance present within the
* groups of a given sched_domain during load balance.
* @env: load balance environment
@@ -8438,15 +8352,6 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
return;
}

- if (busiest->group_type == group_imbalanced) {
- /*
- * In the group_imb case we cannot rely on group-wide averages
- * to ensure CPU-load equilibrium, look at wider averages. XXX
- */
- busiest->load_per_task =
- min(busiest->load_per_task, sds->avg_load);
- }
-
/*
* Avg load of busiest sg can be less and avg load of local sg can
* be greater than avg load across all sgs of sd because avg load
@@ -8457,7 +8362,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
(busiest->avg_load <= sds->avg_load ||
local->avg_load >= sds->avg_load)) {
env->imbalance = 0;
- return fix_small_imbalance(env, sds);
+ return;
}

/*
@@ -8495,14 +8400,6 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
busiest->group_misfit_task_load);
}

- /*
- * if *imbalance is less than the average load per runnable task
- * there is no guarantee that any tasks will be moved so we'll have
- * a think about bumping its value to force at least one task to be
- * moved
- */
- if (env->imbalance < busiest->load_per_task)
- return fix_small_imbalance(env, sds);
}

/******* find_busiest_group() helpers end here *********************/
--
2.7.4