/*
* Use locality-friendly rq->overloaded to cache the status of the rq
* to minimize the heavy cost on LLC shared data.
@@ -7837,6 +7867,22 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
if (kthread_is_per_cpu(p))
return 0;
+ if (unlikely(task_h_idle(p))) {
+ /*
+ * Disregard hierarchically idle tasks during sched-idle
+ * load balancing.
+ */
+ if (env->idle == CPU_SCHED_IDLE)
+ return 0;
+ } else if (!static_branch_unlikely(&sched_asym_cpucapacity)) {
+ /*
+ * It's not gonna help if stacking non-idle tasks on one
+ * cpu while leaving some idle.
+ */
+ if (cfs_rq_busy(env->src_rq) && !need_pull_cfs_task(env->dst_rq))
+ return 0;
These checks don't involve the task at all, so this kind of check
should be pushed into the more general load balance function. But, I'm
not totally clear on the motivation here. If we have cpu A with 1
non-idle task and 100 idle tasks, and cpu B with 1 non-idle task, we
should definitely try to load balance some of the idle tasks from A to
B. idle tasks _do_ get time to run (although little), and this can add
up and cause antagonism to the non-idle task if there are a lot of
idle threads.
CPU_SCHED_IDLE means the balance was triggered by sched_idle_balance(),
which pulls a non-idle task to the unoccupied cpu from the overloaded
ones, so idle tasks are not the target and should be skipped.
The second part is: if we have cpu A with 1 non-idle task and 100 idle
tasks, and B with >=1 non-idle task, we don't migrate the last non-idle
task on A to B.
It could be possible that we do want to migrate the last non-idle task
from A to B, if the weight sum of idle tasks on A is very high (easily
possible with affinity restrictions). So I think we should leave
regular load balance alone here if it really wants to move the
non-idle task, and wrap this entire block in an if (env->idle ==
CPU_SCHED_IDLE).