On Wed, 11 Dec 2024 at 19:58, K Prateek Nayak <kprateek.nayak@xxxxxxx> wrote:
> Aggregate nr_numa_running and nr_preferred_running when load balancing
> at NUMA domains only. While at it, also move the aggregation below the
> idle_cpu() check since an idle CPU cannot have any preferred tasks.
>
> Signed-off-by: K Prateek Nayak <kprateek.nayak@xxxxxxx>
> ---
>  kernel/sched/fair.c | 15 +++++++++------
>  1 file changed, 9 insertions(+), 6 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 2c4ebfc82917..ec2a79c8d0e7 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -10340,7 +10340,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
>  				      bool *sg_overloaded,
>  				      bool *sg_overutilized)
>  {
> -	int i, nr_running, local_group;
> +	int i, nr_running, local_group, sd_flags = env->sd->flags;
>
>  	memset(sgs, 0, sizeof(*sgs));
>
> @@ -10364,10 +10364,6 @@ static inline void update_sg_lb_stats(struct lb_env *env,
>  		if (cpu_overutilized(i))
>  			*sg_overutilized = 1;
>
> -#ifdef CONFIG_NUMA_BALANCING
> -		sgs->nr_numa_running += rq->nr_numa_running;
> -		sgs->nr_preferred_running += rq->nr_preferred_running;
> -#endif
>  		/*
>  		 * No need to call idle_cpu() if nr_running is not 0
>  		 */
> @@ -10377,10 +10373,17 @@ static inline void update_sg_lb_stats(struct lb_env *env,
>  			continue;
>  		}
>
> +#ifdef CONFIG_NUMA_BALANCING
> +		/* Only fbq_classify_group() uses this to classify NUMA groups */

fbq_classify_rq() uses these stats as well, and it is also used for
non-NUMA groups. AFAICT this doesn't change anything in the end,
because the group type is "all" for non-NUMA groups, but we need some
explanation of why it is OK to skip the NUMA stats there and why the
default behavior remains unchanged.
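
For reference, this is roughly how the two classifiers look in current
fair.c and how find_busiest_queue() consumes them (paraphrased, under
CONFIG_NUMA_BALANCING):

	/* paraphrased from kernel/sched/fair.c */
	static inline enum fbq_type fbq_classify_group(struct sg_lb_stats *sgs)
	{
		if (sgs->sum_h_nr_running > sgs->nr_numa_running)
			return regular;
		if (sgs->sum_h_nr_running > sgs->nr_preferred_running)
			return remote;
		return all;
	}

	static inline enum fbq_type fbq_classify_rq(struct rq *rq)
	{
		if (rq->nr_running > rq->nr_numa_running)
			return regular;
		if (rq->nr_running > rq->nr_preferred_running)
			return remote;
		return all;
	}

	/* in find_busiest_queue() */
	rt = fbq_classify_rq(rq);
	...
	if (rt > env->fbq_type)
		continue;

So fbq_classify_rq() still runs for every candidate rq, whichever
domain we are balancing at.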

> +		if (sd_flags & SD_NUMA) {
> +			sgs->nr_numa_running += rq->nr_numa_running;
> +			sgs->nr_preferred_running += rq->nr_preferred_running;
> +		}
> +#endif
>  		if (local_group)
>  			continue;
>
> -		if (env->sd->flags & SD_ASYM_CPUCAPACITY) {
> +		if (sd_flags & SD_ASYM_CPUCAPACITY) {
>  			/* Check for a misfit task on the cpu */
>  			if (sgs->group_misfit_task_load < rq->misfit_task_load) {
>  				sgs->group_misfit_task_load = rq->misfit_task_load;
> --
> 2.34.1
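
On the idle_cpu() part: both counters only track runnable tasks, so
they are indeed guaranteed to be 0 on an idle rq. For reference, the
accounting looks roughly like this:

	/* paraphrased from kernel/sched/fair.c */
	static void account_numa_enqueue(struct rq *rq, struct task_struct *p)
	{
		rq->nr_numa_running += (p->numa_preferred_nid != NUMA_NO_NODE);
		rq->nr_preferred_running += (p->numa_preferred_nid == task_node(p));
	}

	static void account_numa_dequeue(struct rq *rq, struct task_struct *p)
	{
		rq->nr_numa_running -= (p->numa_preferred_nid != NUMA_NO_NODE);
		rq->nr_preferred_running -= (p->numa_preferred_nid == task_node(p));
	}

so with rq->nr_running == 0 there is nothing to add for that CPU
anyway.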