[RFC PATCH 14/08] [DEBUG] sched/fair: Add more lb_stats around lb_time and stats reuse
From: K Prateek Nayak
Date: Sun Mar 16 2025 - 06:31:21 EST
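Add schedstats for the time spent in sched_balance_rq() (per-idle-type minimum, maximum, and cumulative total, measured with sched_clock_cpu()) and for how often update_sg_lb_stats() reuses cached group stats versus recomputing them. The new fields are appended to each domain's per-idle-type output in show_schedstat(), after lb_nobusyg.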
Signed-off-by: K Prateek Nayak <kprateek.nayak@xxxxxxx>
---
include/linux/sched/topology.h | 5 +++++
kernel/sched/fair.c | 21 ++++++++++++++++++++-
kernel/sched/stats.c | 9 +++++++--
3 files changed, 32 insertions(+), 3 deletions(-)
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index a16d7d9dd9d3..dea65eb263c6 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -123,6 +123,11 @@ struct sched_domain {
unsigned int lb_hot_gained[CPU_MAX_IDLE_TYPES];
unsigned int lb_nobusyg[CPU_MAX_IDLE_TYPES];
unsigned int lb_nobusyq[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_min_time[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_max_time[CPU_MAX_IDLE_TYPES];
+ unsigned long lb_total_time[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_stats_reused[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_stats_recomputed[CPU_MAX_IDLE_TYPES];
/* Active load balancing */
unsigned int alb_count;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 31501b933d45..bb7b21421415 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -10491,10 +10491,13 @@ static inline void update_sg_lb_stats(struct lb_env *env,
* group_smt_balance is not possible under busy load balancing.
*/
if (can_retrieve_stats(env->sd, env->idle) &&
- retrieve_cached_stats(group, sgs))
+ retrieve_cached_stats(group, sgs)) {
+ schedstat_inc(env->sd->lb_stats_reused[env->idle]);
goto group_classify;
+ }
memset(sgs, 0, sizeof(*sgs));
+ schedstat_inc(env->sd->lb_stats_recomputed[env->idle]);
for_each_cpu_and(i, sched_group_span(group), env->cpus) {
struct rq *rq = cpu_rq(i);
@@ -11901,6 +11904,7 @@ static int sched_balance_rq(int this_cpu, struct rq *this_rq,
{
int ld_moved, cur_ld_moved, active_balance = 0;
struct sched_domain *sd_parent = sd->parent;
+ u64 lb_start = sched_clock_cpu(this_cpu);
struct sched_group *group;
struct rq *busiest;
struct rq_flags rf;
@@ -12174,6 +12178,21 @@ static int sched_balance_rq(int this_cpu, struct rq *this_rq,
sd->balance_interval < sd->max_interval)
sd->balance_interval *= 2;
out:
+ if (schedstat_enabled()) {
+ u64 now = sched_clock_cpu(this_cpu);
+ u64 elapsed = now - lb_start;
+
+ if (!schedstat_val(sd->lb_min_time[idle]) ||
+ elapsed < schedstat_val(sd->lb_min_time[idle]))
+ __schedstat_set(sd->lb_min_time[idle], elapsed);
+
+ if (!schedstat_val(sd->lb_max_time[idle]) ||
+ elapsed > schedstat_val(sd->lb_max_time[idle]))
+ __schedstat_set(sd->lb_max_time[idle], elapsed);
+
+ __schedstat_add(sd->lb_total_time[idle], elapsed);
+ }
+
return ld_moved;
}
diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c
index 4346fd81c31f..b2ace3c51062 100644
--- a/kernel/sched/stats.c
+++ b/kernel/sched/stats.c
@@ -141,7 +141,7 @@ static int show_schedstat(struct seq_file *seq, void *v)
seq_printf(seq, "domain%d %s %*pb", dcount++, sd->name,
cpumask_pr_args(sched_domain_span(sd)));
for (itype = 0; itype < CPU_MAX_IDLE_TYPES; itype++) {
- seq_printf(seq, " %u %u %u %u %u %u %u %u %u %u %u",
+ seq_printf(seq, " %u %u %u %u %u %u %u %u %u %u %u %u %u %lu %u %u",
sd->lb_count[itype],
sd->lb_balanced[itype],
sd->lb_failed[itype],
@@ -152,7 +152,12 @@ static int show_schedstat(struct seq_file *seq, void *v)
sd->lb_gained[itype],
sd->lb_hot_gained[itype],
sd->lb_nobusyq[itype],
- sd->lb_nobusyg[itype]);
+ sd->lb_nobusyg[itype],
+ sd->lb_min_time[itype],
+ sd->lb_max_time[itype],
+ sd->lb_total_time[itype],
+ sd->lb_stats_reused[itype],
+ sd->lb_stats_recomputed[itype]);
}
seq_printf(seq,
" %u %u %u %u %u %u %u %u %u %u %u %u\n",
--
2.43.0