[PATCH] kernel:sched:stats:/proc/schedstat: use seq_put_decimal_ull for decimal values

From: David Wang
Date: Fri Nov 08 2024 - 01:32:32 EST


seq_printf is costly, and lots of decimal values are emitted via seq_printf
when reading /proc/schedstat. Profiling indicates seq_printf takes more
than 90% of samples of show_schedstat:
show_schedstat(98.974% 667134/674048)
seq_printf(97.798% 652441/667134)
vsnprintf(97.810% 638155/652441)
format_decode(23.720% 151368/638155)
number(16.797% 107191/638155)
memcpy_orig(4.610% 29422/638155)
srso_return_thunk(2.738% 17475/638155)
bitmap_string.isra.0(0.928% 5921/638155)
__memcpy(0.407% 2599/638155)
pointer(0.089% 571/638155)
srso_safe_ret(0.003% 16/638155)
__rcu_read_unlock(0.097% 648/667134)
__rcu_read_lock(0.097% 647/667134)

One million rounds of open/read/close on /proc/schedstat took:

real 1m12.713s
user 0m0.232s
sys 1m12.440s
On average, each open/read/close sequence took 0.072ms.

With this patch, performance is significantly improved:

real 0m30.141s
user 0m0.320s
sys 0m29.820s
On average, each open/read/close sequence took 0.029ms, a ~60%
improvement.

Signed-off-by: David Wang <00107082@xxxxxxx>
---
kernel/sched/stats.c | 62 +++++++++++++++++++++++++-------------------
1 file changed, 36 insertions(+), 26 deletions(-)

diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c
index eb0cdcd4d921..21d8c2edbc43 100644
--- a/kernel/sched/stats.c
+++ b/kernel/sched/stats.c
@@ -122,15 +122,18 @@ static int show_schedstat(struct seq_file *seq, void *v)
rq = cpu_rq(cpu);

/* runqueue-specific stats */
- seq_printf(seq,
- "cpu%d %u 0 %u %u %u %u %llu %llu %lu",
- cpu, rq->yld_count,
- rq->sched_count, rq->sched_goidle,
- rq->ttwu_count, rq->ttwu_local,
- rq->rq_cpu_time,
- rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount);
-
- seq_printf(seq, "\n");
+ seq_puts(seq, "cpu");
+ seq_put_decimal_ull(seq, "", cpu);
+ seq_put_decimal_ull(seq, " ", rq->yld_count);
+ seq_put_decimal_ull(seq, " ", 0);
+ seq_put_decimal_ull(seq, " ", rq->sched_count);
+ seq_put_decimal_ull(seq, " ", rq->sched_goidle);
+ seq_put_decimal_ull(seq, " ", rq->ttwu_count);
+ seq_put_decimal_ull(seq, " ", rq->ttwu_local);
+ seq_put_decimal_ull(seq, " ", rq->rq_cpu_time);
+ seq_put_decimal_ull(seq, " ", rq->rq_sched_info.run_delay);
+ seq_put_decimal_ull(seq, " ", rq->rq_sched_info.pcount);
+ seq_putc(seq, '\n');

#ifdef CONFIG_SMP
/* domain-specific stats */
@@ -138,26 +141,33 @@ static int show_schedstat(struct seq_file *seq, void *v)
for_each_domain(cpu, sd) {
enum cpu_idle_type itype;

- seq_printf(seq, "domain%d %*pb", dcount++,
+ seq_puts(seq, "domain");
+ seq_put_decimal_ull(seq, "", dcount++);
+ seq_printf(seq, " %*pb",
cpumask_pr_args(sched_domain_span(sd)));
for (itype = 0; itype < CPU_MAX_IDLE_TYPES; itype++) {
- seq_printf(seq, " %u %u %u %u %u %u %u %u",
- sd->lb_count[itype],
- sd->lb_balanced[itype],
- sd->lb_failed[itype],
- sd->lb_imbalance[itype],
- sd->lb_gained[itype],
- sd->lb_hot_gained[itype],
- sd->lb_nobusyq[itype],
- sd->lb_nobusyg[itype]);
+ seq_put_decimal_ull(seq, " ", sd->lb_count[itype]);
+ seq_put_decimal_ull(seq, " ", sd->lb_balanced[itype]);
+ seq_put_decimal_ull(seq, " ", sd->lb_failed[itype]);
+ seq_put_decimal_ull(seq, " ", sd->lb_imbalance[itype]);
+ seq_put_decimal_ull(seq, " ", sd->lb_gained[itype]);
+ seq_put_decimal_ull(seq, " ", sd->lb_hot_gained[itype]);
+ seq_put_decimal_ull(seq, " ", sd->lb_nobusyq[itype]);
+ seq_put_decimal_ull(seq, " ", sd->lb_nobusyg[itype]);
}
- seq_printf(seq,
- " %u %u %u %u %u %u %u %u %u %u %u %u\n",
- sd->alb_count, sd->alb_failed, sd->alb_pushed,
- sd->sbe_count, sd->sbe_balanced, sd->sbe_pushed,
- sd->sbf_count, sd->sbf_balanced, sd->sbf_pushed,
- sd->ttwu_wake_remote, sd->ttwu_move_affine,
- sd->ttwu_move_balance);
+ seq_put_decimal_ull(seq, " ", sd->alb_count);
+ seq_put_decimal_ull(seq, " ", sd->alb_failed);
+ seq_put_decimal_ull(seq, " ", sd->alb_pushed);
+ seq_put_decimal_ull(seq, " ", sd->sbe_count);
+ seq_put_decimal_ull(seq, " ", sd->sbe_balanced);
+ seq_put_decimal_ull(seq, " ", sd->sbe_pushed);
+ seq_put_decimal_ull(seq, " ", sd->sbf_count);
+ seq_put_decimal_ull(seq, " ", sd->sbf_balanced);
+ seq_put_decimal_ull(seq, " ", sd->sbf_pushed);
+ seq_put_decimal_ull(seq, " ", sd->ttwu_wake_remote);
+ seq_put_decimal_ull(seq, " ", sd->ttwu_move_affine);
+ seq_put_decimal_ull(seq, " ", sd->ttwu_move_balance);
+ seq_putc(seq, '\n');
}
rcu_read_unlock();
#endif
--
2.39.2