[PATCH 1/3] sched: account softirq cputime separately in irqtime

From: Tio Zhang
Date: Tue Apr 02 2024 - 07:24:42 EST


Currently, when CONFIG_IRQ_TIME_ACCOUNTING=y, both hardirq and softirq
time are accumulated into "irqtime.total". Since both are accounted in
the same path (account_{hard,soft}irq_{enter,exit}()), we can count them
separately by filtering on the offset.
To preserve backward compatibility, the meaning of "total" is unchanged;
softirq time is additionally accounted in a new field, "total_soft".
Hardirq time can therefore be calculated as "total" minus "total_soft".

This patch only makes the softirq cputime statistics available in
irqtime; nothing uses them yet.

Signed-off-by: Tio Zhang <tiozhang@xxxxxxxxxxxxxx>
---
kernel/sched/cputime.c | 18 ++++++++++++++----
kernel/sched/sched.h | 16 ++++++++++++++++
2 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index af7952f12e6c..23e4bca1e3e8 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -35,13 +35,14 @@ void disable_sched_clock_irqtime(void)
}

static void irqtime_account_delta(struct irqtime *irqtime, u64 delta,
- enum cpu_usage_stat idx)
+ u64 delta_soft, enum cpu_usage_stat idx)
{
u64 *cpustat = kcpustat_this_cpu->cpustat;

u64_stats_update_begin(&irqtime->sync);
cpustat[idx] += delta;
irqtime->total += delta;
+ irqtime->total_soft += delta_soft;
irqtime->tick_delta += delta;
u64_stats_update_end(&irqtime->sync);
}
@@ -54,7 +55,7 @@ void irqtime_account_irq(struct task_struct *curr, unsigned int offset)
{
struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
unsigned int pc;
- s64 delta;
+ s64 delta, delta_soft = 0;
int cpu;

if (!sched_clock_irqtime)
@@ -65,6 +66,15 @@ void irqtime_account_irq(struct task_struct *curr, unsigned int offset)
irqtime->irq_start_time += delta;
pc = irq_count() - offset;

+ /*
+ * We only account softirq time when we are called by
+ * account_softirq_enter{,exit}
+ */
+ if ((offset & SOFTIRQ_OFFSET) || (pc & SOFTIRQ_OFFSET)) {
+ delta_soft = sched_clock_cpu(cpu) - irqtime->soft_start_time;
+ irqtime->soft_start_time += delta_soft;
+ }
+
/*
* We do not account for softirq time from ksoftirqd here.
* We want to continue accounting softirq time to ksoftirqd thread
@@ -72,9 +82,9 @@ void irqtime_account_irq(struct task_struct *curr, unsigned int offset)
* that do not consume any time, but still wants to run.
*/
if (pc & HARDIRQ_MASK)
- irqtime_account_delta(irqtime, delta, CPUTIME_IRQ);
+ irqtime_account_delta(irqtime, delta, delta_soft, CPUTIME_IRQ);
else if ((pc & SOFTIRQ_OFFSET) && curr != this_cpu_ksoftirqd())
- irqtime_account_delta(irqtime, delta, CPUTIME_SOFTIRQ);
+ irqtime_account_delta(irqtime, delta, delta_soft, CPUTIME_SOFTIRQ);
}

static u64 irqtime_tick_accounted(u64 maxtime)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 001fe047bd5d..f479c61b84b5 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2931,8 +2931,10 @@ static inline void nohz_run_idle_balance(int cpu) { }
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
struct irqtime {
u64 total;
+ u64 total_soft;
u64 tick_delta;
u64 irq_start_time;
+ u64 soft_start_time;
struct u64_stats_sync sync;
};

@@ -2956,6 +2958,20 @@ static inline u64 irq_time_read(int cpu)

return total;
}
+
+static inline u64 irq_time_read_soft(int cpu)
+{
+ struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu);
+ unsigned int seq;
+ u64 total_soft;
+
+ do {
+ seq = __u64_stats_fetch_begin(&irqtime->sync);
+ total_soft = irqtime->total_soft;
+ } while (__u64_stats_fetch_retry(&irqtime->sync, seq));
+
+ return total_soft;
+}
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */

#ifdef CONFIG_CPU_FREQ
--
2.17.1