[PATCH 4/4] irqtime: drop local_irq_save/restore from irqtime_account_irq

From: riel
Date: Thu Jun 30 2016 - 15:36:18 EST


From: Rik van Riel <riel@xxxxxxxxxx>

Drop local_irq_save/restore from irqtime_account_irq().
Instead, have softirq and hardirq track their time spent
independently, with the softirq code subtracting any hardirq
time that accumulated while the softirq was running.

The softirq code can be interrupted by hardirq code at
any point in time. To detect that, it re-reads the per-CPU
hardirq time after taking its snapshot of the timekeeping
variables, and loops around in the unlikely case that the
value changed in between.

Signed-off-by: Rik van Riel <riel@xxxxxxxxxx>
---
kernel/sched/cputime.c | 72 +++++++++++++++++++++++++++++++++++++++++---------
kernel/sched/sched.h | 38 +++++++++++++++++++++-----
2 files changed, 90 insertions(+), 20 deletions(-)

diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index a0aefd4c7ea6..b78991fac228 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -26,7 +26,9 @@
DEFINE_PER_CPU(u64, cpu_hardirq_time);
DEFINE_PER_CPU(u64, cpu_softirq_time);

-static DEFINE_PER_CPU(u64, irq_start_time);
+static DEFINE_PER_CPU(u64, hardirq_start_time);
+static DEFINE_PER_CPU(u64, softirq_start_time);
+static DEFINE_PER_CPU(u64, prev_hardirq_time);
static int sched_clock_irqtime;

void enable_sched_clock_irqtime(void)
@@ -41,6 +43,7 @@ void disable_sched_clock_irqtime(void)

#ifndef CONFIG_64BIT
DEFINE_PER_CPU(seqcount_t, irq_time_seq);
+DEFINE_PER_CPU(seqcount_t, softirq_time_seq);
#endif /* CONFIG_64BIT */

/*
@@ -53,36 +56,79 @@ DEFINE_PER_CPU(seqcount_t, irq_time_seq);
* softirq -> hardirq, hardirq -> softirq
*
* When exiting hardirq or softirq time, account the elapsed time.
+ *
+ * When exiting softirq time, subtract the amount of hardirq time that
+ * interrupted this softirq run, to avoid double accounting of that time.
*/
void irqtime_account_irq(struct task_struct *curr, int irqtype)
{
- unsigned long flags;
+ u64 prev_softirq_start;
+ bool leaving_softirq;
+ u64 prev_hardirq;
+ u64 hardirq_time;
s64 delta;
int cpu;

if (!sched_clock_irqtime)
return;

- local_irq_save(flags);
-
cpu = smp_processor_id();
- delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time);
- __this_cpu_add(irq_start_time, delta);

- irq_time_write_begin();
+ /*
+ * Hardirq time accounting is pretty straightforward. If not in
+ * hardirq context yet (entering hardirq), set the start time.
+ * If already in hardirq context (leaving), account the elapsed time.
+ */
+ if (irqtype == HARDIRQ_OFFSET) {
+ bool leaving_hardirq = hardirq_count();
+ delta = sched_clock_cpu(cpu) - __this_cpu_read(hardirq_start_time);
+ __this_cpu_add(hardirq_start_time, delta);
+ if (leaving_hardirq) {
+ hardirq_time_write_begin();
+ __this_cpu_add(cpu_hardirq_time, delta);
+ hardirq_time_write_end();
+ }
+ return;
+ }
+
+ /*
+ * Softirq context may get interrupted by hardirq context, on the
+ * same CPU. At softirq entry time the amount of time this CPU spent
+ * in hardirq context is stored. At softirq exit time, the time spent
+ * in hardirq context during the softirq is subtracted.
+ */
+ prev_softirq_start = __this_cpu_read(softirq_start_time);
+ prev_hardirq = __this_cpu_read(prev_hardirq_time);
+ leaving_softirq = in_serving_softirq();
+
+ do {
+ u64 now = sched_clock_cpu(cpu);
+
+ hardirq_time = READ_ONCE(per_cpu(cpu_hardirq_time, cpu));
+ __this_cpu_write(softirq_start_time, now);
+ __this_cpu_write(prev_hardirq_time, hardirq_time);
+
+ if (leaving_softirq) {
+ /*
+ * Subtract hardirq time that happened during this
+ * softirq.
+ */
+ s64 hi_delta = hardirq_time - prev_hardirq;
+ delta = now - prev_softirq_start - hi_delta;
+ }
+ /* Loop around if interrupted by a hardirq. */
+ } while (hardirq_time != READ_ONCE(per_cpu(cpu_hardirq_time, cpu)));
+
/*
* We do not account for softirq time from ksoftirqd here.
* We want to continue accounting softirq time to ksoftirqd thread
* in that case, so as not to confuse scheduler with a special task
* that do not consume any time, but still wants to run.
*/
- if (hardirq_count())
- __this_cpu_add(cpu_hardirq_time, delta);
- else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
+ softirq_time_write_begin();
+ if (leaving_softirq && curr != this_cpu_ksoftirqd())
__this_cpu_add(cpu_softirq_time, delta);
-
- irq_time_write_end();
- local_irq_restore(flags);
+ softirq_time_write_end();
}
EXPORT_SYMBOL_GPL(irqtime_account_irq);

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ec2e8d23527e..cad4df9835f7 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1752,38 +1752,62 @@ DECLARE_PER_CPU(u64, cpu_softirq_time);

#ifndef CONFIG_64BIT
DECLARE_PER_CPU(seqcount_t, irq_time_seq);
+DECLARE_PER_CPU(seqcount_t, softirq_time_seq);

-static inline void irq_time_write_begin(void)
+static inline void hardirq_time_write_begin(void)
{
__this_cpu_inc(irq_time_seq.sequence);
smp_wmb();
}

-static inline void irq_time_write_end(void)
+static inline void hardirq_time_write_end(void)
{
smp_wmb();
__this_cpu_inc(irq_time_seq.sequence);
}

+static inline void softirq_time_write_begin(void)
+{
+ __this_cpu_inc(softirq_time_seq.sequence);
+ smp_wmb();
+}
+
+static inline void softirq_time_write_end(void)
+{
+ smp_wmb();
+ __this_cpu_inc(softirq_time_seq.sequence);
+}
+
static inline u64 irq_time_read(int cpu)
{
u64 irq_time;
- unsigned seq;
+ unsigned hi_seq;
+ unsigned si_seq;

do {
- seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu));
+ hi_seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu));
+ si_seq = read_seqcount_begin(&per_cpu(softirq_time_seq, cpu));
irq_time = per_cpu(cpu_softirq_time, cpu) +
per_cpu(cpu_hardirq_time, cpu);
- } while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq));
+ } while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), hi_seq) ||
+ read_seqcount_retry(&per_cpu(softirq_time_seq, cpu), si_seq));

return irq_time;
}
#else /* CONFIG_64BIT */
-static inline void irq_time_write_begin(void)
+static inline void hardirq_time_write_begin(void)
+{
+}
+
+static inline void hardirq_time_write_end(void)
+{
+}
+
+static inline void softirq_time_write_begin(void)
{
}

-static inline void irq_time_write_end(void)
+static inline void softirq_time_write_end(void)
{
}

--
2.7.4