[PATCH 5/5] irqtime: drop local_irq_save/restore from irqtime_account_irq
From: riel
Date: Thu Jun 16 2016 - 12:07:59 EST
From: Rik van Riel <riel@xxxxxxxxxx>
Drop local_irq_save/restore from irqtime_account_irq.
Instead, have softirq and hardirq track their time spent
independently, with the softirq code subtracting any hardirq
time that accrued while the softirq was running.
The softirq code can be interrupted by hardirq code at
any point in time, but it can check whether it got a
consistent snapshot of the timekeeping variables it wants,
and loop around in the unlikely case that it did not.
Signed-off-by: Rik van Riel <riel@xxxxxxxxxx>
---
kernel/sched/cputime.c | 63 ++++++++++++++++++++++++++++++++++++++++----------
kernel/sched/sched.h | 28 ++++++++++++++--------
2 files changed, 70 insertions(+), 21 deletions(-)
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 07f847267e4e..ba18d51a091c 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -26,7 +26,9 @@
DEFINE_PER_CPU(u64, cpu_hardirq_time);
DEFINE_PER_CPU(u64, cpu_softirq_time);
-static DEFINE_PER_CPU(u64, irq_start_time);
+static DEFINE_PER_CPU(u64, hardirq_start_time);
+static DEFINE_PER_CPU(u64, softirq_start_time);
+static DEFINE_PER_CPU(u64, prev_hardirq_time);
static int sched_clock_irqtime;
void enable_sched_clock_irqtime(void)
@@ -41,6 +43,7 @@ void disable_sched_clock_irqtime(void)
#ifndef CONFIG_64BIT
DEFINE_PER_CPU(seqcount_t, irq_time_seq);
+DEFINE_PER_CPU(seqcount_t, softirq_time_seq);
#endif /* CONFIG_64BIT */
/*
@@ -53,36 +56,72 @@ DEFINE_PER_CPU(seqcount_t, irq_time_seq);
* softirq -> hardirq, hardirq -> softirq
*
* When exiting hardirq or softirq time, account the elapsed time.
+ *
+ * When exiting softirq time, subtract the amount of hardirq time that
+ * interrupted this softirq run, to avoid double accounting of that time.
*/
void irqtime_account_irq(struct task_struct *curr, int irqtype)
{
- unsigned long flags;
- s64 delta;
+ u64 prev_softirq_start;
+ u64 prev_hardirq;
+ u64 hardirq_time;
+ s64 delta = 0;
int cpu;
if (!sched_clock_irqtime)
return;
- local_irq_save(flags);
-
cpu = smp_processor_id();
- delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time);
- __this_cpu_add(irq_start_time, delta);
+ /*
+ * Softirq context may get interrupted by hardirq context,
+ * on the same CPU. At softirq entry time the cumulative time
+ * spent in hardirq context so far is stored. At softirq exit
+ * time, the hardirq time accumulated during the softirq is
+ * subtracted.
+ */
+ prev_hardirq = __this_cpu_read(prev_hardirq_time);
+ prev_softirq_start = __this_cpu_read(softirq_start_time);
+
+ if (irqtype == HARDIRQ_OFFSET) {
+ delta = sched_clock_cpu(cpu) - __this_cpu_read(hardirq_start_time);
+ __this_cpu_add(hardirq_start_time, delta);
+ } else do {
+ u64 now = sched_clock_cpu(cpu);
+ hardirq_time = READ_ONCE(per_cpu(cpu_hardirq_time, cpu));
+
+ delta = now - prev_softirq_start;
+ if (in_serving_softirq()) {
+ /*
+ * Leaving softirq context. Avoid double counting by
+ * subtracting hardirq time from this interval.
+ */
+ s64 hi_delta = hardirq_time - prev_hardirq;
+ delta -= hi_delta;
+ } else {
+ /* Entering softirq context. Note start times. */
+ __this_cpu_write(softirq_start_time, now);
+ __this_cpu_write(prev_hardirq_time, hardirq_time);
+ }
+ /*
+ * If a hardirq ran during this calculation, the values read
+ * above may not form a consistent snapshot. Try again.
+ */
+ } while (hardirq_time != READ_ONCE(per_cpu(cpu_hardirq_time, cpu)));
- irq_time_write_begin();
+ irq_time_write_begin(irqtype);
/*
* We do not account for softirq time from ksoftirqd here.
* We want to continue accounting softirq time to ksoftirqd thread
* in that case, so as not to confuse scheduler with a special task
* that do not consume any time, but still wants to run.
*/
- if (hardirq_count())
+ if (irqtype == HARDIRQ_OFFSET && hardirq_count())
__this_cpu_add(cpu_hardirq_time, delta);
- else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
+ else if (irqtype == SOFTIRQ_OFFSET && in_serving_softirq() &&
+ curr != this_cpu_ksoftirqd())
__this_cpu_add(cpu_softirq_time, delta);
- irq_time_write_end();
- local_irq_restore(flags);
+ irq_time_write_end(irqtype);
}
EXPORT_SYMBOL_GPL(irqtime_account_irq);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ec2e8d23527e..65d013447e0c 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1752,38 +1752,48 @@ DECLARE_PER_CPU(u64, cpu_softirq_time);
#ifndef CONFIG_64BIT
DECLARE_PER_CPU(seqcount_t, irq_time_seq);
+DECLARE_PER_CPU(seqcount_t, softirq_time_seq);
-static inline void irq_time_write_begin(void)
+static inline void irq_time_write_begin(int irqtype)
{
- __this_cpu_inc(irq_time_seq.sequence);
+ if (irqtype == HARDIRQ_OFFSET)
+ __this_cpu_inc(irq_time_seq.sequence);
+ else
+ __this_cpu_inc(softirq_time_seq.sequence);
smp_wmb();
}
-static inline void irq_time_write_end(void)
+static inline void irq_time_write_end(int irqtype)
{
smp_wmb();
- __this_cpu_inc(irq_time_seq.sequence);
+ if (irqtype == HARDIRQ_OFFSET)
+ __this_cpu_inc(irq_time_seq.sequence);
+ else
+ __this_cpu_inc(softirq_time_seq.sequence);
}
static inline u64 irq_time_read(int cpu)
{
u64 irq_time;
- unsigned seq;
+ unsigned hi_seq;
+ unsigned si_seq;
do {
- seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu));
+ hi_seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu));
+ si_seq = read_seqcount_begin(&per_cpu(softirq_time_seq, cpu));
irq_time = per_cpu(cpu_softirq_time, cpu) +
per_cpu(cpu_hardirq_time, cpu);
- } while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq));
+ } while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), hi_seq) ||
+ read_seqcount_retry(&per_cpu(softirq_time_seq, cpu), si_seq));
return irq_time;
}
#else /* CONFIG_64BIT */
-static inline void irq_time_write_begin(void)
+static inline void irq_time_write_begin(int irqtype)
{
}
-static inline void irq_time_write_end(void)
+static inline void irq_time_write_end(int irqtype)
{
}
--
2.5.5