[PATCH v2] tracing: Don't account for cpu idle time with irqsoff tracers

From: Stephen Boyd
Date: Tue May 27 2014 - 16:08:14 EST


The idle loop calls stop_critical_timings() in an attempt to stop
the irqsoff tracer from tracing a cpu's idle time. Unfortunately,
stop_critical_timings() doesn't do much besides stop the tracer
until the point that another irqsoff event happens. This
typically happens in rcu_idle_enter() or later on in the cpuidle
driver when we take a spinlock to notify the clockevent framework
about timers that lose state during deep idle (FEAT_C3_STOP).
This leads to worthless irqsoff traces that show a large amount
of time spent in idle:

# tracer: irqsoff
#
# irqsoff latency trace v1.1.5 on 3.15.0-rc4-00002-geffda86cfdaa-dirty
# --------------------------------------------------------------------
# latency: 3997988 us, #88/88, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4)
# -----------------
# | task: swapper/1-0 (uid:0 nice:0 policy:0 rt_prio:0)
# -----------------
# => started at: rcu_idle_enter
# => ended at: cpuidle_enter_state
#
#
# _------=> CPU#
# / _-----=> irqs-off
# | / _----=> need-resched
# || / _---=> hardirq/softirq
# ||| / _--=> preempt-depth
# |||| / delay
# cmd pid ||||| time | caller
# \ / ||||| \ | /
<idle>-0 1d..1 2us+: trace_hardirqs_off <-rcu_idle_enter
<idle>-0 1d..2 7us+: rcu_eqs_enter_common.isra.48 <-rcu_idle_enter
<idle>-0 1d..2 12us+: cpuidle_enabled <-cpu_startup_entry
<idle>-0 1d..2 17us+: cpuidle_select <-cpu_startup_entry
<idle>-0 1d..2 21us+: menu_select <-cpuidle_select
<idle>-0 1d..2 26us+: pm_qos_request <-menu_select
<idle>-0 1d..2 30us+: tick_nohz_get_sleep_length <-menu_select
<idle>-0 1d..2 34us+: ns_to_timespec <-menu_select
<idle>-0 1d..2 39us+: nr_iowait_cpu <-menu_select
<idle>-0 1d..2 46us+: this_cpu_load <-menu_select
<idle>-0 1d..2 50us+: nr_iowait_cpu <-menu_select
<idle>-0 1d..2 55us+: clockevents_notify <-cpu_startup_entry
<idle>-0 1d..2 59us+: _raw_spin_lock_irqsave <-clockevents_notify
<idle>-0 1d..2 64us+: preempt_count_add <-_raw_spin_lock_irqsave
<idle>-0 1d..3 69us+: do_raw_spin_lock <-_raw_spin_lock_irqsave
<idle>-0 1d..3 74us+: tick_broadcast_oneshot_control <-clockevents_notify
<idle>-0 1d..3 78us+: _raw_spin_lock_irqsave <-tick_broadcast_oneshot_control
<idle>-0 1d..3 83us+: preempt_count_add <-_raw_spin_lock_irqsave
<idle>-0 1d..4 88us+: do_raw_spin_lock <-_raw_spin_lock_irqsave
<idle>-0 1d..4 93us+: clockevents_set_mode <-tick_broadcast_oneshot_control
<idle>-0 1d..4 97us+: arch_timer_set_mode_virt <-clockevents_set_mode
<idle>-0 1d..4 102us+: broadcast_needs_cpu <-tick_broadcast_oneshot_control
<idle>-0 1d..4 106us+: _raw_spin_unlock_irqrestore <-tick_broadcast_oneshot_control
<idle>-0 1d..4 110us+: do_raw_spin_unlock <-_raw_spin_unlock_irqrestore
<idle>-0 1d..4 115us+: preempt_count_sub <-_raw_spin_unlock_irqrestore
<idle>-0 1d..3 120us+: _raw_spin_unlock_irqrestore <-clockevents_notify
<idle>-0 1d..3 124us+: do_raw_spin_unlock <-_raw_spin_unlock_irqrestore
<idle>-0 1d..3 129us+: preempt_count_sub <-_raw_spin_unlock_irqrestore
<idle>-0 1d..2 133us+: cpuidle_enter <-cpu_startup_entry
<idle>-0 1d..2 137us+: cpuidle_enter_state <-cpuidle_enter
<idle>-0 1d..2 141us+: ktime_get <-cpuidle_enter_state
<idle>-0 1d..2 148us+: arch_counter_read <-ktime_get
<idle>-0 1d..2 153us+: cpu_enter_idle <-cpuidle_enter_state
<idle>-0 1d..2 157us+: cpu_pm_enter <-cpu_enter_idle
<idle>-0 1d..2 161us+: _raw_read_lock <-cpu_pm_enter
<idle>-0 1d..2 165us+: preempt_count_add <-_raw_read_lock
<idle>-0 1d..3 170us+: do_raw_read_lock <-_raw_read_lock
<idle>-0 1d..3 175us+: cpu_pm_notify <-cpu_pm_enter
<idle>-0 1d..3 179us+: __raw_notifier_call_chain <-cpu_pm_notify
<idle>-0 1d..3 183us+: notifier_call_chain <-__raw_notifier_call_chain
<idle>-0 1d..3 187us+: gic_notifier <-notifier_call_chain
<idle>-0 1d..3 192us+: arch_timer_cpu_pm_notify <-notifier_call_chain
<idle>-0 1d..3 197us+: vfp_cpu_pm_notifier <-notifier_call_chain
<idle>-0 1d..3 201us+: dbg_cpu_pm_notify <-notifier_call_chain
<idle>-0 1d..3 206us+: _raw_read_unlock <-cpu_pm_enter
<idle>-0 1d..3 210us+: do_raw_read_unlock <-_raw_read_unlock
<idle>-0 1d..3 214us!: preempt_count_sub <-_raw_read_unlock
<idle>-0 1d..2 3997820us+: cpu_pm_exit <-cpu_enter_idle
<idle>-0 1d..2 3997824us+: _raw_read_lock <-cpu_pm_exit
<idle>-0 1d..2 3997828us+: preempt_count_add <-_raw_read_lock
<idle>-0 1d..3 3997833us+: do_raw_read_lock <-_raw_read_lock
<idle>-0 1d..3 3997838us+: cpu_pm_notify <-cpu_pm_exit
<idle>-0 1d..3 3997842us+: __raw_notifier_call_chain <-cpu_pm_notify
<idle>-0 1d..3 3997846us+: notifier_call_chain <-__raw_notifier_call_chain
<idle>-0 1d..3 3997850us+: gic_notifier <-notifier_call_chain
<idle>-0 1d..3 3997856us+: arch_timer_cpu_pm_notify <-notifier_call_chain
<idle>-0 1d..3 3997860us+: vfp_cpu_pm_notifier <-notifier_call_chain
<idle>-0 1d..3 3997865us+: vfp_enable <-vfp_cpu_pm_notifier
<idle>-0 1d..3 3997869us+: dbg_cpu_pm_notify <-notifier_call_chain
<idle>-0 1d..3 3997873us+: reset_ctrl_regs <-dbg_cpu_pm_notify
<idle>-0 1d..3 3997877us+: write_wb_reg <-reset_ctrl_regs
<idle>-0 1d..3 3997882us+: write_wb_reg <-reset_ctrl_regs
<idle>-0 1d..3 3997886us+: write_wb_reg <-reset_ctrl_regs
<idle>-0 1d..3 3997890us+: write_wb_reg <-reset_ctrl_regs
<idle>-0 1d..3 3997894us+: write_wb_reg <-reset_ctrl_regs
<idle>-0 1d..3 3997898us+: write_wb_reg <-reset_ctrl_regs
<idle>-0 1d..3 3997902us+: write_wb_reg <-reset_ctrl_regs
<idle>-0 1d..3 3997906us+: write_wb_reg <-reset_ctrl_regs
<idle>-0 1d..3 3997910us+: write_wb_reg <-reset_ctrl_regs
<idle>-0 1d..3 3997915us+: write_wb_reg <-reset_ctrl_regs
<idle>-0 1d..3 3997919us+: write_wb_reg <-reset_ctrl_regs
<idle>-0 1d..3 3997923us+: write_wb_reg <-reset_ctrl_regs
<idle>-0 1d..3 3997927us+: write_wb_reg <-reset_ctrl_regs
<idle>-0 1d..3 3997931us+: write_wb_reg <-reset_ctrl_regs
<idle>-0 1d..3 3997936us+: write_wb_reg <-reset_ctrl_regs
<idle>-0 1d..3 3997940us+: write_wb_reg <-reset_ctrl_regs
<idle>-0 1d..3 3997944us+: write_wb_reg <-reset_ctrl_regs
<idle>-0 1d..3 3997948us+: write_wb_reg <-reset_ctrl_regs
<idle>-0 1d..3 3997952us+: write_wb_reg <-reset_ctrl_regs
<idle>-0 1d..3 3997956us+: write_wb_reg <-reset_ctrl_regs
<idle>-0 1d..3 3997960us+: _raw_read_unlock <-cpu_pm_exit
<idle>-0 1d..3 3997965us+: do_raw_read_unlock <-_raw_read_unlock
<idle>-0 1d..3 3997969us+: preempt_count_sub <-_raw_read_unlock
<idle>-0 1d..2 3997973us+: ktime_get <-cpuidle_enter_state
<idle>-0 1d..2 3997980us+: arch_counter_read <-ktime_get
<idle>-0 1d..1 3997985us+: trace_hardirqs_on <-cpuidle_enter_state
<idle>-0 1d..1 3997994us+: time_hardirqs_on <-cpuidle_enter_state
<idle>-0 1d..1 3998053us : <stack trace>
=> time_hardirqs_on
=> trace_hardirqs_on_caller
=> trace_hardirqs_on
=> cpuidle_enter_state
=> cpuidle_enter
=> cpu_startup_entry
=> secondary_start_kernel

Let's make stop_critical_timings() actually stop the tracer until
start_critical_timings() is called. This way we don't have to
worry about adding more stop_critical_timings() calls deep down
in cpuidle drivers or later on in the idle loop.

Cc: Corey Minyard <cminyard@xxxxxxxxxx>
Cc: Stanislav Meduna <stano@xxxxxxxxxx>
Cc: Arnd Bergmann <arnd@xxxxxxxx>
Signed-off-by: Stephen Boyd <sboyd@xxxxxxxxxxxxxx>
---

Changes since v1:
* Micro-optimize start_critical_timing() check
* Fix comment
* Lift out timings_stopped assignments from conditional checks

kernel/trace/trace_irqsoff.c | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 8ff02cbb892f..278b5fe6a693 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -367,6 +367,8 @@ out:
__trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
}

+static DEFINE_PER_CPU(bool, timings_stopped);
+
static inline void
start_critical_timing(unsigned long ip, unsigned long parent_ip)
{
@@ -380,7 +382,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip)

cpu = raw_smp_processor_id();

- if (per_cpu(tracing_cpu, cpu))
+ if (per_cpu(timings_stopped, cpu) || per_cpu(tracing_cpu, cpu))
return;

data = per_cpu_ptr(tr->trace_buffer.data, cpu);
@@ -418,7 +420,8 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
else
return;

- if (!tracer_enabled || !tracing_is_enabled())
+ if (!tracer_enabled || !tracing_is_enabled() ||
+ per_cpu(timings_stopped, cpu))
return;

data = per_cpu_ptr(tr->trace_buffer.data, cpu);
@@ -436,9 +439,10 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
atomic_dec(&data->disabled);
}

-/* start and stop critical timings used to for stoppage (in idle) */
+/* start and stop critical timings; used for stoppage (in idle) */
void start_critical_timings(void)
{
+ per_cpu(timings_stopped, raw_smp_processor_id()) = false;
if (preempt_trace() || irq_trace())
start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
}
@@ -448,6 +452,7 @@ void stop_critical_timings(void)
{
if (preempt_trace() || irq_trace())
stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
+ per_cpu(timings_stopped, raw_smp_processor_id()) = true;
}
EXPORT_SYMBOL_GPL(stop_critical_timings);

--
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/