Re: [PATCH] tracing: Don't account for cpu idle time with irqsoff tracers
From: Stephen Boyd
Date: Tue May 27 2014 - 14:05:19 EST
Ping?
On 05/19/14 12:30, Stephen Boyd wrote:
> The idle loop calls stop_critical_timings() in an attempt to stop
> the irqsoff tracer from tracing a cpu's idle time. Unfortunately,
> stop_critical_timings() doesn't do much besides stop the tracer
> until the point that another irqsoff event happens. This typically
> happens right after in rcu_idle_enter() or later on in the cpuidle
> driver when we take a spinlock to notify the clockevent framework
> about timers that lose state during deep idle (FEAT_C3_STOP).
> This leads to worthless irqsoff traces that show a large amount
> of time spent in idle:
>
> # tracer: irqsoff
> #
> # irqsoff latency trace v1.1.5 on 3.15.0-rc4-00002-geffda86cfdaa-dirty
> # --------------------------------------------------------------------
> # latency: 3997988 us, #88/88, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4)
> # -----------------
> # | task: swapper/1-0 (uid:0 nice:0 policy:0 rt_prio:0)
> # -----------------
> # => started at: rcu_idle_enter
> # => ended at: cpuidle_enter_state
> #
> #
> # _------=> CPU#
> # / _-----=> irqs-off
> # | / _----=> need-resched
> # || / _---=> hardirq/softirq
> # ||| / _--=> preempt-depth
> # |||| / delay
> # cmd pid ||||| time | caller
> # \ / ||||| \ | /
> <idle>-0 1d..1 2us+: trace_hardirqs_off <-rcu_idle_enter
> <idle>-0 1d..2 7us+: rcu_eqs_enter_common.isra.48 <-rcu_idle_enter
> <idle>-0 1d..2 12us+: cpuidle_enabled <-cpu_startup_entry
> <idle>-0 1d..2 17us+: cpuidle_select <-cpu_startup_entry
> <idle>-0 1d..2 21us+: menu_select <-cpuidle_select
> <idle>-0 1d..2 26us+: pm_qos_request <-menu_select
> <idle>-0 1d..2 30us+: tick_nohz_get_sleep_length <-menu_select
> <idle>-0 1d..2 34us+: ns_to_timespec <-menu_select
> <idle>-0 1d..2 39us+: nr_iowait_cpu <-menu_select
> <idle>-0 1d..2 46us+: this_cpu_load <-menu_select
> <idle>-0 1d..2 50us+: nr_iowait_cpu <-menu_select
> <idle>-0 1d..2 55us+: clockevents_notify <-cpu_startup_entry
> <idle>-0 1d..2 59us+: _raw_spin_lock_irqsave <-clockevents_notify
> <idle>-0 1d..2 64us+: preempt_count_add <-_raw_spin_lock_irqsave
> <idle>-0 1d..3 69us+: do_raw_spin_lock <-_raw_spin_lock_irqsave
> <idle>-0 1d..3 74us+: tick_broadcast_oneshot_control <-clockevents_notify
> <idle>-0 1d..3 78us+: _raw_spin_lock_irqsave <-tick_broadcast_oneshot_control
> <idle>-0 1d..3 83us+: preempt_count_add <-_raw_spin_lock_irqsave
> <idle>-0 1d..4 88us+: do_raw_spin_lock <-_raw_spin_lock_irqsave
> <idle>-0 1d..4 93us+: clockevents_set_mode <-tick_broadcast_oneshot_control
> <idle>-0 1d..4 97us+: arch_timer_set_mode_virt <-clockevents_set_mode
> <idle>-0 1d..4 102us+: broadcast_needs_cpu <-tick_broadcast_oneshot_control
> <idle>-0 1d..4 106us+: _raw_spin_unlock_irqrestore <-tick_broadcast_oneshot_control
> <idle>-0 1d..4 110us+: do_raw_spin_unlock <-_raw_spin_unlock_irqrestore
> <idle>-0 1d..4 115us+: preempt_count_sub <-_raw_spin_unlock_irqrestore
> <idle>-0 1d..3 120us+: _raw_spin_unlock_irqrestore <-clockevents_notify
> <idle>-0 1d..3 124us+: do_raw_spin_unlock <-_raw_spin_unlock_irqrestore
> <idle>-0 1d..3 129us+: preempt_count_sub <-_raw_spin_unlock_irqrestore
> <idle>-0 1d..2 133us+: cpuidle_enter <-cpu_startup_entry
> <idle>-0 1d..2 137us+: cpuidle_enter_state <-cpuidle_enter
> <idle>-0 1d..2 141us+: ktime_get <-cpuidle_enter_state
> <idle>-0 1d..2 148us+: arch_counter_read <-ktime_get
> <idle>-0 1d..2 153us+: cpu_enter_idle <-cpuidle_enter_state
> <idle>-0 1d..2 157us+: cpu_pm_enter <-cpu_enter_idle
> <idle>-0 1d..2 161us+: _raw_read_lock <-cpu_pm_enter
> <idle>-0 1d..2 165us+: preempt_count_add <-_raw_read_lock
> <idle>-0 1d..3 170us+: do_raw_read_lock <-_raw_read_lock
> <idle>-0 1d..3 175us+: cpu_pm_notify <-cpu_pm_enter
> <idle>-0 1d..3 179us+: __raw_notifier_call_chain <-cpu_pm_notify
> <idle>-0 1d..3 183us+: notifier_call_chain <-__raw_notifier_call_chain
> <idle>-0 1d..3 187us+: gic_notifier <-notifier_call_chain
> <idle>-0 1d..3 192us+: arch_timer_cpu_pm_notify <-notifier_call_chain
> <idle>-0 1d..3 197us+: vfp_cpu_pm_notifier <-notifier_call_chain
> <idle>-0 1d..3 201us+: dbg_cpu_pm_notify <-notifier_call_chain
> <idle>-0 1d..3 206us+: _raw_read_unlock <-cpu_pm_enter
> <idle>-0 1d..3 210us+: do_raw_read_unlock <-_raw_read_unlock
> <idle>-0 1d..3 214us!: preempt_count_sub <-_raw_read_unlock
> <idle>-0 1d..2 3997820us+: cpu_pm_exit <-cpu_enter_idle
> <idle>-0 1d..2 3997824us+: _raw_read_lock <-cpu_pm_exit
> <idle>-0 1d..2 3997828us+: preempt_count_add <-_raw_read_lock
> <idle>-0 1d..3 3997833us+: do_raw_read_lock <-_raw_read_lock
> <idle>-0 1d..3 3997838us+: cpu_pm_notify <-cpu_pm_exit
> <idle>-0 1d..3 3997842us+: __raw_notifier_call_chain <-cpu_pm_notify
> <idle>-0 1d..3 3997846us+: notifier_call_chain <-__raw_notifier_call_chain
> <idle>-0 1d..3 3997850us+: gic_notifier <-notifier_call_chain
> <idle>-0 1d..3 3997856us+: arch_timer_cpu_pm_notify <-notifier_call_chain
> <idle>-0 1d..3 3997860us+: vfp_cpu_pm_notifier <-notifier_call_chain
> <idle>-0 1d..3 3997865us+: vfp_enable <-vfp_cpu_pm_notifier
> <idle>-0 1d..3 3997869us+: dbg_cpu_pm_notify <-notifier_call_chain
> <idle>-0 1d..3 3997873us+: reset_ctrl_regs <-dbg_cpu_pm_notify
> <idle>-0 1d..3 3997877us+: write_wb_reg <-reset_ctrl_regs
> <idle>-0 1d..3 3997882us+: write_wb_reg <-reset_ctrl_regs
> <idle>-0 1d..3 3997886us+: write_wb_reg <-reset_ctrl_regs
> <idle>-0 1d..3 3997890us+: write_wb_reg <-reset_ctrl_regs
> <idle>-0 1d..3 3997894us+: write_wb_reg <-reset_ctrl_regs
> <idle>-0 1d..3 3997898us+: write_wb_reg <-reset_ctrl_regs
> <idle>-0 1d..3 3997902us+: write_wb_reg <-reset_ctrl_regs
> <idle>-0 1d..3 3997906us+: write_wb_reg <-reset_ctrl_regs
> <idle>-0 1d..3 3997910us+: write_wb_reg <-reset_ctrl_regs
> <idle>-0 1d..3 3997915us+: write_wb_reg <-reset_ctrl_regs
> <idle>-0 1d..3 3997919us+: write_wb_reg <-reset_ctrl_regs
> <idle>-0 1d..3 3997923us+: write_wb_reg <-reset_ctrl_regs
> <idle>-0 1d..3 3997927us+: write_wb_reg <-reset_ctrl_regs
> <idle>-0 1d..3 3997931us+: write_wb_reg <-reset_ctrl_regs
> <idle>-0 1d..3 3997936us+: write_wb_reg <-reset_ctrl_regs
> <idle>-0 1d..3 3997940us+: write_wb_reg <-reset_ctrl_regs
> <idle>-0 1d..3 3997944us+: write_wb_reg <-reset_ctrl_regs
> <idle>-0 1d..3 3997948us+: write_wb_reg <-reset_ctrl_regs
> <idle>-0 1d..3 3997952us+: write_wb_reg <-reset_ctrl_regs
> <idle>-0 1d..3 3997956us+: write_wb_reg <-reset_ctrl_regs
> <idle>-0 1d..3 3997960us+: _raw_read_unlock <-cpu_pm_exit
> <idle>-0 1d..3 3997965us+: do_raw_read_unlock <-_raw_read_unlock
> <idle>-0 1d..3 3997969us+: preempt_count_sub <-_raw_read_unlock
> <idle>-0 1d..2 3997973us+: ktime_get <-cpuidle_enter_state
> <idle>-0 1d..2 3997980us+: arch_counter_read <-ktime_get
> <idle>-0 1d..1 3997985us+: trace_hardirqs_on <-cpuidle_enter_state
> <idle>-0 1d..1 3997994us+: time_hardirqs_on <-cpuidle_enter_state
> <idle>-0 1d..1 3998053us : <stack trace>
> => time_hardirqs_on
> => trace_hardirqs_on_caller
> => trace_hardirqs_on
> => cpuidle_enter_state
> => cpuidle_enter
> => cpu_startup_entry
> => secondary_start_kernel
>
> Let's make stop_critical_timings() actually stop the tracer until
> start_critical_timings() is called. This way we don't have to
> worry about adding more stop_critical_timings() calls deep down
> in cpuidle drivers or later on in the idle loop.
>
> Signed-off-by: Stephen Boyd <sboyd@xxxxxxxxxxxxxx>
> ---
>
> kernel/trace/trace_irqsoff.c | 20 ++++++++++++++------
> 1 file changed, 14 insertions(+), 6 deletions(-)
>
> diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
> index 8ff02cbb892f..ea5f5d484150 100644
> --- a/kernel/trace/trace_irqsoff.c
> +++ b/kernel/trace/trace_irqsoff.c
> @@ -367,6 +367,8 @@ out:
> __trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
> }
>
> +static DEFINE_PER_CPU(bool, timings_stopped);
> +
> static inline void
> start_critical_timing(unsigned long ip, unsigned long parent_ip)
> {
> @@ -375,11 +377,12 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip)
> struct trace_array_cpu *data;
> unsigned long flags;
>
> - if (!tracer_enabled || !tracing_is_enabled())
> - return;
> -
> cpu = raw_smp_processor_id();
>
> + if (!tracer_enabled || !tracing_is_enabled() ||
> + per_cpu(timings_stopped, cpu))
> + return;
> +
> if (per_cpu(tracing_cpu, cpu))
> return;
>
> @@ -418,7 +421,8 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
> else
> return;
>
> - if (!tracer_enabled || !tracing_is_enabled())
> + if (!tracer_enabled || !tracing_is_enabled() ||
> + per_cpu(timings_stopped, cpu))
> return;
>
> data = per_cpu_ptr(tr->trace_buffer.data, cpu);
> @@ -439,15 +443,19 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
> /* start and stop critical timings used to for stoppage (in idle) */
> void start_critical_timings(void)
> {
> - if (preempt_trace() || irq_trace())
> + if (preempt_trace() || irq_trace()) {
> + per_cpu(timings_stopped, raw_smp_processor_id()) = false;
> start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
> + }
> }
> EXPORT_SYMBOL_GPL(start_critical_timings);
>
> void stop_critical_timings(void)
> {
> - if (preempt_trace() || irq_trace())
> + if (preempt_trace() || irq_trace()) {
> stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
> + per_cpu(timings_stopped, raw_smp_processor_id()) = true;
> + }
> }
> EXPORT_SYMBOL_GPL(stop_critical_timings);
>
--
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
hosted by The Linux Foundation
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/