Re: [PATCH 2/5 v2] tracing: Replace the per_cpu() with this_cpu() in trace_stack.c
From: Paul E. McKenney
Date: Fri Apr 07 2017 - 10:36:31 EST
On Fri, Apr 07, 2017 at 10:01:08AM -0400, Steven Rostedt wrote:
> From: "Steven Rostedt (VMware)" <rostedt@xxxxxxxxxxx>
>
> The trace_active per-CPU variable can be updated with the this_cpu_*()
> functions, as it is only ever modified on the CPU that the variable
> belongs to.
>
> Signed-off-by: Steven Rostedt (VMware) <rostedt@xxxxxxxxxxx>
> ---
> kernel/trace/trace_stack.c | 23 +++++++----------------
> 1 file changed, 7 insertions(+), 16 deletions(-)
>
> diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
> index 5fb1f2c87e6b..05ad2b86461e 100644
> --- a/kernel/trace/trace_stack.c
> +++ b/kernel/trace/trace_stack.c
> @@ -207,13 +207,12 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip,
> struct ftrace_ops *op, struct pt_regs *pt_regs)
> {
> unsigned long stack;
> - int cpu;
>
> preempt_disable_notrace();
>
> - cpu = raw_smp_processor_id();
> /* no atomic needed, we only modify this variable by this cpu */
> - if (per_cpu(trace_active, cpu)++ != 0)
> + this_cpu_inc(trace_active);
For whatever it is worth...
I was about to complain that this_cpu_inc() only disables preemption,
not interrupts, but then I realized that any correct interrupt handler
would have to restore the per-CPU variable to its original value.
Presumably you have to sum up all the per-CPU trace_active counts,
given that there is no guarantee that a process-level dec will happen
on the same CPU that did the inc.
Thanx, Paul
> + if (this_cpu_read(trace_active) != 1)
> goto out;
>
> ip += MCOUNT_INSN_SIZE;
> @@ -221,7 +220,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip,
> check_stack(ip, &stack);
>
> out:
> - per_cpu(trace_active, cpu)--;
> + this_cpu_dec(trace_active);
> /* prevent recursion in schedule */
> preempt_enable_notrace();
> }
> @@ -253,7 +252,6 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
> long *ptr = filp->private_data;
> unsigned long val, flags;
> int ret;
> - int cpu;
>
> ret = kstrtoul_from_user(ubuf, count, 10, &val);
> if (ret)
> @@ -266,14 +264,13 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
> * we will cause circular lock, so we also need to increase
> * the percpu trace_active here.
> */
> - cpu = smp_processor_id();
> - per_cpu(trace_active, cpu)++;
> + this_cpu_inc(trace_active);
>
> arch_spin_lock(&stack_trace_max_lock);
> *ptr = val;
> arch_spin_unlock(&stack_trace_max_lock);
>
> - per_cpu(trace_active, cpu)--;
> + this_cpu_dec(trace_active);
> local_irq_restore(flags);
>
> return count;
> @@ -307,12 +304,9 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
>
> static void *t_start(struct seq_file *m, loff_t *pos)
> {
> - int cpu;
> -
> local_irq_disable();
>
> - cpu = smp_processor_id();
> - per_cpu(trace_active, cpu)++;
> + this_cpu_inc(trace_active);
>
> arch_spin_lock(&stack_trace_max_lock);
>
> @@ -324,12 +318,9 @@ static void *t_start(struct seq_file *m, loff_t *pos)
>
> static void t_stop(struct seq_file *m, void *p)
> {
> - int cpu;
> -
> arch_spin_unlock(&stack_trace_max_lock);
>
> - cpu = smp_processor_id();
> - per_cpu(trace_active, cpu)--;
> + this_cpu_dec(trace_active);
>
> local_irq_enable();
> }
> --
> 2.10.2
>
>