[for-next][PATCH 2/5] ftrace: Do not disable interrupts in profiler

From: Steven Rostedt
Date: Tue Dec 24 2024 - 14:38:27 EST


From: Steven Rostedt <rostedt@xxxxxxxxxxx>

The function profiler disables interrupts before processing. This was
there since the profiler was introduced back in 2009 when there were
recursion issues to deal with. The function tracer is much more robust
today and has its own internal recursion protection. There's no reason to
disable interrupts in the function profiler.

Instead, just disable preemption and use the guard() infrastructure while
at it.

Before this change:

~# echo 1 > /sys/kernel/tracing/function_profile_enabled
~# perf stat -r 10 ./hackbench 10
Time: 3.099
Time: 2.556
Time: 2.500
Time: 2.705
Time: 2.985
Time: 2.959
Time: 2.859
Time: 2.621
Time: 2.742
Time: 2.631

Performance counter stats for '/work/c/hackbench 10' (10 runs):

23,156.77 msec task-clock # 6.951 CPUs utilized ( +- 2.36% )
18,306 context-switches # 790.525 /sec ( +- 5.95% )
495 cpu-migrations # 21.376 /sec ( +- 8.61% )
11,522 page-faults # 497.565 /sec ( +- 1.80% )
47,967,124,606 cycles # 2.071 GHz ( +- 0.41% )
80,009,078,371 instructions # 1.67 insn per cycle ( +- 0.34% )
16,389,249,798 branches # 707.752 M/sec ( +- 0.36% )
139,943,109 branch-misses # 0.85% of all branches ( +- 0.61% )

3.332 +- 0.101 seconds time elapsed ( +- 3.04% )

After this change:

~# echo 1 > /sys/kernel/tracing/function_profile_enabled
~# perf stat -r 10 ./hackbench 10
Time: 1.869
Time: 1.428
Time: 1.575
Time: 1.569
Time: 1.685
Time: 1.511
Time: 1.611
Time: 1.672
Time: 1.724
Time: 1.715

Performance counter stats for '/work/c/hackbench 10' (10 runs):

13,578.21 msec task-clock # 6.931 CPUs utilized ( +- 2.23% )
12,736 context-switches # 937.973 /sec ( +- 3.86% )
341 cpu-migrations # 25.114 /sec ( +- 5.27% )
11,378 page-faults # 837.960 /sec ( +- 1.74% )
27,638,039,036 cycles # 2.035 GHz ( +- 0.27% )
45,107,762,498 instructions # 1.63 insn per cycle ( +- 0.23% )
8,623,868,018 branches # 635.125 M/sec ( +- 0.27% )
125,738,443 branch-misses # 1.46% of all branches ( +- 0.32% )

1.9590 +- 0.0484 seconds time elapsed ( +- 2.47% )

Cc: Masami Hiramatsu <mhiramat@xxxxxxxxxx>
Cc: Mark Rutland <mark.rutland@xxxxxxx>
Cc: Mathieu Desnoyers <mathieu.desnoyers@xxxxxxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Link: https://lore.kernel.org/20241223184941.373853944@xxxxxxxxxxx
Signed-off-by: Steven Rostedt (Google) <rostedt@xxxxxxxxxxx>
---
kernel/trace/ftrace.c | 20 +++++++-------------
1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 9b17efb1a87d..63a9ffa65e17 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -789,27 +789,24 @@ function_profile_call(unsigned long ip, unsigned long parent_ip,
{
struct ftrace_profile_stat *stat;
struct ftrace_profile *rec;
- unsigned long flags;

if (!ftrace_profile_enabled)
return;

- local_irq_save(flags);
+ guard(preempt_notrace)();

stat = this_cpu_ptr(&ftrace_profile_stats);
if (!stat->hash || !ftrace_profile_enabled)
- goto out;
+ return;

rec = ftrace_find_profiled_func(stat, ip);
if (!rec) {
rec = ftrace_profile_alloc(stat, ip);
if (!rec)
- goto out;
+ return;
}

rec->counter++;
- out:
- local_irq_restore(flags);
}

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -856,19 +853,19 @@ static void profile_graph_return(struct ftrace_graph_ret *trace,
unsigned long long calltime;
unsigned long long rettime = trace_clock_local();
struct ftrace_profile *rec;
- unsigned long flags;
int size;

- local_irq_save(flags);
+ guard(preempt_notrace)();
+
stat = this_cpu_ptr(&ftrace_profile_stats);
if (!stat->hash || !ftrace_profile_enabled)
- goto out;
+ return;

profile_data = fgraph_retrieve_data(gops->idx, &size);

/* If the calltime was zero'd ignore it */
if (!profile_data || !profile_data->calltime)
- goto out;
+ return;

calltime = rettime - profile_data->calltime;

@@ -896,9 +893,6 @@ static void profile_graph_return(struct ftrace_graph_ret *trace,
rec->time += calltime;
rec->time_squared += calltime * calltime;
}
-
- out:
- local_irq_restore(flags);
}

static struct fgraph_ops fprofiler_ops = {
--
2.45.2