RE: [RFC] kprobes: Use percpu counter to collect nhit statistics

From: 平松雅巳 / HIRAMATU，MASAMI
Date: Fri Nov 13 2015 - 01:52:03 EST


From: Martin KaFai Lau [mailto:kafai@xxxxxx]
>
>When doing ebpf+kprobe on some hot TCP functions (e.g.
>tcp_rcv_established), the kprobe_dispatcher shows up in 'perf report'.
>
>In kprobe_dispatcher(), there is a lot of cache bouncing
>in 'tk->nhit++'. 'tk->nhit' and 'tk->tp.flags' also share
>the same cacheline.
>
>perf report (cycles:pp):
>8.30% ipv4_dst_check
>4.74% copy_user_enhanced_fast_string
>3.93% dst_release
>2.80% tcp_v4_rcv
>2.31% queued_spin_lock_slowpath
>2.30% _raw_spin_lock
>1.88% mlx4_en_process_rx_cq
>1.84% eth_get_headlen
>1.81% ip_rcv_finish
>~~~~
>1.71% kprobe_dispatcher
>~~~~
>1.55% mlx4_en_xmit
>1.09% __probe_kernel_read
>
>perf report after patch:
>9.15% ipv4_dst_check
>5.00% copy_user_enhanced_fast_string
>4.12% dst_release
>2.96% tcp_v4_rcv
>2.50% _raw_spin_lock
>2.39% queued_spin_lock_slowpath
>2.11% eth_get_headlen
>2.03% mlx4_en_process_rx_cq
>1.69% mlx4_en_xmit
>1.19% ip_rcv_finish
>1.12% __probe_kernel_read
>1.02% ehci_hcd_cleanup

Good catch! Looks good to me!

Acked-by: Masami Hiramatsu <masami.hiramatsu.pt@xxxxxxxxxxx>

Thanks!!

>
>Signed-off-by: Martin KaFai Lau <kafai@xxxxxx>
>---
> kernel/trace/trace_kprobe.c | 19 +++++++++++++++----
> 1 file changed, 15 insertions(+), 4 deletions(-)
>
>diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
>index c995644..91a441b 100644
>--- a/kernel/trace/trace_kprobe.c
>+++ b/kernel/trace/trace_kprobe.c
>@@ -30,7 +30,7 @@
> struct trace_kprobe {
> struct list_head list;
> struct kretprobe rp; /* Use rp.kp for kprobe use */
>- unsigned long nhit;
>+ unsigned long * __percpu nhit;
> const char *symbol; /* symbol name */
> struct trace_probe tp;
> };
>@@ -274,6 +274,10 @@ static struct trace_kprobe *alloc_trace_kprobe(const char *group,
> if (!tk)
> return ERR_PTR(ret);
>
>+ tk->nhit = alloc_percpu(unsigned long);
>+ if (!tk->nhit)
>+ goto error;
>+
> if (symbol) {
> tk->symbol = kstrdup(symbol, GFP_KERNEL);
> if (!tk->symbol)
>@@ -313,6 +317,7 @@ static struct trace_kprobe *alloc_trace_kprobe(const char *group,
> error:
> kfree(tk->tp.call.name);
> kfree(tk->symbol);
>+ free_percpu(tk->nhit);
> kfree(tk);
> return ERR_PTR(ret);
> }
>@@ -327,6 +332,7 @@ static void free_trace_kprobe(struct trace_kprobe *tk)
> kfree(tk->tp.call.class->system);
> kfree(tk->tp.call.name);
> kfree(tk->symbol);
>+ free_percpu(tk->nhit);
> kfree(tk);
> }
>
>@@ -874,9 +880,14 @@ static const struct file_operations kprobe_events_ops = {
> static int probes_profile_seq_show(struct seq_file *m, void *v)
> {
> struct trace_kprobe *tk = v;
>+ unsigned long nhit = 0;
>+ int cpu;
>+
>+ for_each_possible_cpu(cpu)
>+ nhit += *per_cpu_ptr(tk->nhit, cpu);
>
> seq_printf(m, " %-44s %15lu %15lu\n",
>- trace_event_name(&tk->tp.call), tk->nhit,
>+ trace_event_name(&tk->tp.call), nhit,
> tk->rp.kp.nmissed);
>
> return 0;
>@@ -1225,7 +1236,7 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
> {
> struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp);
>
>- tk->nhit++;
>+ raw_cpu_inc(*tk->nhit);
>
> if (tk->tp.flags & TP_FLAG_TRACE)
> kprobe_trace_func(tk, regs);
>@@ -1242,7 +1253,7 @@ kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
> {
> struct trace_kprobe *tk = container_of(ri->rp, struct trace_kprobe, rp);
>
>- tk->nhit++;
>+ raw_cpu_inc(*tk->nhit);
>
> if (tk->tp.flags & TP_FLAG_TRACE)
> kretprobe_trace_func(tk, ri, regs);
>--
>2.5.1