[RFC PATCH -tip 2/2] kprobes/x86: Use graph_tracer's per-thread return stack for kretprobe
From: Masami Hiramatsu
Date: Mon Aug 21 2017 - 11:43:11 EST
Use the function_graph tracer's per-thread return stack for
storing the kretprobe return address as a fast path.
Currently kretprobe has its own instance hash-list for storing
return addresses. However, this introduces a spin-lock for the
hash-list entries and compels users to estimate how many probe
instances can run concurrently (and to set that number in
kretprobe->maxactive).
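For reference, with the current API a caller has to guess that
number up front when registering; a minimal module sketch for
illustration only (the probe point, handler name and maxactive
value here are arbitrary and not part of this patch):

#include <linux/kprobes.h>
#include <linux/module.h>
#include <linux/ptrace.h>

/* Illustrative return handler: report vfs_write()'s return value. */
static int my_ret_handler(struct kretprobe_instance *ri,
			  struct pt_regs *regs)
{
	pr_info("vfs_write returned %ld\n", (long)regs_return_value(regs));
	return 0;
}

static struct kretprobe my_kretprobe = {
	.kp.symbol_name	= "vfs_write",
	.handler	= my_ret_handler,
	/*
	 * Guess how many instances may be live at once; if this is
	 * too small, some returns are missed and the kretprobe's
	 * nmissed count grows.
	 */
	.maxactive	= 32,
};

static int __init my_init(void)
{
	return register_kretprobe(&my_kretprobe);
}

static void __exit my_exit(void)
{
	unregister_kretprobe(&my_kretprobe);
}

module_init(my_init);
module_exit(my_exit);
MODULE_LICENSE("GPL");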
To solve this issue, this patch reuses function_graph's
per-thread ret_stack for kretprobes as fast path
instead of using its hash-list if possible.
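Since each task only ever touches its own ret_stack, no locking
is needed on this path. The following stand-alone userspace
sketch (illustration only, not kernel code; names and sizes are
made up) shows the idea of such a per-thread return stack:

/*
 * Stand-alone illustration of why a per-thread return stack needs
 * no locking: each thread pushes and pops only its own array,
 * analogous to current->ret_stack in the function_graph tracer.
 */
#include <pthread.h>
#include <stdio.h>

#define RET_STACK_DEPTH 50

struct ret_entry {
	unsigned long ret;	/* saved return address */
	unsigned long func;	/* here: which kretprobe it belongs to */
};

/* One stack per thread, no shared state, hence no spin-lock. */
static __thread struct ret_entry ret_stack[RET_STACK_DEPTH];
static __thread int curr_ret_stack = -1;

static int push_return(unsigned long ret, unsigned long func)
{
	if (curr_ret_stack >= RET_STACK_DEPTH - 1)
		return -1;	/* stack full: caller falls back to slow path */
	curr_ret_stack++;
	ret_stack[curr_ret_stack].ret = ret;
	ret_stack[curr_ret_stack].func = func;
	return 0;
}

static unsigned long pop_return(unsigned long *func)
{
	struct ret_entry *e = &ret_stack[curr_ret_stack--];

	*func = e->func;
	return e->ret;
}

static void *worker(void *arg)
{
	unsigned long func;

	/* "enter" a probed function, then "return" from it */
	push_return(0x1000 + (unsigned long)arg, (unsigned long)arg);
	printf("worker %lu: popped ret=%#lx\n",
	       (unsigned long)arg, pop_return(&func));
	return NULL;
}

int main(void)
{
	pthread_t t[4];
	int i;

	for (i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, worker, (void *)(long)i);
	for (i = 0; i < 4; i++)
		pthread_join(t[i], NULL);
	return 0;
}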
Note that this is just feasibility-study code, and since the
per-thread ret_stack is initialized only when the function_graph
tracer is enabled, you have to perform the following operations
to enable it.
# echo '*' > <tracefs>/set_graph_notrace
# echo function_graph > <tracefs>/current_tracer
After that, try to add a kretprobe event with just 1
instance (it is not actually used anyway).
# echo r1 vfs_write > <tracefs>/kprobe_events
# echo 1 > <tracefs>/events/kprobes/enable
And run "yes" command concurrently.
# for i in {0..31}; do yes > /dev/null & done
# cat <tracefs>/kprobe_profile
r_vfs_write_0 4756473 0
Then you will see the error count (the last column) is zero.
Currently, this feature is disabled when the function_graph
tracer is stopped, so if you set the nop tracer as below,
# echo nop > <tracefs>/current_tracer
Then you'll see the error count increasing.
# cat <tracefs>/kprobe_profile
r_vfs_write_0 7663462 238537
Signed-off-by: Masami Hiramatsu <mhiramat@xxxxxxxxxx>
---
arch/x86/kernel/kprobes/core.c | 95 ++++++++++++++++++++++++++++++++++
include/linux/ftrace.h | 3 +
kernel/kprobes.c | 11 ++++
kernel/trace/trace_functions_graph.c | 5 +-
4 files changed, 111 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index f0153714ddac..2950a6187d0f 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -545,6 +545,101 @@ static nokprobe_inline void restore_btf(void)
}
}
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+__visible __used void *trampoline_fast_handler(struct pt_regs *regs)
+{
+ struct kretprobe_instance ri; /* fake instance */
+ struct ftrace_graph_ret trace;
+ unsigned long ret_addr;
+
+ ftrace_pop_return_trace(&trace, &ret_addr, 0);
+ barrier();
+ current->curr_ret_stack--;
+ if (current->curr_ret_stack < -1)
+ current->curr_ret_stack += FTRACE_NOTRACE_DEPTH;
+
+ if (unlikely(!ret_addr || ret_addr == (unsigned long)panic)) {
+ ret_addr = (unsigned long)panic;
+ goto out;
+ }
+
+ ri.rp = (struct kretprobe *)trace.func;
+ if (ri.rp->handler) {
+ ri.ret_addr = (void *)ret_addr;
+ ri.task = current;
+
+ preempt_disable();
+ __this_cpu_write(current_kprobe, &(ri.rp->kp));
+ get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
+ ri.rp->handler(&ri, regs);
+ __this_cpu_write(current_kprobe, NULL);
+ preempt_enable_no_resched();
+ }
+out:
+ return (void *)ret_addr;
+}
+
+asmlinkage void kretprobe_trampoline_fast(void);
+/*
+ * When a retprobed function returns, this code saves registers and
+ * calls trampoline_fast_handler(), which calls the kretprobe's handler.
+ */
+asm(
+ ".global kretprobe_trampoline_fast\n"
+ ".type kretprobe_trampoline_fast, @function\n"
+ "kretprobe_trampoline_fast:\n"
+#ifdef CONFIG_X86_64
+ /* We don't bother saving the ss register */
+ " pushq %rsp\n"
+ " pushfq\n"
+ SAVE_REGS_STRING
+ " movq %rsp, %rdi\n"
+ " call trampoline_fast_handler\n"
+ /* Replace saved sp with true return address. */
+ " movq %rax, 152(%rsp)\n"
+ RESTORE_REGS_STRING
+ " popfq\n"
+#else
+ " pushf\n"
+ SAVE_REGS_STRING
+ " movl %esp, %eax\n"
+ " call trampoline_fast_handler\n"
+ /* Move flags to cs */
+ " movl 56(%esp), %edx\n"
+ " movl %edx, 52(%esp)\n"
+ /* Replace saved flags with true return address. */
+ " movl %eax, 56(%esp)\n"
+ RESTORE_REGS_STRING
+ " popf\n"
+#endif
+ " ret\n"
+ ".size kretprobe_trampoline_fast, .-kretprobe_trampoline_fast\n"
+);
+NOKPROBE_SYMBOL(kretprobe_trampoline_fast);
+STACK_FRAME_NON_STANDARD(kretprobe_trampoline_fast);
+
+int arch_prepare_kretprobe_fast(struct kretprobe *rp, struct pt_regs *regs)
+{
+ unsigned long *parent = stack_addr(regs);
+ unsigned long old = *parent;
+ int depth = 0;
+ int ret;
+
+ /* Replace the return addr with trampoline addr */
+ *parent = (unsigned long) &kretprobe_trampoline_fast;
+
+ /* Push on the per-thread return stack */
+ ret = ftrace_push_return_trace(old, (unsigned long)rp, &depth, 0,
+ parent);
+ if (ret)
+ *parent = old;
+
+ return ret;
+}
+NOKPROBE_SYMBOL(arch_prepare_kretprobe_fast);
+#endif
+
void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
{
unsigned long *sara = stack_addr(regs);
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 6383115e9d2c..e9986fccb0e1 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -823,6 +823,9 @@ extern void return_to_handler(void);
extern int
ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
unsigned long frame_pointer, unsigned long *retp);
+extern void
+ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
+ unsigned long frame_pointer);
unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
unsigned long ret, unsigned long *retp);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index a1606a4224e1..99c50f4db4d8 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1839,6 +1839,13 @@ void unregister_jprobes(struct jprobe **jps, int num)
EXPORT_SYMBOL_GPL(unregister_jprobes);
#ifdef CONFIG_KRETPROBES
+int __weak arch_prepare_kretprobe_fast(struct kretprobe *rp,
+ struct pt_regs *regs)
+{
+ return -ENOTSUPP;
+}
+NOKPROBE_SYMBOL(arch_prepare_kretprobe_fast);
+
/*
* This kprobe pre_handler is registered with every kretprobe. When probe
* hits it will set up the return probe.
@@ -1860,6 +1867,10 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
return 0;
}
+ /* Try to use fastpath (lockless) */
+ if (!rp->entry_handler && !arch_prepare_kretprobe_fast(rp, regs))
+ return 0;
+
/* TODO: consider to only swap the RA after the last pre_handler fired */
hash = hash_ptr(current, KPROBE_HASH_BITS);
raw_spin_lock_irqsave(&rp->lock, flags);
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index d56123cdcc89..bf71349d0bd0 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -182,9 +182,8 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
}
/* Retrieve a function return address to the trace stack on thread info.*/
-static void
-ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
- unsigned long frame_pointer)
+void ftrace_pop_return_trace(struct ftrace_graph_ret *trace,
+ unsigned long *ret, unsigned long frame_pointer)
{
int index;