Re: [PATCH 1/2] bpf: add a bpf_override_function helper

From: Alexei Starovoitov
Date: Mon Oct 30 2017 - 21:35:58 EST


On 10/30/17 2:19 PM, Josef Bacik wrote:
From: Josef Bacik <jbacik@xxxxxx>

Error injection is sloppy and very ad-hoc. BPF could fill this niche
perfectly with its kprobe functionality. We could make sure errors are
only triggered in the specific call chains and situations we care
about. Accomplish this with the bpf_override_function helper. It
modifies the probed function's return value to the specified value and
sets the PC to an override function that simply returns, bypassing the
originally probed function. This gives us a nice, clean way to
implement systematic error injection for all of our code paths.
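
For illustration, a program using the helper might look roughly like
the following. This is only a sketch modeled on the samples/bpf style:
the target function (open_ctree), section name, headers and the
hand-rolled helper declaration are illustrative, not part of this
patch.

#include <linux/ptrace.h>
#include <uapi/linux/bpf.h>
#include <uapi/asm-generic/errno-base.h>
#include "bpf_helpers.h"

/* the helper is new, so declare it by hand instead of bpf_helpers.h */
static int (*bpf_override_return)(void *ctx, unsigned long rc) =
	(void *) BPF_FUNC_override_return;

/* force open_ctree() to fail with -ENOMEM whenever this probe fires */
SEC("kprobe/open_ctree")
int inject_open_ctree_error(struct pt_regs *ctx)
{
	bpf_override_return(ctx, -ENOMEM);
	return 0;
}

char _license[] SEC("license") = "GPL";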

Signed-off-by: Josef Bacik <jbacik@xxxxxx>
---
arch/Kconfig | 3 +++
arch/x86/Kconfig | 1 +
arch/x86/include/asm/kprobes.h | 4 ++++
arch/x86/include/asm/ptrace.h | 5 +++++
arch/x86/kernel/kprobes/ftrace.c | 14 ++++++++++++
include/uapi/linux/bpf.h | 7 +++++-
kernel/trace/Kconfig | 11 ++++++++++
kernel/trace/bpf_trace.c | 47 +++++++++++++++++++++++++++++++++++-----
kernel/trace/trace.h | 6 +++++
kernel/trace/trace_kprobe.c | 23 ++++++++++++++------
10 files changed, 108 insertions(+), 13 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index d789a89cb32c..4fb618082259 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -195,6 +195,9 @@ config HAVE_OPTPROBES
config HAVE_KPROBES_ON_FTRACE
bool

+config HAVE_KPROBE_OVERRIDE
+ bool
+
config HAVE_NMI
bool

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 971feac13506..5126d2750dd0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -152,6 +152,7 @@ config X86
select HAVE_KERNEL_XZ
select HAVE_KPROBES
select HAVE_KPROBES_ON_FTRACE
+ select HAVE_KPROBE_OVERRIDE
select HAVE_KRETPROBES
select HAVE_KVM
select HAVE_LIVEPATCH if X86_64
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 6cf65437b5e5..c6c3b1f4306a 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -67,6 +67,10 @@ extern const int kretprobe_blacklist_size;
void arch_remove_kprobe(struct kprobe *p);
asmlinkage void kretprobe_trampoline(void);

+#ifdef CONFIG_KPROBES_ON_FTRACE
+extern void arch_ftrace_kprobe_override_function(struct pt_regs *regs);
+#endif
+
/* Architecture specific copy of original instruction*/
struct arch_specific_insn {
/* copy of the original instruction */
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 91c04c8e67fa..f04e71800c2f 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -108,6 +108,11 @@ static inline unsigned long regs_return_value(struct pt_regs *regs)
return regs->ax;
}

+static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
+{
+ regs->ax = rc;
+}
+
/*
* user_mode(regs) determines whether a register set came from user
* mode. On x86_32, this is true if V8086 mode was enabled OR if the
diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c
index 041f7b6dfa0f..3c455bf490cb 100644
--- a/arch/x86/kernel/kprobes/ftrace.c
+++ b/arch/x86/kernel/kprobes/ftrace.c
@@ -97,3 +97,17 @@ int arch_prepare_kprobe_ftrace(struct kprobe *p)
p->ainsn.boostable = false;
return 0;
}
+
+asmlinkage void override_func(void);
+asm(
+ ".type override_func, @function\n"
+ "override_func:\n"
+ " ret\n"
+ ".size override_func, .-override_func\n"
+);
+
+void arch_ftrace_kprobe_override_function(struct pt_regs *regs)
+{
+ regs->ip = (unsigned long)&override_func;
+}
+NOKPROBE_SYMBOL(arch_ftrace_kprobe_override_function);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0b7b54d898bd..1ad5b87a42f6 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -673,6 +673,10 @@ union bpf_attr {
* @buf: buf to fill
* @buf_size: size of the buf
* Return : 0 on success or negative error code
+ *
+ * int bpf_override_return(pt_regs, rc)
+ * @pt_regs: pointer to struct pt_regs
+ * @rc: the return value to set
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -732,7 +736,8 @@ union bpf_attr {
FN(xdp_adjust_meta), \
FN(perf_event_read_value), \
FN(perf_prog_read_value), \
- FN(getsockopt),
+ FN(getsockopt), \
+ FN(override_return),

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 434c840e2d82..9dc0deeaad2b 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -518,6 +518,17 @@ config FUNCTION_PROFILER

If in doubt, say N.

+config BPF_KPROBE_OVERRIDE
+ bool "Enable BPF programs to override a kprobed function"
+ depends on BPF_EVENTS
+ depends on KPROBES_ON_FTRACE
+ depends on HAVE_KPROBE_OVERRIDE
+ depends on DYNAMIC_FTRACE_WITH_REGS
+ default n
+ help
+ Allows BPF to override the execution of a probed function and
+ set a different return value. This is used for error injection.
+
config FTRACE_MCOUNT_RECORD
def_bool y
depends on DYNAMIC_FTRACE
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 136aa6bb0422..38b6d6016b71 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -13,10 +13,14 @@
#include <linux/filter.h>
#include <linux/uaccess.h>
#include <linux/ctype.h>
+#include <asm/kprobes.h>
+
#include "trace.h"

u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);

+static DEFINE_PER_CPU(int, pc_modified);
+
/**
* trace_call_bpf - invoke BPF program
* @call: tracepoint event
@@ -27,16 +31,18 @@ u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
*
* Return: BPF programs always return an integer which is interpreted by
* kprobe handler as:
- * 0 - return from kprobe (event is filtered out)
- * 1 - store kprobe event into ring buffer
- * Other values are reserved and currently alias to 1
+ * TRACE_KPROBE_SKIP - return from kprobe (event is filtered out)
+ * TRACE_KPROBE_STORE - store kprobe event into ring buffer
+ * TRACE_KPROBE_MODIFIED - we modified the registers, make sure the dispatcher
+ * skips the event and returns so the kprobe infrastructure
+ * doesn't mess with the next instruction.
*/
unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
{
unsigned int ret;

if (in_nmi()) /* not supported yet */
- return 1;
+ return TRACE_KPROBE_STORE;

preempt_disable();

@@ -47,7 +53,7 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
* and don't send kprobe event into ring-buffer,
* so return zero here
*/
- ret = 0;
+ ret = TRACE_KPROBE_SKIP;
goto out;
}

@@ -67,7 +73,13 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
* rcu_dereference() which is accepted risk.
*/
ret = BPF_PROG_RUN_ARRAY_CHECK(call->prog_array, ctx, BPF_PROG_RUN);
+ if (ret)
+ ret = TRACE_KPROBE_STORE;

+ if (__this_cpu_read(pc_modified)) {
+ __this_cpu_write(pc_modified, 0);
+ ret = TRACE_KPROBE_MODIFIED;

we probably need to fork trace_call_bpf() specifically for kprobes,
since this new functionality is not applicable to tracepoints and
uprobes, much like perf_event type bpf progs already go through their
own bpf_overflow_handler(). (see the sketch at the end of this mail)

+ }
out:
__this_cpu_dec(bpf_prog_active);
preempt_enable();
@@ -76,6 +88,29 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
}
EXPORT_SYMBOL_GPL(trace_call_bpf);

+#ifdef CONFIG_BPF_KPROBE_OVERRIDE
+BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc)
+{
+ __this_cpu_write(pc_modified, 1);
+ regs_set_return_value(regs, rc);
+ arch_ftrace_kprobe_override_function(regs);
+ return 0;
+}
+#else
+BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc)
+{
+ return -EINVAL;
+}
+#endif
+
+static const struct bpf_func_proto bpf_override_return_proto = {
+ .func = bpf_override_return,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
+
BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr)
{
int ret;
@@ -551,6 +586,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
return &bpf_get_stackid_proto;
case BPF_FUNC_perf_event_read_value:
return &bpf_perf_event_read_value_proto;
+ case BPF_FUNC_override_return:
+ return &bpf_override_return_proto;

good call to allow it on kprobes only.
It probably needs to be tightened further to allow it
on ftrace-based kprobes only.
imo 'depends on KPROBES_ON_FTRACE' isn't enough,
since a kprobe placed in the middle of a function will still fire via
trap and won't work with this override_func(); a runtime check along
the lines sketched after this hunk would cover that case.

default:
return tracing_func_proto(func_id);
}
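
For instance, the attach path could refuse the override helper for any
kprobe that was not placed via ftrace. Purely illustrative and
untested; it assumes a new helper living next to the existing code in
trace_kprobe.c:

/* Sketch only: a trap-based kprobe in the middle of a function cannot
 * be redirected to override_func(), so reject the override helper for
 * anything that is not an ftrace-based kprobe.
 */
static int trace_kprobe_allow_override(struct trace_kprobe *tk)
{
	if (!(tk->rp.kp.flags & KPROBE_FLAG_FTRACE))
		return -EINVAL;
	return 0;
}
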
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 652c682707cd..317ff2e961ac 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -278,6 +278,12 @@ enum {
TRACE_ARRAY_FL_GLOBAL = (1 << 0)
};

+enum {
+ TRACE_KPROBE_SKIP = 0,
+ TRACE_KPROBE_STORE,
+ TRACE_KPROBE_MODIFIED,
+};
+
extern struct list_head ftrace_trace_arrays;

extern struct mutex trace_types_lock;
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index abf92e478cfb..722fc6568134 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1170,7 +1170,7 @@ static int kretprobe_event_define_fields(struct trace_event_call *event_call)
#ifdef CONFIG_PERF_EVENTS

/* Kprobe profile handler */
-static void
+static int
kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
{
struct trace_event_call *call = &tk->tp.call;
@@ -1179,12 +1179,19 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
int size, __size, dsize;
int rctx;

- if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs))
- return;
+ if (bpf_prog_array_valid(call)) {
+ int ret = trace_call_bpf(call, regs);

actually, can we keep trace_call_bpf() as-is and move the
if (__this_cpu_read(pc_modified))
logic in here instead?
I think kprobe_perf_func() already runs with preemption disabled.
Maybe a specialized trace_call_kprobe_bpf() would be better still,
to avoid the double preempt_disable.
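
Something along these lines is what I mean. A rough, untested sketch
only; it reuses the TRACE_KPROBE_* values, pc_modified flag and
bpf_prog_active counter from this patch, and the function name is made
up:

/*
 * Sketch: a kprobe-specific wrapper so tracepoints/uprobes keep using
 * plain trace_call_bpf(), and the pc_modified handling (plus the single
 * preempt_disable) stays local to the kprobe path.
 */
static unsigned int trace_call_kprobe_bpf(struct trace_event_call *call,
					  struct pt_regs *regs)
{
	unsigned int ret;

	if (in_nmi())		/* not supported yet */
		return TRACE_KPROBE_STORE;

	preempt_disable();
	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
		ret = TRACE_KPROBE_SKIP;
		goto out;
	}

	ret = BPF_PROG_RUN_ARRAY_CHECK(call->prog_array, regs, BPF_PROG_RUN) ?
		TRACE_KPROBE_STORE : TRACE_KPROBE_SKIP;

	/* bpf_override_return() flagged that it rewrote regs->ip */
	if (__this_cpu_read(pc_modified)) {
		__this_cpu_write(pc_modified, 0);
		ret = TRACE_KPROBE_MODIFIED;
	}
out:
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();
	return ret;
}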