[PATCH bpf-next 1/5] bpf: introduce BPF_PROG_TYPE_USER

From: Song Liu
Date: Sat Aug 01 2020 - 04:50:00 EST


As of today, to trigger a BPF program from user space, the common practice
is to create a uprobe on a special function and call that function. For
example, bpftrace uses BEGIN_trigger and END_trigger for the BEGIN and END
programs.

However, a uprobe is not ideal for this use case. First, a uprobe uses a
trap, which adds non-trivial overhead. Second, a uprobe requires calculating
the function offset at runtime, which is not very reliable. bpftrace has
seen issues with this:
https://github.com/iovisor/bpftrace/pull/1438
https://github.com/iovisor/bpftrace/issues/1440

This patch introduces a new BPF program type BPF_PROG_TYPE_USER, or "user
program". A user program is triggered via sys_bpf(BPF_PROG_TEST_RUN), which
is significantly faster than a trap.

To make user programs more flexible, the following features are enabled:
1. The user can specify on which cpu the program should run. If the
target cpu is not the current cpu, the program is triggered via IPI.
2. The user can pass an optional argument to the user program. Currently,
the argument can only be an array of 5 u64 numbers.

User program has access to helper functions in bpf_tracing_func_proto()
and bpf_get_stack|stackid().

Signed-off-by: Song Liu <songliubraving@xxxxxx>
---
include/linux/bpf_types.h | 2 +
include/uapi/linux/bpf.h | 19 ++++++
kernel/bpf/syscall.c | 3 +-
kernel/trace/bpf_trace.c | 121 +++++++++++++++++++++++++++++++++
tools/include/uapi/linux/bpf.h | 19 ++++++
5 files changed, 163 insertions(+), 1 deletion(-)

diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index a52a5688418e5..3c52f3207aced 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -76,6 +76,8 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_EXT, bpf_extension,
BPF_PROG_TYPE(BPF_PROG_TYPE_LSM, lsm,
void *, void *)
#endif /* CONFIG_BPF_LSM */
+BPF_PROG_TYPE(BPF_PROG_TYPE_USER, user,
+ void *, void *)
#endif

BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index eb5e0c38eb2cf..f6b9d4e7eeb4e 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -190,6 +190,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_EXT,
BPF_PROG_TYPE_LSM,
BPF_PROG_TYPE_SK_LOOKUP,
+ BPF_PROG_TYPE_USER,
};

enum bpf_attach_type {
@@ -556,6 +557,12 @@ union bpf_attr {
*/
__aligned_u64 ctx_in;
__aligned_u64 ctx_out;
+ __u32 cpu_plus; /* run this program on cpu
+ * (cpu_plus - 1).
+ * If cpu_plus == 0, run on
+ * current cpu. Only valid
+ * for BPF_PROG_TYPE_USER.
+ */
} test;

struct { /* anonymous struct used by BPF_*_GET_*_ID */
@@ -4441,4 +4448,16 @@ struct bpf_sk_lookup {
__u32 local_port; /* Host byte order */
};

+struct pt_regs;
+
+/* Number of u64 arguments user space can pass to a BPF_PROG_TYPE_USER program. */
+#define BPF_USER_PROG_MAX_ARGS 5
+/*
+ * Optional input of BPF_PROG_TEST_RUN for a BPF_PROG_TYPE_USER program:
+ * test.data_in points to one of these and test.data_size_in is its size.
+ */
+struct bpf_user_prog_args {
+ __u64 args[BPF_USER_PROG_MAX_ARGS];
+};
+
+/* Context passed to a BPF_PROG_TYPE_USER program when it is run. */
+struct bpf_user_prog_ctx {
+ struct pt_regs *regs; /* registers captured for this run */
+ __u64 args[BPF_USER_PROG_MAX_ARGS]; /* user-supplied args, or zeros if none were given */
+};
+
+
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index cd3d599e9e90e..f5a28fd8a9bc2 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2078,6 +2078,7 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type)
case BPF_PROG_TYPE_LSM:
case BPF_PROG_TYPE_STRUCT_OPS: /* has access to struct sock */
case BPF_PROG_TYPE_EXT: /* extends any prog */
+ case BPF_PROG_TYPE_USER:
return true;
default:
return false;
@@ -2969,7 +2970,7 @@ static int bpf_prog_query(const union bpf_attr *attr,
}
}

-#define BPF_PROG_TEST_RUN_LAST_FIELD test.ctx_out
+#define BPF_PROG_TEST_RUN_LAST_FIELD test.cpu_plus

static int bpf_prog_test_run(const union bpf_attr *attr,
union bpf_attr __user *uattr)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index cb91ef902cc43..cbe789bc1b986 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -16,6 +16,7 @@
#include <linux/error-injection.h>
#include <linux/btf_ids.h>

+#include <asm/irq_regs.h>
#include <asm/tlb.h>

#include "trace_probe.h"
@@ -1740,6 +1741,126 @@ const struct bpf_verifier_ops perf_event_verifier_ops = {
const struct bpf_prog_ops perf_event_prog_ops = {
};

+/*
+ * State shared between bpf_prog_test_run_user() and the function that
+ * actually runs the program, which may execute on another CPU.
+ */
+struct bpf_user_prog_test_run_info {
+ struct bpf_prog *prog; /* program to run */
+ struct bpf_user_prog_ctx ctx; /* context handed to the program */
+ u32 retval; /* value returned by BPF_PROG_RUN() */
+};
+
+/*
+ * Run info->prog once with &info->ctx as its context and store the
+ * program's return value in info->retval. The run is bracketed by
+ * rcu_read_lock() and migrate_disable(). Callers fill in info->ctx
+ * before calling.
+ */
+static void
+__bpf_prog_test_run_user(struct bpf_user_prog_test_run_info *info)
+{
+ rcu_read_lock();
+ migrate_disable();
+ info->retval = BPF_PROG_RUN(info->prog, &info->ctx);
+ migrate_enable();
+ rcu_read_unlock();
+}
+
+/*
+ * smp_call_function_single() callback used by bpf_prog_test_run_user() when
+ * the program has to run on a CPU other than the caller's. @data is the
+ * struct bpf_user_prog_test_run_info prepared by the caller.
+ */
+static void _bpf_prog_test_run_user(void *data)
+{
+ struct bpf_user_prog_test_run_info *info = data;
+
+ /* the program's regs are whatever get_irq_regs() reports on this CPU */
+ info->ctx.regs = get_irq_regs();
+ __bpf_prog_test_run_user(info);
+}
+
+/*
+ * BPF_PROG_TEST_RUN handler for BPF_PROG_TYPE_USER programs.
+ *
+ * @prog:  program to run
+ * @kattr: kernel copy of the syscall attributes
+ * @uattr: user-space attributes; test.retval is written on success
+ *
+ * The program runs once. With test.cpu_plus == 0, or when cpu_plus - 1 is
+ * the CPU the caller is on, it runs directly on the calling CPU; otherwise
+ * it is run on CPU (cpu_plus - 1) through smp_call_function_single().
+ * test.data_in is optional; if given it must be exactly one
+ * struct bpf_user_prog_args, otherwise the program sees all-zero args.
+ * ctx_in, ctx_out, duration, repeat and data_out must all be zero.
+ *
+ * Returns 0 on success, -EINVAL for unsupported or inconsistent attributes,
+ * -EFAULT if user memory cannot be accessed, or the error reported by
+ * get_bpf_raw_tp_regs() or smp_call_function_single().
+ */
+static int bpf_prog_test_run_user(struct bpf_prog *prog,
+				  const union bpf_attr *kattr,
+				  union bpf_attr __user *uattr)
+{
+	void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
+	__u32 data_size = kattr->test.data_size_in;
+	struct bpf_user_prog_test_run_info info;
+	int cpu = kattr->test.cpu_plus - 1;
+	int this_cpu;
+	int err = 0;
+
+	if (kattr->test.ctx_in || kattr->test.ctx_out ||
+	    kattr->test.duration || kattr->test.repeat ||
+	    kattr->test.data_out)
+		return -EINVAL;
+
+	/* pointer and size must be given together or not at all */
+	if ((data_in && !data_size) || (!data_in && data_size))
+		return -EINVAL;
+
+	/* if provided, data_in should be struct bpf_user_prog_args */
+	if (data_size > 0 && data_size != sizeof(struct bpf_user_prog_args))
+		return -EINVAL;
+
+	if (data_size) {
+		if (copy_from_user(&info.ctx.args, data_in,
+				   sizeof(struct bpf_user_prog_args)))
+			return -EFAULT;
+	} else {
+		memset(&info.ctx.args, 0, sizeof(struct bpf_user_prog_args));
+	}
+
+	info.prog = prog;
+
+	/*
+	 * This is preemptible syscall context. Pin the task with get_cpu()
+	 * before comparing against the current CPU, so that the comparison
+	 * stays valid and the get/put of the raw tracepoint regs below
+	 * happen on one and the same CPU.
+	 */
+	this_cpu = get_cpu();
+	if (!kattr->test.cpu_plus || cpu == this_cpu) {
+		/* non-IPI, use regs from perf_fetch_caller_regs */
+		info.ctx.regs = get_bpf_raw_tp_regs();
+		if (IS_ERR(info.ctx.regs)) {
+			err = PTR_ERR(info.ctx.regs);
+		} else {
+			perf_fetch_caller_regs(info.ctx.regs);
+			__bpf_prog_test_run_user(&info);
+			put_bpf_raw_tp_regs();
+		}
+		put_cpu();
+	} else {
+		/* re-enable preemption before waiting for the remote CPU */
+		put_cpu();
+		err = smp_call_function_single(cpu, _bpf_prog_test_run_user,
+					       &info, 1);
+	}
+	if (err)
+		return err;
+
+	if (copy_to_user(&uattr->test.retval, &info.retval, sizeof(u32)))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * Verifier callback: decide whether a program may access @size bytes at
+ * offset @off of its context, struct bpf_user_prog_ctx.
+ *
+ * The context is read-only, and every access must be aligned to its own
+ * size so that it cannot start inside the struct and run past its end.
+ * The regs member is recorded as a u64-sized field, so only loads accepted
+ * by bpf_ctx_narrow_access_ok() are allowed for it.
+ *
+ * Returns true if the access is allowed, false otherwise.
+ */
+static bool user_prog_is_valid_access(int off, int size, enum bpf_access_type type,
+				      const struct bpf_prog *prog,
+				      struct bpf_insn_access_aux *info)
+{
+	const int size_u64 = sizeof(u64);
+
+	if (off < 0 || off >= sizeof(struct bpf_user_prog_ctx))
+		return false;
+	if (type != BPF_READ)
+		return false;
+	/* e.g. an 8-byte load at the last 4-byte offset would read past the ctx */
+	if (off % size != 0)
+		return false;
+
+	switch (off) {
+	case bpf_ctx_range(struct bpf_user_prog_ctx, regs):
+		bpf_ctx_record_field_size(info, size_u64);
+		if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
+			return false;
+		break;
+	default:
+		break;
+	}
+	return true;
+}
+
+/*
+ * Resolve a helper id for BPF_PROG_TYPE_USER programs: bpf_get_stackid()
+ * and bpf_get_stack() are provided here, every other id is delegated to
+ * bpf_tracing_func_proto().
+ *
+ * NOTE(review): the ctx of this program type is struct bpf_user_prog_ctx,
+ * not a bare struct pt_regs; confirm that these two protos (rather than
+ * variants that fetch the regs pointer out of the ctx) are the intended ones.
+ */
+static const struct bpf_func_proto *
+user_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+	if (func_id == BPF_FUNC_get_stackid)
+		return &bpf_get_stackid_proto;
+	if (func_id == BPF_FUNC_get_stack)
+		return &bpf_get_stack_proto;
+
+	return bpf_tracing_func_proto(func_id, prog);
+}
+
+/* Verifier callbacks for BPF_PROG_TYPE_USER: helper lookup and ctx access rules. */
+const struct bpf_verifier_ops user_verifier_ops = {
+ .get_func_proto = user_prog_func_proto,
+ .is_valid_access = user_prog_is_valid_access,
+};
+
+/* Program ops for BPF_PROG_TYPE_USER; test_run is the hook that runs the program. */
+const struct bpf_prog_ops user_prog_ops = {
+ .test_run = bpf_prog_test_run_user,
+};
+
static DEFINE_MUTEX(bpf_event_mutex);

#define BPF_TRACE_MAX_PROGS 64
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index eb5e0c38eb2cf..f6b9d4e7eeb4e 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -190,6 +190,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_EXT,
BPF_PROG_TYPE_LSM,
BPF_PROG_TYPE_SK_LOOKUP,
+ BPF_PROG_TYPE_USER,
};

enum bpf_attach_type {
@@ -556,6 +557,12 @@ union bpf_attr {
*/
__aligned_u64 ctx_in;
__aligned_u64 ctx_out;
+ __u32 cpu_plus; /* run this program on cpu
+ * (cpu_plus - 1).
+ * If cpu_plus == 0, run on
+ * current cpu. Only valid
+ * for BPF_PROG_TYPE_USER.
+ */
} test;

struct { /* anonymous struct used by BPF_*_GET_*_ID */
@@ -4441,4 +4448,16 @@ struct bpf_sk_lookup {
__u32 local_port; /* Host byte order */
};

+struct pt_regs;
+
+/* Number of u64 arguments user space can pass to a BPF_PROG_TYPE_USER program. */
+#define BPF_USER_PROG_MAX_ARGS 5
+/*
+ * Optional input of BPF_PROG_TEST_RUN for a BPF_PROG_TYPE_USER program:
+ * test.data_in points to one of these and test.data_size_in is its size.
+ */
+struct bpf_user_prog_args {
+ __u64 args[BPF_USER_PROG_MAX_ARGS];
+};
+
+/* Context passed to a BPF_PROG_TYPE_USER program when it is run. */
+struct bpf_user_prog_ctx {
+ struct pt_regs *regs; /* registers captured for this run */
+ __u64 args[BPF_USER_PROG_MAX_ARGS]; /* user-supplied args, or zeros if none were given */
+};
+
+
#endif /* _UAPI__LINUX_BPF_H__ */
--
2.24.1