[PATCH bpf-next] bpf: ringbuf: Support consuming BPF_MAP_TYPE_RINGBUF from prog

From: Daniel Xu
Date: Mon Sep 09 2024 - 20:55:21 EST


Right now there exists prog produce / userspace consume and userspace
produce / prog consume support. But it is also useful to have prog
produce / prog consume.

For example, we want to track the latency overhead of cpumap in
production. Since we need to store enqueue timestamps somewhere and
cpumap is MPSC, we need an MPSC data structure to shadow cpumap. BPF
ringbuf is such a data structure. Rather than reimplement (possibly
poorly) a custom ringbuffer in BPF, extend the existing interface to
allow the final quadrant of ringbuf usecases to be filled. Note we
ignore userspace to userspace use case - there is no need to involve
kernel for that.

Signed-off-by: Daniel Xu <dxu@xxxxxxxxx>
---
kernel/bpf/verifier.c | 6 +-
tools/testing/selftests/bpf/Makefile | 3 +-
.../selftests/bpf/prog_tests/ringbuf.c | 50 +++++++++++++++
.../bpf/progs/test_ringbuf_bpf_to_bpf.c | 64 +++++++++++++++++++
4 files changed, 120 insertions(+), 3 deletions(-)
create mode 100644 tools/testing/selftests/bpf/progs/test_ringbuf_bpf_to_bpf.c

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 53d0556fbbf3..56bfe559f228 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -9142,7 +9142,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
func_id != BPF_FUNC_ringbuf_query &&
func_id != BPF_FUNC_ringbuf_reserve_dynptr &&
func_id != BPF_FUNC_ringbuf_submit_dynptr &&
- func_id != BPF_FUNC_ringbuf_discard_dynptr)
+ func_id != BPF_FUNC_ringbuf_discard_dynptr &&
+ func_id != BPF_FUNC_user_ringbuf_drain)
goto error;
break;
case BPF_MAP_TYPE_USER_RINGBUF:
@@ -9276,7 +9277,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
goto error;
break;
case BPF_FUNC_user_ringbuf_drain:
- if (map->map_type != BPF_MAP_TYPE_USER_RINGBUF)
+ if (map->map_type != BPF_MAP_TYPE_USER_RINGBUF &&
+ map->map_type != BPF_MAP_TYPE_RINGBUF)
goto error;
break;
case BPF_FUNC_get_stackid:
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 9905e3739dd0..233109843d4d 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -503,7 +503,8 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \
LSKELS := fentry_test.c fexit_test.c fexit_sleep.c atomics.c \
trace_printk.c trace_vprintk.c map_ptr_kern.c \
core_kern.c core_kern_overflow.c test_ringbuf.c \
- test_ringbuf_n.c test_ringbuf_map_key.c test_ringbuf_write.c
+ test_ringbuf_n.c test_ringbuf_map_key.c test_ringbuf_write.c \
+ test_ringbuf_bpf_to_bpf.c

# Generate both light skeleton and libbpf skeleton for these
LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test.c \
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
index da430df45aa4..3e7955573ac5 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
@@ -17,6 +17,7 @@
#include "test_ringbuf_n.lskel.h"
#include "test_ringbuf_map_key.lskel.h"
#include "test_ringbuf_write.lskel.h"
+#include "test_ringbuf_bpf_to_bpf.lskel.h"

#define EDONE 7777

@@ -497,6 +498,53 @@ static void ringbuf_map_key_subtest(void)
test_ringbuf_map_key_lskel__destroy(skel_map_key);
}

+static void ringbuf_bpf_to_bpf_subtest(void)
+{
+ struct test_ringbuf_bpf_to_bpf_lskel *skel;
+ int err, i;
+
+ skel = test_ringbuf_bpf_to_bpf_lskel__open();
+ if (!ASSERT_OK_PTR(skel, "test_ringbuf_bpf_to_bpf_lskel__open"))
+ return;
+
+ skel->maps.ringbuf.max_entries = getpagesize();
+ skel->bss->pid = getpid();
+
+ err = test_ringbuf_bpf_to_bpf_lskel__load(skel);
+ if (!ASSERT_OK(err, "test_ringbuf_bpf_to_bpf_lskel__load"))
+ goto cleanup;
+
+ ringbuf = ring_buffer__new(skel->maps.ringbuf.map_fd, NULL, NULL, NULL);
+ if (!ASSERT_OK_PTR(ringbuf, "ring_buffer__new"))
+ goto cleanup;
+
+ err = test_ringbuf_bpf_to_bpf_lskel__attach(skel);
+ if (!ASSERT_OK(err, "test_ringbuf_bpf_to_bpf_lskel__attach"))
+ goto cleanup_ringbuf;
+
+ /* Produce N_SAMPLES samples in the ring buffer by calling getpid() */
+ skel->bss->value = SAMPLE_VALUE;
+ for (i = 0; i < N_SAMPLES; i++)
+ syscall(__NR_getpgid);
+
+ /* Trigger bpf-side consumption */
+ syscall(__NR_prctl);
+
+ /* Check correct number samples were consumed */
+ if (!ASSERT_EQ(skel->bss->round_tripped, N_SAMPLES, "round_tripped"))
+ goto cleanup_ringbuf;
+
+ /* Check all samples were consumed */
+ err = ring_buffer__consume(ringbuf);
+ if (!ASSERT_EQ(err, 0, "rb_consume"))
+ goto cleanup_ringbuf;
+
+cleanup_ringbuf:
+ ring_buffer__free(ringbuf);
+cleanup:
+ test_ringbuf_bpf_to_bpf_lskel__destroy(skel);
+}
+
void test_ringbuf(void)
{
if (test__start_subtest("ringbuf"))
@@ -507,4 +555,6 @@ void test_ringbuf(void)
ringbuf_map_key_subtest();
if (test__start_subtest("ringbuf_write"))
ringbuf_write_subtest();
+ if (test__start_subtest("ringbuf_bpf_to_bpf"))
+ ringbuf_bpf_to_bpf_subtest();
}
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_bpf_to_bpf.c b/tools/testing/selftests/bpf/progs/test_ringbuf_bpf_to_bpf.c
new file mode 100644
index 000000000000..378154922024
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf_bpf_to_bpf.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <unistd.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct sample {
+ long value;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+} ringbuf SEC(".maps");
+
+int pid = 0;
+long value = 0;
+int round_tripped = 0;
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int test_ringbuf_bpf_to_bpf_produce(void *ctx)
+{
+ int cur_pid = bpf_get_current_pid_tgid() >> 32;
+ struct sample *sample;
+
+ if (cur_pid != pid)
+ return 0;
+
+ sample = bpf_ringbuf_reserve(&ringbuf, sizeof(*sample), 0);
+ if (!sample)
+ return 0;
+ sample->value = value;
+
+ bpf_ringbuf_submit(sample, 0);
+ return 0;
+}
+
+static long consume_cb(struct bpf_dynptr *dynptr, void *context)
+{
+ struct sample *sample = NULL;
+
+ sample = bpf_dynptr_data(dynptr, 0, sizeof(*sample));
+ if (!sample)
+ return 0;
+
+ if (sample->value == value)
+ round_tripped++;
+
+ return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_prctl")
+int test_ringbuf_bpf_to_bpf_consume(void *ctx)
+{
+ int cur_pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (cur_pid != pid)
+ return 0;
+
+ bpf_user_ringbuf_drain(&ringbuf, consume_cb, NULL, 0);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
--
2.46.0