Re: [RFC PATCH] bpf: Add new bpf map type for timer

From: He Kuang
Date: Wed Oct 21 2015 - 06:07:11 EST


ping and add ast@xxxxxxxxxxxx, what's your opinion on this?

On 2015/10/19 13:34, He Kuang wrote:
This patch implements a timer map type inherited from array map. eBPF
programs can deloy a timer by updating an entry in timer map, and
destroy that by deleting the entry. The timer delay time(ns) is set by
updating the value field of the entries.

Currently, an intended empty function is called when the timer is
triggered, then eBPF programs can be attatched to this function and
perfrom customized functions.

Signed-off-by: He Kuang <hekuang@xxxxxxxxxx>
---

Here's a hypothetical scenario to illustrate the use of timer map.

A video frame is updated between frame_refresh_start() and
frame_refresh_end(), in most cases, the interval between these two
functions is less than 40ms, but occasionally over 200ms.

We can set a timer which alarm after the frame_refresh_start() has
been executed 42ms(slightly larger than 40ms) and destory this timer
if frame_refresh_end() is called. So, for most cases, the timer is not
triggered, this can significantly reduce the amount of trace data.

eBPF progs:

struct bpf_map_def SEC("maps") timer_map = {
.type = BPF_MAP_TYPE_TIMER_ARRAY,
.key_size = sizeof(int),
.value_size = sizeof(unsigned long long),
.max_entries = 4,
};

SEC("func1=frame_refresh_start")
int bpf_prog1(struct pt_regs *ctx)
{
int delay_ns = 42 * NSEC_PER_MSEC; /* 42 milliseconds */
unsigned int index = 0;

bpf_map_update_elem(&timer_map, &index, &delay_ns, BPF_ANY);
return 0;
}

SEC("func2=frame_refresh_end")
int bpf_prog2(struct pt_regs *ctx)
{
unsigned int index = 0;

bpf_map_delete_elem(&timer_map, &index);
return 0;
}

SEC("func3=bpf_timer_callback")
int bpf_prog3(struct pt_regs *ctx, int result)
{
char fmt[] = "timer triggered\n";

bpf_trace_printk(fmt, sizeof(fmt));

/* If comes here, frame refresh time is beyond the threshold
we can switch on more tracepoints or enable more detailed
trace paramaters to diagnose the problems.
*/
return 0;
}

Then we can write an exec to test the ebpf progs above, each timeout
frame updates will trigger the timer.

---
include/uapi/linux/bpf.h | 1 +
kernel/bpf/arraymap.c | 161 +++++++++++++++++++++++++++++++++++++++++++----
2 files changed, 149 insertions(+), 13 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 92a48e2..c41c80e 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -115,6 +115,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_ARRAY,
BPF_MAP_TYPE_PROG_ARRAY,
BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+ BPF_MAP_TYPE_TIMER_ARRAY,
};

enum bpf_prog_type {
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 29ace10..1e03c70 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -16,18 +16,10 @@
#include <linux/mm.h>
#include <linux/filter.h>

-/* Called from syscall */
-static struct bpf_map *array_map_alloc(union bpf_attr *attr)
+static struct bpf_map *__array_map_alloc(union bpf_attr *attr, u32 elem_size)
{
struct bpf_array *array;
- u32 elem_size, array_size;
-
- /* check sanity of attributes */
- if (attr->max_entries == 0 || attr->key_size != 4 ||
- attr->value_size == 0)
- return ERR_PTR(-EINVAL);
-
- elem_size = round_up(attr->value_size, 8);
+ u32 array_size;

/* check round_up into zero and u32 overflow */
if (elem_size == 0 ||
@@ -54,6 +46,20 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
return &array->map;
}

+/* Called from syscall */
+static struct bpf_map *array_map_alloc(union bpf_attr *attr)
+{
+ u32 elem_size;
+
+ /* check sanity of attributes */
+ if (attr->max_entries == 0 || attr->key_size != 4 ||
+ attr->value_size == 0)
+ return ERR_PTR(-EINVAL);
+
+ elem_size = round_up(attr->value_size, 8);
+ return __array_map_alloc(attr, elem_size);
+}
+
/* Called from syscall or from eBPF program */
static void *array_map_lookup_elem(struct bpf_map *map, void *key)
{
@@ -171,7 +177,7 @@ static void fd_array_map_free(struct bpf_map *map)
kvfree(array);
}

-static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key)
+static void *empty_array_map_lookup_elem(struct bpf_map *map, void *key)
{
return NULL;
}
@@ -255,7 +261,7 @@ static const struct bpf_map_ops prog_array_ops = {
.map_alloc = fd_array_map_alloc,
.map_free = fd_array_map_free,
.map_get_next_key = array_map_get_next_key,
- .map_lookup_elem = fd_array_map_lookup_elem,
+ .map_lookup_elem = empty_array_map_lookup_elem,
.map_update_elem = fd_array_map_update_elem,
.map_delete_elem = fd_array_map_delete_elem,
.map_fd_get_ptr = prog_fd_array_get_ptr,
@@ -312,7 +318,7 @@ static const struct bpf_map_ops perf_event_array_ops = {
.map_alloc = fd_array_map_alloc,
.map_free = perf_event_array_map_free,
.map_get_next_key = array_map_get_next_key,
- .map_lookup_elem = fd_array_map_lookup_elem,
+ .map_lookup_elem = empty_array_map_lookup_elem,
.map_update_elem = fd_array_map_update_elem,
.map_delete_elem = fd_array_map_delete_elem,
.map_fd_get_ptr = perf_event_fd_array_get_ptr,
@@ -330,3 +336,132 @@ static int __init register_perf_event_array_map(void)
return 0;
}
late_initcall(register_perf_event_array_map);
+
+/* Intended empty function as ebpf stub */
+enum hrtimer_restart bpf_timer_callback(struct hrtimer *timer __maybe_unused)
+{
+ return HRTIMER_NORESTART;
+}
+
+static void timer_array_map_init_timers(struct bpf_map *map)
+{
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
+ int i;
+ struct hrtimer *timer;
+
+ /* init all timer */
+ for (i = 0; i < array->map.max_entries; i++) {
+ timer = (struct hrtimer *)(array->value +
+ array->elem_size * i);
+ hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ timer->function = bpf_timer_callback;
+ }
+}
+
+static struct bpf_map *timer_array_map_alloc(union bpf_attr *attr)
+{
+ u32 elem_size;
+ struct bpf_map *map;
+
+ if (attr->value_size != sizeof(u64))
+ return ERR_PTR(-EINVAL);
+
+ /* check sanity of attributes */
+ if (attr->max_entries == 0 || attr->key_size != 4 ||
+ attr->value_size == 0)
+ return ERR_PTR(-EINVAL);
+
+ elem_size = round_up(sizeof(struct hrtimer), 8);
+ map = __array_map_alloc(attr, elem_size);
+ if (IS_ERR(map))
+ return map;
+
+ timer_array_map_init_timers(map);
+
+ return map;
+}
+
+static void timer_array_map_free(struct bpf_map *map)
+{
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
+ int i;
+ struct hrtimer *timer;
+
+ synchronize_rcu();
+
+ /* cancel all untriggered timer */
+ for (i = 0; i < array->map.max_entries; i++) {
+ timer = (struct hrtimer *)(array->value +
+ array->elem_size * i);
+ hrtimer_cancel(timer);
+ }
+ kvfree(array);
+}
+
+/* Timer activate */
+static int timer_array_map_update_elem(struct bpf_map *map, void *key,
+ void *value, u64 map_flags)
+{
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
+ u32 index = *(u32 *)key;
+ struct hrtimer *timer;
+ int delay_ns = *(u64 *)value;
+ ktime_t kt;
+
+ if (map_flags > BPF_EXIST)
+ /* unknown flags */
+ return -EINVAL;
+
+ if (index >= array->map.max_entries)
+ /* all elements were pre-allocated, cannot insert a new one */
+ return -E2BIG;
+
+ if (map_flags == BPF_NOEXIST)
+ /* all elements already exist */
+ return -EEXIST;
+
+ timer = (struct hrtimer *)(array->value + array->elem_size * index);
+ kt = ktime_set(0, delay_ns);
+ hrtimer_start(timer, kt, HRTIMER_MODE_REL);
+
+ return 0;
+}
+
+/* Timer inactivate */
+static int timer_array_map_delete_elem(struct bpf_map *map, void *key)
+{
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
+ u32 index = *(u32 *)key;
+ struct hrtimer *timer;
+
+ if (index >= array->map.max_entries)
+ return -E2BIG;
+
+ timer = (struct hrtimer *)(array->value + array->elem_size * index);
+
+ if (hrtimer_try_to_cancel(timer) >= 0)
+ return 0;
+ else
+ return -EBUSY;
+}
+
+static const struct bpf_map_ops timer_array_ops = {
+ .map_alloc = timer_array_map_alloc,
+ .map_free = timer_array_map_free,
+ .map_get_next_key = array_map_get_next_key,
+ .map_lookup_elem = empty_array_map_lookup_elem,
+ .map_update_elem = timer_array_map_update_elem,
+ .map_delete_elem = timer_array_map_delete_elem,
+};
+
+static struct bpf_map_type_list timer_array_type __read_mostly = {
+ .ops = &timer_array_ops,
+ .type = BPF_MAP_TYPE_TIMER_ARRAY,
+};
+
+static int __init register_timer_array_map(void)
+{
+ bpf_register_map_type(&timer_array_type);
+ return 0;
+}
+late_initcall(register_timer_array_map);


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/