[PATCH bpf-next 05/14] bpf: extend bpf_prog_array to store pointers to the cgroup storage

From: Roman Gushchin
Date: Thu Jun 28 2018 - 12:48:39 EST


This patch converts bpf_prog_array from an array of prog pointers
to the array of struct bpf_prog_array_item elements.

This allows to save a cgroup storage pointer for each bpf program
efficiently attached to a cgroup.

Signed-off-by: Roman Gushchin <guro@xxxxxx>
Cc: Alexei Starovoitov <ast@xxxxxxxxxx>
Cc: Daniel Borkmann <daniel@xxxxxxxxxxxxx>
Acked-by: Martin KaFai Lau <kafai@xxxxxx>
---
include/linux/bpf.h | 19 +++++++++-----
kernel/bpf/cgroup.c | 24 ++++++++++-------
kernel/bpf/core.c | 76 +++++++++++++++++++++++++++--------------------------
3 files changed, 66 insertions(+), 53 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 4b3e42e5b6d0..709354a0608a 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -348,9 +348,14 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
* The 'struct bpf_prog_array *' should only be replaced with xchg()
* since other cpus are walking the array of pointers in parallel.
*/
+struct bpf_prog_array_item {
+ struct bpf_prog *prog;
+ struct bpf_cgroup_storage *cgroup_storage;
+};
+
struct bpf_prog_array {
struct rcu_head rcu;
- struct bpf_prog *progs[0];
+ struct bpf_prog_array_item items[0];
};

struct bpf_prog_array __rcu *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
@@ -371,7 +376,8 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,

#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null) \
({ \
- struct bpf_prog **_prog, *__prog; \
+ struct bpf_prog_array_item *_item; \
+ struct bpf_prog *_prog; \
struct bpf_prog_array *_array; \
u32 _ret = 1; \
preempt_disable(); \
@@ -379,10 +385,11 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
_array = rcu_dereference(array); \
if (unlikely(check_non_null && !_array))\
goto _out; \
- _prog = _array->progs; \
- while ((__prog = READ_ONCE(*_prog))) { \
- _ret &= func(__prog, ctx); \
- _prog++; \
+ _item = &_array->items[0]; \
+ while ((_prog = READ_ONCE(_item->prog))) { \
+ bpf_cgroup_storage_set(_item->cgroup_storage); \
+ _ret &= func(_prog, ctx); \
+ _item++; \
} \
_out: \
rcu_read_unlock(); \
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index f0a809868f92..14a1f6c94592 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -117,16 +117,20 @@ static int compute_effective_progs(struct cgroup *cgrp,
cnt = 0;
p = cgrp;
do {
- if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
- list_for_each_entry(pl,
- &p->bpf.progs[type], node) {
- if (!pl->prog)
- continue;
- rcu_dereference_protected(progs, 1)->
- progs[cnt++] = pl->prog;
- }
- p = cgroup_parent(p);
- } while (p);
+ if (cnt > 0 && !(p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
+ continue;
+
+ list_for_each_entry(pl, &p->bpf.progs[type], node) {
+ if (!pl->prog)
+ continue;
+
+ rcu_dereference_protected(progs, 1)->
+ items[cnt].prog = pl->prog;
+ rcu_dereference_protected(progs, 1)->
+ items[cnt].cgroup_storage = pl->storage;
+ cnt++;
+ }
+ } while ((p = cgroup_parent(p)));

*array = progs;
return 0;
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index a9e6c04d0f4a..145f44cb0cad 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1570,7 +1570,8 @@ struct bpf_prog_array __rcu *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags)
{
if (prog_cnt)
return kzalloc(sizeof(struct bpf_prog_array) +
- sizeof(struct bpf_prog *) * (prog_cnt + 1),
+ sizeof(struct bpf_prog_array_item) *
+ (prog_cnt + 1),
flags);

return &empty_prog_array.hdr;
@@ -1584,43 +1585,45 @@ void bpf_prog_array_free(struct bpf_prog_array __rcu *progs)
kfree_rcu(progs, rcu);
}

-int bpf_prog_array_length(struct bpf_prog_array __rcu *progs)
+int bpf_prog_array_length(struct bpf_prog_array __rcu *array)
{
- struct bpf_prog **prog;
+ struct bpf_prog_array_item *item;
u32 cnt = 0;

rcu_read_lock();
- prog = rcu_dereference(progs)->progs;
- for (; *prog; prog++)
- if (*prog != &dummy_bpf_prog.prog)
+ item = rcu_dereference(array)->items;
+ for (; item->prog; item++)
+ if (item->prog != &dummy_bpf_prog.prog)
cnt++;
rcu_read_unlock();
return cnt;
}

-static bool bpf_prog_array_copy_core(struct bpf_prog **prog,
+
+static bool bpf_prog_array_copy_core(struct bpf_prog_array *array,
u32 *prog_ids,
u32 request_cnt)
{
+ struct bpf_prog_array_item *item;
int i = 0;

- for (; *prog; prog++) {
- if (*prog == &dummy_bpf_prog.prog)
+ item = rcu_dereference(array)->items;
+ for (; item->prog; item++) {
+ if (item->prog == &dummy_bpf_prog.prog)
continue;
- prog_ids[i] = (*prog)->aux->id;
+ prog_ids[i] = item->prog->aux->id;
if (++i == request_cnt) {
- prog++;
+ item++;
break;
}
}

- return !!(*prog);
+ return !!(item->prog);
}

-int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
+int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *array,
__u32 __user *prog_ids, u32 cnt)
{
- struct bpf_prog **prog;
unsigned long err = 0;
bool nospc;
u32 *ids;
@@ -1639,8 +1642,7 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
if (!ids)
return -ENOMEM;
rcu_read_lock();
- prog = rcu_dereference(progs)->progs;
- nospc = bpf_prog_array_copy_core(prog, ids, cnt);
+ nospc = bpf_prog_array_copy_core(array, ids, cnt);
rcu_read_unlock();
err = copy_to_user(prog_ids, ids, cnt * sizeof(u32));
kfree(ids);
@@ -1651,14 +1653,14 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
return 0;
}

-void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs,
+void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *array,
struct bpf_prog *old_prog)
{
- struct bpf_prog **prog = progs->progs;
+ struct bpf_prog_array_item *item = array->items;

- for (; *prog; prog++)
- if (*prog == old_prog) {
- WRITE_ONCE(*prog, &dummy_bpf_prog.prog);
+ for (; item->prog; item++)
+ if (item->prog == old_prog) {
+ WRITE_ONCE(item->prog, &dummy_bpf_prog.prog);
break;
}
}
@@ -1669,7 +1671,7 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
struct bpf_prog_array **new_array)
{
int new_prog_cnt, carry_prog_cnt = 0;
- struct bpf_prog **existing_prog;
+ struct bpf_prog_array_item *existing;
struct bpf_prog_array *array;
bool found_exclude = false;
int new_prog_idx = 0;
@@ -1678,15 +1680,15 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
* the new array.
*/
if (old_array) {
- existing_prog = old_array->progs;
- for (; *existing_prog; existing_prog++) {
- if (*existing_prog == exclude_prog) {
+ existing = old_array->items;
+ for (; existing->prog; existing++) {
+ if (existing->prog == exclude_prog) {
found_exclude = true;
continue;
}
- if (*existing_prog != &dummy_bpf_prog.prog)
+ if (existing->prog != &dummy_bpf_prog.prog)
carry_prog_cnt++;
- if (*existing_prog == include_prog)
+ if (existing->prog == include_prog)
return -EEXIST;
}
}
@@ -1712,15 +1714,17 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,

/* Fill in the new prog array */
if (carry_prog_cnt) {
- existing_prog = old_array->progs;
- for (; *existing_prog; existing_prog++)
- if (*existing_prog != exclude_prog &&
- *existing_prog != &dummy_bpf_prog.prog)
- array->progs[new_prog_idx++] = *existing_prog;
+ existing = old_array->items;
+ for (; existing->prog; existing++)
+ if (existing->prog != exclude_prog &&
+ existing->prog != &dummy_bpf_prog.prog) {
+ array->items[new_prog_idx++].prog =
+ existing->prog;
+ }
}
if (include_prog)
- array->progs[new_prog_idx++] = include_prog;
- array->progs[new_prog_idx] = NULL;
+ array->items[new_prog_idx++].prog = include_prog;
+ array->items[new_prog_idx].prog = NULL;
*new_array = array;
return 0;
}
@@ -1729,7 +1733,6 @@ int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array,
u32 *prog_ids, u32 request_cnt,
u32 *prog_cnt)
{
- struct bpf_prog **prog;
u32 cnt = 0;

if (array)
@@ -1742,8 +1745,7 @@ int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array,
return 0;

/* this function is called under trace/bpf_trace.c: bpf_event_mutex */
- prog = rcu_dereference_check(array, 1)->progs;
- return bpf_prog_array_copy_core(prog, prog_ids, request_cnt) ? -ENOSPC
+ return bpf_prog_array_copy_core(array, prog_ids, request_cnt) ? -ENOSPC
: 0;
}

--
2.14.4