[PATCH v2 net-next 2/4] bpf: bpf_htab: Add BPF_MAP_TYPE_PERCPU_HASH

From: Martin KaFai Lau
Date: Tue Jan 12 2016 - 03:21:54 EST


This patch adds the BPF_MAP_TYPE_PERCPU_HASH map type and its
htab_map_ops implementation.

Signed-off-by: Martin KaFai Lau <kafai@xxxxxx>
---
include/uapi/linux/bpf.h | 1 +
kernel/bpf/hashtab.c | 186 +++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 187 insertions(+)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index aa6f857..43ae40c 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -81,6 +81,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_ARRAY,
BPF_MAP_TYPE_PROG_ARRAY,
BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+ BPF_MAP_TYPE_PERCPU_HASH,
};

enum bpf_prog_type {
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 24a6a47..02d7473 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -480,9 +480,195 @@ static struct bpf_map_type_list htab_type __read_mostly = {
.type = BPF_MAP_TYPE_HASH,
};

+/* each htab_percpu_elem is struct htab_percpu_elem + key
+ *
+ * The value is not stored inline: 'value' refers to a separate per-cpu
+ * allocation, while the key occupies the trailing 'key' array.
+ */
+struct htab_percpu_elem {
+ struct htab_elem_common common; /* kept first: htab_percpu_elem() casts a pointer to this member into the whole element */
+ void __percpu *value; /* per-cpu value area, released with free_percpu() */
+ char key[0] __aligned(8); /* trailing key storage, 8-byte aligned */
+};
+
+/* Convert a generic element header into the per-cpu element embedding it.
+ * 'common' is the first member of struct htab_percpu_elem, so both share
+ * the same address and no pointer adjustment is needed.
+ */
+static struct htab_percpu_elem *htab_percpu_elem(struct htab_elem_common *l)
+{
+	void *elem = l;
+
+	return elem;
+}
+
+/* Release a per-cpu element immediately: first its per-cpu value area,
+ * then the element itself.  Removal paths that need a grace period go
+ * through htab_percpu_elem_rcu_free() instead.
+ */
+static void htab_percpu_elem_free(struct htab_percpu_elem *l)
+{
+	void __percpu *pcpu_value = l->value;
+
+	free_percpu(pcpu_value);
+	kfree(l);
+}
+
+/* RCU callback: recover the element from its embedded rcu_head and free it */
+static void htab_percpu_elem_rcu_free(struct rcu_head *head)
+{
+	struct htab_percpu_elem *elem;
+
+	elem = htab_percpu_elem(container_of(head, struct htab_elem_common,
+					     rcu));
+	htab_percpu_elem_free(elem);
+}
+
+/* Build a new element for @key and attach a per-cpu value area to it.
+ * @value is copied into the slot of the CPU this runs on.
+ * Returns NULL when either allocation fails.
+ */
+static struct htab_percpu_elem *htab_percpu_elem_alloc(struct bpf_htab *htab,
+						       void *key,
+						       void *value)
+{
+	struct htab_percpu_elem *elem;
+	void __percpu *pcpu_value;
+
+	elem = htab_percpu_elem(htab_elem_common_alloc(htab, key));
+	if (!elem)
+		return NULL;
+
+	pcpu_value = __alloc_percpu_gfp(htab->map.value_size, 8,
+					GFP_ATOMIC | __GFP_NOWARN);
+	/* stored before the check so the free helper sees a defined pointer */
+	elem->value = pcpu_value;
+	if (!pcpu_value) {
+		htab_percpu_elem_free(elem);
+		return NULL;
+	}
+
+	/* only the slot of the running CPU receives @value */
+	memcpy(raw_cpu_ptr(pcpu_value), value, htab->map.value_size);
+
+	return elem;
+}
+
+/* Called from syscall
+ *
+ * Computes the per-element sizes for the per-cpu flavour and hands the
+ * rest of the map setup to __htab_map_alloc().
+ */
+static struct bpf_map *htab_percpu_map_alloc(union bpf_attr *attr)
+{
+	u32 elem_size, elem_value_size;
+
+	/* fixed element header followed by the key, padded to 8 bytes */
+	elem_size = sizeof(struct htab_percpu_elem);
+	elem_size += round_up(attr->key_size, 8);
+
+	/* one value copy per possible CPU on top of the element itself
+	 *
+	 * NOTE(review): this is plain u32 arithmetic; presumably
+	 * __htab_map_alloc() bounds key_size/value_size - confirm that the
+	 * product cannot wrap before it gets there.
+	 */
+	elem_value_size = num_possible_cpus() * attr->value_size;
+	elem_value_size += elem_size;
+
+	return __htab_map_alloc(attr, elem_size, elem_value_size,
+				offsetof(struct htab_percpu_elem, key));
+}
+
+/* Called from syscall or from eBPF program
+ *
+ * Unlinks the element matching @key under the bucket lock and frees it via
+ * call_rcu().  Returns 0 on success or -ENOENT when no element matches.
+ */
+static int htab_percpu_map_delete_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	struct htab_elem_common *victim;
+	unsigned long irq_flags;
+	struct bucket *bkt;
+	u32 hash;
+	int err;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	hash = htab_map_hash(key, map->key_size);
+	bkt = __select_bucket(htab, hash);
+
+	raw_spin_lock_irqsave(&bkt->lock, irq_flags);
+
+	victim = lookup_elem_raw(htab, &bkt->head, hash, key);
+	if (!victim) {
+		err = -ENOENT;
+		goto unlock;
+	}
+
+	/* unlink now, release the memory from the RCU callback */
+	hlist_del_rcu(&victim->hash_node);
+	atomic_dec(&htab->count);
+	call_rcu(&victim->rcu, htab_percpu_elem_rcu_free);
+	err = 0;
+
+unlock:
+	raw_spin_unlock_irqrestore(&bkt->lock, irq_flags);
+	return err;
+}
+
+/* Called from syscall or eBPF program
+ *
+ * Returns the running CPU's copy of the value stored under @key, or NULL
+ * when the key is not present.
+ */
+static void *htab_percpu_map_lookup_elem(struct bpf_map *map, void *key)
+{
+	struct htab_elem_common *found;
+
+	found = __htab_map_lookup_elem(container_of(map, struct bpf_htab, map),
+				       key);
+	if (!found)
+		return NULL;
+
+	return this_cpu_ptr(htab_percpu_elem(found)->value);
+}
+
+/* Called from syscall or from eBPF program
+ *
+ * Inserts @key with @value, or, when the key already exists, overwrites
+ * the running CPU's copy of its value in place.
+ *
+ * Returns 0 on success, -EINVAL for @map_flags above BPF_EXIST, -ENOMEM
+ * when the new element cannot be allocated, or the error reported by
+ * htab_map_check_update().
+ */
+static int htab_percpu_map_update_elem(struct bpf_map *map, void *key,
+				       void *value, u64 map_flags)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	struct htab_percpu_elem *l_new, *l_old;
+	struct hlist_head *head;
+	struct bucket *b;
+	unsigned long flags;
+	int ret;
+
+	if (map_flags > BPF_EXIST)
+		/* unknown flags */
+		return -EINVAL;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	/* allocate new element outside of lock */
+	l_new = htab_percpu_elem_alloc(htab, key, value);
+	if (!l_new)
+		return -ENOMEM;
+
+	b = __select_bucket(htab, l_new->common.hash);
+	head = &b->head;
+
+	/* htab_percpu_map_update_elem() can be called in_irq() */
+	raw_spin_lock_irqsave(&b->lock, flags);
+
+	l_old = htab_percpu_elem(htab_map_check_update(
+					htab, head, l_new->common.hash, key,
+					map_flags));
+
+	if (IS_ERR(l_old)) {
+		ret = PTR_ERR(l_old);
+		goto free_new;
+	}
+
+	if (l_old) {
+		/* The key already exists: update this CPU's value in place.
+		 * l_new was never linked into the bucket, so it has to be
+		 * released below; returning without freeing it would leak
+		 * the element and its per-cpu area on every such update.
+		 */
+		memcpy(this_cpu_ptr(l_old->value), value, map->value_size);
+		ret = 0;
+		goto free_new;
+	}
+
+	hlist_add_head_rcu(&l_new->common.hash_node, head);
+	atomic_inc(&htab->count);
+
+	raw_spin_unlock_irqrestore(&b->lock, flags);
+
+	return 0;
+
+free_new:
+	raw_spin_unlock_irqrestore(&b->lock, flags);
+	/* l_new was never published to readers, so it can be freed directly */
+	htab_percpu_elem_free(l_new);
+	return ret;
+}
+
+/* Map destructor: delegates to __htab_map_free(), handing it the per-cpu
+ * element free routine.
+ */
+static void htab_percpu_map_free(struct bpf_map *map)
+{
+	__htab_map_free(container_of(map, struct bpf_htab, map),
+			(elem_free_fn)htab_percpu_elem_free);
+}
+
+/* Operations of the per-cpu hash map; key iteration reuses
+ * htab_map_get_next_key() rather than a per-cpu specific routine.
+ */
+static const struct bpf_map_ops htab_percpu_ops = {
+	.map_alloc		= htab_percpu_map_alloc,
+	.map_free		= htab_percpu_map_free,
+	.map_lookup_elem	= htab_percpu_map_lookup_elem,
+	.map_update_elem	= htab_percpu_map_update_elem,
+	.map_delete_elem	= htab_percpu_map_delete_elem,
+	.map_get_next_key	= htab_map_get_next_key,
+};
+
+/* Ties BPF_MAP_TYPE_PERCPU_HASH to htab_percpu_ops for registration */
+static struct bpf_map_type_list htab_percpu_type __read_mostly = {
+	.type	= BPF_MAP_TYPE_PERCPU_HASH,
+	.ops	= &htab_percpu_ops,
+};
+
static int __init register_htab_map(void)
{
bpf_register_map_type(&htab_type);
+ bpf_register_map_type(&htab_percpu_type); /* also register the per-cpu hash map type */
return 0;
}
late_initcall(register_htab_map);
--
2.5.1