[PATCH v3 1/6] bpf: Introduce the bpf_list_del kfunc.
From: Chengkaitao
Date: Mon Mar 02 2026 - 07:44:00 EST
From: Kaitao Cheng <chengkaitao@xxxxxxxxxx>
If a user holds ownership of a node in the middle of a list, they
can remove it from the list directly, without being restricted to
popping nodes only from the head or the tail.
For kfuncs that take only a bpf_list_node parameter (with no list
head argument), additionally initialize the corresponding btf_field.
Add a new lock_rec member to struct bpf_reference_state so the
verifier can check that the appropriate lock is held.
bpf_list_del is typically used together with bpf_refcount. After
calling bpf_list_del, the program generally needs to drop its
reference to the list node twice to avoid leaking the refcount.
Signed-off-by: Kaitao Cheng <chengkaitao@xxxxxxxxxx>
---
include/linux/bpf_verifier.h | 4 +++
kernel/bpf/btf.c | 33 +++++++++++++++++++---
kernel/bpf/helpers.c | 17 ++++++++++++
kernel/bpf/verifier.c | 54 ++++++++++++++++++++++++++++++++++--
4 files changed, 101 insertions(+), 7 deletions(-)
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index ef8e45a362d9..e1358b62d6cc 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -261,6 +261,10 @@ struct bpf_reference_state {
* it matches on unlock.
*/
void *ptr;
+ /* For REF_TYPE_LOCK_*: btf_record of the locked object, used for lock
+ * checking in kfuncs such as bpf_list_del.
+ */
+ struct btf_record *lock_rec;
};
struct bpf_retval_range {
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 4872d2a6c42d..8a977c793d56 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3785,7 +3785,6 @@ static int btf_find_field_one(const struct btf *btf,
case BPF_RES_SPIN_LOCK:
case BPF_TIMER:
case BPF_WORKQUEUE:
- case BPF_LIST_NODE:
case BPF_RB_NODE:
case BPF_REFCOUNT:
case BPF_TASK_WORK:
@@ -3794,6 +3793,27 @@ static int btf_find_field_one(const struct btf *btf,
if (ret < 0)
return ret;
break;
+ case BPF_LIST_NODE:
+ ret = btf_find_struct(btf, var_type, off, sz, field_type,
+ info_cnt ? &info[0] : &tmp);
+ if (ret < 0)
+ return ret;
+ /* graph_root for verifier: container type and node member name */
+ if (info_cnt && var_idx >= 0 && (u32)var_idx < btf_type_vlen(var)) {
+ u32 id;
+ const struct btf_member *member;
+
+ for (id = 1; id < btf_nr_types(btf); id++) {
+ if (btf_type_by_id(btf, id) == var) {
+ info[0].graph_root.value_btf_id = id;
+ member = btf_type_member(var) + var_idx;
+ info[0].graph_root.node_name =
+ __btf_name_by_offset(btf, member->name_off);
+ break;
+ }
+ }
+ }
+ break;
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
case BPF_KPTR_PERCPU:
@@ -4138,6 +4158,7 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
if (ret < 0)
goto end;
break;
+ case BPF_LIST_NODE:
case BPF_LIST_HEAD:
ret = btf_parse_list_head(btf, &rec->fields[i], &info_arr[i]);
if (ret < 0)
@@ -4148,7 +4169,6 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
if (ret < 0)
goto end;
break;
- case BPF_LIST_NODE:
case BPF_RB_NODE:
break;
default:
@@ -4192,20 +4212,25 @@ int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec)
int i;
/* There are three types that signify ownership of some other type:
- * kptr_ref, bpf_list_head, bpf_rb_root.
+ * kptr_ref, bpf_list_head/node, bpf_rb_root.
* kptr_ref only supports storing kernel types, which can't store
* references to program allocated local types.
*
* Hence we only need to ensure that bpf_{list_head,rb_root} ownership
* does not form cycles.
*/
- if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & (BPF_GRAPH_ROOT | BPF_UPTR)))
+ if (IS_ERR_OR_NULL(rec) || !(rec->field_mask &
+ (BPF_GRAPH_ROOT | BPF_GRAPH_NODE | BPF_UPTR)))
return 0;
+
for (i = 0; i < rec->cnt; i++) {
struct btf_struct_meta *meta;
const struct btf_type *t;
u32 btf_id;
+ if (rec->fields[i].type & BPF_GRAPH_NODE)
+ rec->fields[i].graph_root.value_rec = rec;
+
if (rec->fields[i].type == BPF_UPTR) {
/* The uptr only supports pinning one page and cannot
* point to a kernel struct
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 6eb6c82ed2ee..577af62a9f7a 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2459,6 +2459,22 @@ __bpf_kfunc struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head)
return __bpf_list_del(head, true);
}
+__bpf_kfunc struct bpf_list_node *bpf_list_del(struct bpf_list_node *node)
+{
+ struct bpf_list_node_kern *knode = (struct bpf_list_node_kern *)node;
+
+ if (unlikely(!knode))
+ return NULL;
+
+ if (WARN_ON_ONCE(!READ_ONCE(knode->owner)))
+ return NULL;
+
+ list_del_init(&knode->list_head);
+ WRITE_ONCE(knode->owner, NULL);
+
+ return node;
+}
+
__bpf_kfunc struct bpf_list_node *bpf_list_front(struct bpf_list_head *head)
{
struct list_head *h = (struct list_head *)head;
@@ -4545,6 +4561,7 @@ BTF_ID_FLAGS(func, bpf_list_push_front_impl)
BTF_ID_FLAGS(func, bpf_list_push_back_impl)
BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_list_del, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_list_front, KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_list_back, KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index a3390190c26e..8a782772dd36 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1536,7 +1536,7 @@ static int acquire_reference(struct bpf_verifier_env *env, int insn_idx)
}
static int acquire_lock_state(struct bpf_verifier_env *env, int insn_idx, enum ref_state_type type,
- int id, void *ptr)
+ int id, void *ptr, struct btf_record *lock_rec)
{
struct bpf_verifier_state *state = env->cur_state;
struct bpf_reference_state *s;
@@ -1547,6 +1547,7 @@ static int acquire_lock_state(struct bpf_verifier_env *env, int insn_idx, enum r
s->type = type;
s->id = id;
s->ptr = ptr;
+ s->lock_rec = lock_rec;
state->active_locks++;
state->active_lock_id = id;
@@ -1662,6 +1663,23 @@ static struct bpf_reference_state *find_lock_state(struct bpf_verifier_state *st
return NULL;
}
+static bool rec_has_list_matching_node_type(struct bpf_verifier_env *env,
+ const struct btf_record *rec,
+ const struct btf *node_btf, u32 node_btf_id)
+{
+ u32 i;
+
+ for (i = 0; i < rec->cnt; i++) {
+ if (!(rec->fields[i].type & BPF_LIST_HEAD))
+ continue;
+ if (btf_struct_ids_match(&env->log, node_btf, node_btf_id, 0,
+ rec->fields[i].graph_root.btf,
+ rec->fields[i].graph_root.value_btf_id, true))
+ return true;
+ }
+ return false;
+}
+
static void update_peak_states(struct bpf_verifier_env *env)
{
u32 cur_states;
@@ -8576,7 +8594,8 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno, int flags)
type = REF_TYPE_RES_LOCK;
else
type = REF_TYPE_LOCK;
- err = acquire_lock_state(env, env->insn_idx, type, reg->id, ptr);
+ err = acquire_lock_state(env, env->insn_idx, type, reg->id, ptr,
+ reg_btf_record(reg));
if (err < 0) {
verbose(env, "Failed to acquire lock state\n");
return err;
@@ -12431,6 +12450,7 @@ enum special_kfunc_type {
KF_bpf_list_push_back_impl,
KF_bpf_list_pop_front,
KF_bpf_list_pop_back,
+ KF_bpf_list_del,
KF_bpf_list_front,
KF_bpf_list_back,
KF_bpf_cast_to_kern_ctx,
@@ -12491,6 +12511,7 @@ BTF_ID(func, bpf_list_push_front_impl)
BTF_ID(func, bpf_list_push_back_impl)
BTF_ID(func, bpf_list_pop_front)
BTF_ID(func, bpf_list_pop_back)
+BTF_ID(func, bpf_list_del)
BTF_ID(func, bpf_list_front)
BTF_ID(func, bpf_list_back)
BTF_ID(func, bpf_cast_to_kern_ctx)
@@ -12966,6 +12987,7 @@ static bool is_bpf_list_api_kfunc(u32 btf_id)
btf_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
btf_id == special_kfunc_list[KF_bpf_list_pop_front] ||
btf_id == special_kfunc_list[KF_bpf_list_pop_back] ||
+ btf_id == special_kfunc_list[KF_bpf_list_del] ||
btf_id == special_kfunc_list[KF_bpf_list_front] ||
btf_id == special_kfunc_list[KF_bpf_list_back];
}
@@ -13088,7 +13110,8 @@ static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env,
switch (node_field_type) {
case BPF_LIST_NODE:
ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
- kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_back_impl]);
+ kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
+ kfunc_btf_id == special_kfunc_list[KF_bpf_list_del]);
break;
case BPF_RB_NODE:
ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
@@ -13211,6 +13234,9 @@ __process_kf_arg_ptr_to_graph_node(struct bpf_verifier_env *env,
return -EINVAL;
}
+ if (!*node_field)
+ *node_field = field;
+
field = *node_field;
et = btf_type_by_id(field->graph_root.btf, field->graph_root.value_btf_id);
@@ -13237,6 +13263,28 @@ __process_kf_arg_ptr_to_graph_node(struct bpf_verifier_env *env,
return -EINVAL;
}
+ /* bpf_list_del: require list head's lock. Use refs[] REF_TYPE_LOCK_MASK
+ * only. At lock time we stored the locked object's btf_record in ref->
+ * lock_rec, so we can get the list value type from the ref directly.
+ */
+ if (node_field_type == BPF_LIST_NODE &&
+ meta->func_id == special_kfunc_list[KF_bpf_list_del]) {
+ struct bpf_verifier_state *cur = env->cur_state;
+
+ for (int i = 0; i < cur->acquired_refs; i++) {
+ struct bpf_reference_state *s = &cur->refs[i];
+
+ if (!(s->type & REF_TYPE_LOCK_MASK) || !s->lock_rec)
+ continue;
+
+ if (rec_has_list_matching_node_type(env, s->lock_rec,
+ reg->btf, reg->btf_id))
+ return 0;
+ }
+ verbose(env, "bpf_spin_lock must be held for bpf_list_del\n");
+ return -EINVAL;
+ }
+
return 0;
}
--
2.50.1 (Apple Git-155)