Re: [PATCH] bpf: Always defer local storage free

From: Cheng-Yang Chou

Date: Tue Mar 17 2026 - 14:51:16 EST


Hi Andrea,

On Tue, Mar 17, 2026 at 09:15:49AM +0100, Andrea Righi wrote:
> From: Andrea Righi <arighi@xxxxxxxxxx>
> Subject: [PATCH] bpf: Avoid circular lock dependency when deleting local storage
>
> Calling bpf_task_storage_delete() from a context that holds the runqueue
> lock (e.g., sched_ext's ops.exit_task() callback) can lead to a circular
> lock dependency:
>
> WARNING: possible circular locking dependency detected
> ...
> Chain exists of:
> rcu_tasks_trace_srcu_struct_srcu_usage.lock --> &p->pi_lock --> &rq->__lock
>
> Possible unsafe locking scenario:
>
>        CPU0                    CPU1
>        ----                    ----
>   lock(&rq->__lock);
>                                lock(&p->pi_lock);
>                                lock(&rq->__lock);
>   lock(rcu_tasks_trace_srcu_struct_srcu_usage.lock);
>
> *** DEADLOCK ***
>
> Fix by adding a reuse_now flag to bpf_selem_unlink() with the same
> meaning as in bpf_selem_free() and bpf_local_storage_free(). When the
> task is in the TASK_DEAD state it will not run sleepable BPF again, so
> it is safe to free storage immediately via call_rcu() instead of
> call_rcu_tasks_trace() and we can prevent the circular lock dependency.
>
> Other local storage types (sk, cgrp, inode) use reuse_now=false and keep
> waiting for sleepable BPF before freeing.
>
> Signed-off-by: Andrea Righi <arighi@xxxxxxxxxx>
> ---
> include/linux/bpf_local_storage.h | 2 +-
> kernel/bpf/bpf_cgrp_storage.c | 2 +-
> kernel/bpf/bpf_inode_storage.c | 2 +-
> kernel/bpf/bpf_local_storage.c | 6 +++---
> kernel/bpf/bpf_task_storage.c | 7 ++++++-
> net/core/bpf_sk_storage.c | 2 +-
> 6 files changed, 13 insertions(+), 8 deletions(-)
>
> diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
> index 8157e8da61d40..f5d4159646a83 100644
> --- a/include/linux/bpf_local_storage.h
> +++ b/include/linux/bpf_local_storage.h
> @@ -184,7 +184,7 @@ int bpf_local_storage_map_check_btf(struct bpf_map *map,
> void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
> struct bpf_local_storage_elem *selem);
>
> -int bpf_selem_unlink(struct bpf_local_storage_elem *selem);
> +int bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool reuse_now);
>
> int bpf_selem_link_map(struct bpf_local_storage_map *smap,
> struct bpf_local_storage *local_storage,
> diff --git a/kernel/bpf/bpf_cgrp_storage.c b/kernel/bpf/bpf_cgrp_storage.c
> index c2a2ead1f466d..853183eead2c2 100644
> --- a/kernel/bpf/bpf_cgrp_storage.c
> +++ b/kernel/bpf/bpf_cgrp_storage.c
> @@ -89,7 +89,7 @@ static int cgroup_storage_delete(struct cgroup *cgroup, struct bpf_map *map)
> if (!sdata)
> return -ENOENT;
>
> - return bpf_selem_unlink(SELEM(sdata));
> + return bpf_selem_unlink(SELEM(sdata), false);
> }
>
> static long bpf_cgrp_storage_delete_elem(struct bpf_map *map, void *key)
> diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c
> index e86734609f3d2..470f4b02c79ea 100644
> --- a/kernel/bpf/bpf_inode_storage.c
> +++ b/kernel/bpf/bpf_inode_storage.c
> @@ -110,7 +110,7 @@ static int inode_storage_delete(struct inode *inode, struct bpf_map *map)
> if (!sdata)
> return -ENOENT;
>
> - return bpf_selem_unlink(SELEM(sdata));
> + return bpf_selem_unlink(SELEM(sdata), false);
> }
>
> static long bpf_fd_inode_storage_delete_elem(struct bpf_map *map, void *key)
> diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
> index 9c96a4477f81a..caa1aa5bc17c7 100644
> --- a/kernel/bpf/bpf_local_storage.c
> +++ b/kernel/bpf/bpf_local_storage.c
> @@ -385,7 +385,7 @@ static void bpf_selem_link_map_nolock(struct bpf_local_storage_map_bucket *b,
> * Unlink an selem from map and local storage with lock held.
> * This is the common path used by local storages to delete an selem.
> */
> -int bpf_selem_unlink(struct bpf_local_storage_elem *selem)
> +int bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool reuse_now)
> {
> struct bpf_local_storage *local_storage;
> bool free_local_storage = false;
> @@ -419,10 +419,10 @@ int bpf_selem_unlink(struct bpf_local_storage_elem *selem)
> out:
> raw_res_spin_unlock_irqrestore(&local_storage->lock, flags);
>
> - bpf_selem_free_list(&selem_free_list, false);
> + bpf_selem_free_list(&selem_free_list, reuse_now);
>
> if (free_local_storage)
> - bpf_local_storage_free(local_storage, false);
> + bpf_local_storage_free(local_storage, reuse_now);
>
> return err;
> }
> diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
> index 605506792b5b4..0311e2cd3f3e6 100644
> --- a/kernel/bpf/bpf_task_storage.c
> +++ b/kernel/bpf/bpf_task_storage.c
> @@ -134,7 +134,12 @@ static int task_storage_delete(struct task_struct *task, struct bpf_map *map)
> if (!sdata)
> return -ENOENT;
>
> - return bpf_selem_unlink(SELEM(sdata));
> + /*
> + * When the task is dead it won't run sleepable BPF again, so it is
> + * safe to reuse storage immediately.
> + */
> + return bpf_selem_unlink(SELEM(sdata),
> + READ_ONCE(task->__state) == TASK_DEAD);
> }
>
> static long bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key)
> diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
> index f8338acebf077..d20b4b5c99ef7 100644
> --- a/net/core/bpf_sk_storage.c
> +++ b/net/core/bpf_sk_storage.c
> @@ -40,7 +40,7 @@ static int bpf_sk_storage_del(struct sock *sk, struct bpf_map *map)
> if (!sdata)
> return -ENOENT;
>
> - return bpf_selem_unlink(SELEM(sdata));
> + return bpf_selem_unlink(SELEM(sdata), false);
> }
>
> /* Called by __sk_destruct() & bpf_sk_storage_clone() */
> --
> 2.53.0
>

I was able to reproduce the error on my end using the steps you
provided earlier. After testing with this patch applied, the error
is resolved.

Tested-by: Cheng-Yang Chou <yphbchou0911@xxxxxxxxx>

--
Thanks,
Cheng-Yang