[PATCH AUTOSEL 6.9 04/35] bpf: Avoid kfree_rcu() under lock in bpf_lpm_trie.

From: Sasha Levin
Date: Mon May 27 2024 - 10:13:11 EST


From: Alexei Starovoitov <ast@xxxxxxxxxx>

[ Upstream commit 59f2f841179aa6a0899cb9cf53659149a35749b7 ]

syzbot reported the following lock sequence:
cpu 2:
grabs timer_base lock
spins on bpf_lpm lock

cpu 1:
grab rcu krcp lock
spins on timer_base lock

cpu 0:
grab bpf_lpm lock
spins on rcu krcp lock

bpf_lpm lock can be the same.
timer_base lock can also be the same due to timer migration.
but rcu krcp lock is always per-cpu, so it cannot be the same lock.
Hence it's a false positive.
To avoid lockdep complaining move kfree_rcu() after spin_unlock.

Reported-by: syzbot+1fa663a2100308ab6eab@xxxxxxxxxxxxxxxxxxxxxxxxx
Signed-off-by: Alexei Starovoitov <ast@xxxxxxxxxx>
Signed-off-by: Andrii Nakryiko <andrii@xxxxxxxxxx>
Link: https://lore.kernel.org/bpf/20240329171439.37813-1-alexei.starovoitov@xxxxxxxxx
Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx>
---
kernel/bpf/lpm_trie.c | 13 +++++++++----
1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 050fe1ebf0f7d..d0febf07051ed 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -308,6 +308,7 @@ static long trie_update_elem(struct bpf_map *map,
{
struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
struct lpm_trie_node *node, *im_node = NULL, *new_node = NULL;
+ struct lpm_trie_node *free_node = NULL;
struct lpm_trie_node __rcu **slot;
struct bpf_lpm_trie_key_u8 *key = _key;
unsigned long irq_flags;
@@ -382,7 +383,7 @@ static long trie_update_elem(struct bpf_map *map,
trie->n_entries--;

rcu_assign_pointer(*slot, new_node);
- kfree_rcu(node, rcu);
+ free_node = node;

goto out;
}
@@ -429,6 +430,7 @@ static long trie_update_elem(struct bpf_map *map,
}

spin_unlock_irqrestore(&trie->lock, irq_flags);
+ kfree_rcu(free_node, rcu);

return ret;
}
@@ -437,6 +439,7 @@ static long trie_update_elem(struct bpf_map *map,
static long trie_delete_elem(struct bpf_map *map, void *_key)
{
struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
+ struct lpm_trie_node *free_node = NULL, *free_parent = NULL;
struct bpf_lpm_trie_key_u8 *key = _key;
struct lpm_trie_node __rcu **trim, **trim2;
struct lpm_trie_node *node, *parent;
@@ -506,8 +509,8 @@ static long trie_delete_elem(struct bpf_map *map, void *_key)
else
rcu_assign_pointer(
*trim2, rcu_access_pointer(parent->child[0]));
- kfree_rcu(parent, rcu);
- kfree_rcu(node, rcu);
+ free_parent = parent;
+ free_node = node;
goto out;
}

@@ -521,10 +524,12 @@ static long trie_delete_elem(struct bpf_map *map, void *_key)
rcu_assign_pointer(*trim, rcu_access_pointer(node->child[1]));
else
RCU_INIT_POINTER(*trim, NULL);
- kfree_rcu(node, rcu);
+ free_node = node;

out:
spin_unlock_irqrestore(&trie->lock, irq_flags);
+ kfree_rcu(free_parent, rcu);
+ kfree_rcu(free_node, rcu);

return ret;
}
--
2.43.0