[PATCH] xfrm: policy: Fix use-after-free on inexact bin in xfrm_policy_bysel_ctx()
From: Sanghyun Park
Date: Fri May 29 2026 - 04:10:52 EST
xfrm_policy_bysel_ctx() saves a pointer to an xfrm_pol_inexact_bin
while holding xfrm_policy_lock, then drops the lock to call
xfrm_policy_kill(). After that it calls
xfrm_policy_inexact_prune_bin() on the saved pointer. A concurrent
xfrm_hash_rebuild() (triggered by XFRM_MSG_NEWSPDINFO) can free the
bin via __xfrm_policy_inexact_flush() -> kfree_rcu() during the window
where the lock is not held, making the saved pointer stale.
Fix by pruning the bin while still holding xfrm_policy_lock, before
dropping it. Call __xfrm_policy_inexact_prune_bin() directly since the
lock is already held. This is safe under the spinlock because the
function uses kfree_rcu() for the actual free, which does not block.
The prune is now also conditional on ret, i.e. it only runs when a
policy was actually found and unlinked. The wrapper
xfrm_policy_inexact_prune_bin() becomes unused and is removed.
Race:
  CPU0 (XFRM_MSG_DELPOLICY)              CPU1 (XFRM_MSG_NEWSPDINFO)
  ============================           ==========================
  xfrm_policy_bysel_ctx():
    spin_lock_bh(xfrm_policy_lock)
    bin = xfrm_policy_inexact_lookup()
    __xfrm_policy_unlink(pol)
    spin_unlock_bh(xfrm_policy_lock)
    xfrm_policy_kill(ret)
    // wide window, lock not held
                                         xfrm_hash_rebuild():
                                           spin_lock_bh(xfrm_policy_lock)
                                           __xfrm_policy_inexact_flush():
                                             kfree_rcu(bin)  // bin freed
                                           spin_unlock_bh(xfrm_policy_lock)
    xfrm_policy_inexact_prune_bin(bin)
    // UAF: bin is freed
Reproduction:
1. Build kernel >= 4.19 with CONFIG_KASAN=y, CONFIG_XFRM=y,
CONFIG_USER_NS=y (for unprivileged reproduction)
2. Boot in a VM
3. Compile: gcc -O2 -o repro -static -pthread repro.c
4. Run as any unprivileged user: ./repro 30 4
(30 seconds duration, 4 threads per type)
5. Check dmesg for: BUG: KASAN: slab-use-after-free in xfrm_policy_bysel_ctx
The reproducer is fully unprivileged: it uses unshare(CLONE_NEWUSER |
CLONE_NEWNET) to get CAP_NET_ADMIN inside a user namespace, then
races XFRM_MSG_DELPOLICY (by selector) against XFRM_MSG_NEWSPDINFO
(threshold changes that trigger hash rebuilds).
Note: the race is hard to trigger with an unmodified kfree_rcu()
because the actual free is deferred until after an RCU grace period,
which usually outlasts the stale-access window. Adding a
cond_resched() delay after xfrm_policy_kill(), or replacing
kfree_rcu() with kfree() (for testing only), makes it reliably
reproducible. The lifetime violation exists regardless of whether
KASAN catches it on a given run.
KASAN report (reproduced on 6.12.91 with immediate kfree for testing):
BUG: KASAN: slab-use-after-free in xfrm_policy_bysel_ctx.cold+0x59/0xb8
Read of size 8 at addr ffff8881153c4700 by task repro_xfrm/387
Call Trace:
xfrm_policy_bysel_ctx.cold+0x59/0xb8
xfrm_get_policy+0x7df/0xc00
xfrm_user_rcv_msg+0x41b/0x950
netlink_rcv_skb+0x16d/0x420
Fixes: 9cf545ebd5d8 ("xfrm: policy: implement selector-based inexact lookup")
Signed-off-by: Sanghyun Park <sanghyun.park.cnu@xxxxxxxxx>
---
Hi,
I'm Sanghyun Park, a security researcher. I found this while auditing
the XFRM policy code. The bug was introduced in 4.19 and affects every
kernel release since. Any unprivileged user can trigger it through
user namespaces (CLONE_NEWUSER + CLONE_NEWNET), which makes it a
potential local privilege escalation (LPE) vector on systems with
user namespaces enabled (the default on all major distros: Ubuntu,
Fedora, Debian, Arch, etc.).
The C reproducer is attached separately (repro.c).
net/xfrm/xfrm_policy.c | 15 ++++-----------
1 file changed, 4 insertions(+), 11 deletions(-)
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index fca07f8e60..fef6cff511 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1156,15 +1156,6 @@ static void __xfrm_policy_inexact_prune_bin(struct xfrm_pol_inexact_bin *b, bool
}
}
-static void xfrm_policy_inexact_prune_bin(struct xfrm_pol_inexact_bin *b)
-{
- struct net *net = read_pnet(&b->k.net);
-
- spin_lock_bh(&net->xfrm.xfrm_policy_lock);
- __xfrm_policy_inexact_prune_bin(b, false);
- spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
-}
-
static void __xfrm_policy_inexact_flush(struct net *net)
{
struct xfrm_pol_inexact_bin *bin, *t;
@@ -1707,12 +1698,14 @@ xfrm_policy_bysel_ctx(struct net *net, const struct xfrm_mark *mark, u32 if_id,
}
ret = pol;
}
+
+ if (bin && ret && delete)
+ __xfrm_policy_inexact_prune_bin(bin, false);
+
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
if (ret && delete)
xfrm_policy_kill(ret);
- if (bin && delete)
- xfrm_policy_inexact_prune_bin(bin);
return ret;
}
EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
[ 42.057680] BUG: KASAN: slab-use-after-free in xfrm_policy_bysel_ctx.cold+0x59/0xb8
[ 42.058579] Read of size 8 at addr ffff8881153c4700 by task repro_xfrm/387
[ 42.059544] CPU: 1 UID: 0 PID: 387 Comm: repro_xfrm Not tainted 6.12.91-dirty #23
[ 42.059555] Hardware name: QEMU Ubuntu 25.04 PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
[ 42.059562] Call Trace:
[ 42.059575] <TASK>
[ 42.059582] dump_stack_lvl+0xba/0x110
[ 42.059623] ? xfrm_policy_bysel_ctx.cold+0x59/0xb8
[ 42.059630] print_report+0x174/0x4f6
[ 42.059665] ? __virt_addr_valid+0x86/0x670
[ 42.059701] ? xfrm_policy_bysel_ctx.cold+0x59/0xb8
[ 42.059708] kasan_report+0xda/0x110
[ 42.059736] ? xfrm_policy_bysel_ctx.cold+0x59/0xb8
[ 42.059745] xfrm_policy_bysel_ctx.cold+0x59/0xb8
[ 42.059753] ? __pfx_xfrm_policy_bysel_ctx+0x10/0x10
[ 42.059768] ? find_held_lock+0x2d/0x110
[ 42.059798] xfrm_get_policy+0x7df/0xc00
[ 42.059821] ? __pfx_xfrm_get_policy+0x10/0x10
[ 42.059828] ? hlock_class+0x4e/0x130
[ 42.059834] ? cap_capable+0x1d4/0x240
[ 42.059879] ? __nla_parse+0x40/0x60
[ 42.059908] ? __pfx_xfrm_get_policy+0x10/0x10
[ 42.059914] xfrm_user_rcv_msg+0x41b/0x950
[ 42.059921] ? __pfx_xfrm_user_rcv_msg+0x10/0x10
[ 42.059926] ? hlock_class+0x4e/0x130
[ 42.059932] ? __lock_acquire+0xfdf/0x3b50
[ 42.059951] ? __mutex_trylock_common+0xde/0x250
[ 42.059957] ? __pfx___mutex_trylock_common+0x10/0x10
[ 42.059962] netlink_rcv_skb+0x16d/0x420
[ 42.059968] ? __pfx_xfrm_user_rcv_msg+0x10/0x10
[ 42.059973] ? __pfx_netlink_rcv_skb+0x10/0x10
[ 42.059977] ? netlink_deliver_tap+0x1af/0xd50
[ 42.059998] xfrm_netlink_rcv+0x76/0x90
[ 42.060003] netlink_unicast+0x58c/0x850
[ 42.060010] ? __pfx_netlink_unicast+0x10/0x10
[ 42.060018] netlink_sendmsg+0x8f7/0xdc0
[ 42.060026] ? __pfx_netlink_sendmsg+0x10/0x10
[ 42.060034] ? __pfx_netlink_sendmsg+0x10/0x10
[ 42.060041] ____sys_sendmsg+0x907/0xa60
[ 42.060075] ? __pfx_____sys_sendmsg+0x10/0x10
[ 42.060081] ? netlink_recvmsg+0x77b/0xe60
[ 42.060088] ___sys_sendmsg+0x197/0x1e0
[ 42.060093] ? __pfx____sys_sendmsg+0x10/0x10
[ 42.060104] __sys_sendmsg+0x176/0x220
[ 42.060120] ? __pfx___sys_sendmsg+0x10/0x10
[ 42.060128] ? trace_x86_fpu_regs_activated+0x5d/0x1b0
[ 42.060154] do_syscall_64+0xbb/0x1f0
[ 42.060174] entry_SYSCALL_64_after_hwframe+0x77/0x7f
[ 42.060202] RIP: 0033:0x455b82
[ 42.060219] Code: 08 0f 85 71 df ff ff 49 89 fb 48 89 f0 48 89 d7 48 89 ce 4c 89 c2 4d 89 ca 4c 8b 44 24 08 4c 8b 4c 24 10 4c 89 5c 24 08 0f 05 <c3> 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 f3 0f 1e fa 55 48 89 e5
[ 42.060224] RSP: 002b:00007f3dfc9a3f68 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
[ 42.060247] RAX: ffffffffffffffda RBX: 00007f3dfc9a56c0 RCX: 0000000000455b82
[ 42.060251] RDX: 0000000000000000 RSI: 00007f3dfc9a3fe0 RDI: 0000000000000004
[ 42.060254] RBP: 00007f3dfc9a3f90 R08: 0000000000000000 R09: 0000000000000000
[ 42.060257] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000020
[ 42.060260] R13: ffffffffffffffd0 R14: 0000000000000001 R15: 00007ffeb5970cb0
[ 42.060266] </TASK>
[ 42.090666] Allocated by task 387:
[ 42.091077] kasan_save_stack+0x30/0x50
Attachment:
repro.c
Description: Binary data