Re: XDP BPF JIT memory leak on armv7

From: Daniel Borkmann

Date: Thu Apr 16 2026 - 10:37:51 EST


Hi Jonas,

On 4/16/26 4:05 PM, Jonas Rebmann wrote:
Hello,

We have observed a memory leak when using the JIT on armv7 with XDP
programs in all kernels tested (6.1 to v7.0/master).

kmemleak reports:

unreferenced object 0xbf0a1000 (size 4096):
  comm "ping", pid 818, jiffies 4294949376
  hex dump (first 32 bytes):
    00 10 00 00 f1 de fd e7 f1 de fd e7 f1 de fd e7  ................
    f1 de fd e7 f1 de fd e7 f1 de fd e7 f1 de fd e7  ................
  backtrace (crc 38a5ba95):
    kmemleak_vmalloc+0x38/0x94
    __vmalloc_node_range_noprof+0x624/0x7e0
    execmem_alloc+0x7c/0x100
    bpf_jit_binary_alloc+0x64/0xfc
    bpf_int_jit_compile+0x14c/0x348
    bpf_jit_subprogs+0x4fc/0xa60
    bpf_check+0x1840/0x30e0
    bpf_prog_load+0x720/0xec4
    __sys_bpf+0x4cc/0x25d4
    ret_fast_syscall+0x0/0x54

This was reproduced on various debian systems:
 - stock debian sid with 6.19.12+deb14-armmp-lpae
 - stock debian 12 with 6.1.0-44-armmp-lpae

As well as debian sid with custom-built kernels with a
multi_v7_defconfig-based config; amongst the versions tested: latest master
(7.0.0-08391-g1d51b370a0f8), v7.0, v6.18, v6.8, v6.12.

Precise reproduction steps:
Enable the BPF JIT (if not done via CONFIG_BPF_JIT_ALWAYS_ON=y):
# sysctl -w net.core.bpf_jit_enable=1

Prepare any simple XDP program:
$ git clone https://github.com/xdp-project/xdp-tutorial.git
$ cd xdp-tutorial
# apt install git cmake build-essential xdp-tools clang pkg-config libelf-dev m4 libpcap-dev llvm libbpf-dev libxdp-dev
$ ln -s /usr/include/arm-linux-gnueabihf/gnu/stubs-hard.h /usr/include/arm-linux-gnueabihf/gnu/stubs-soft.h
$ ./configure
$ cd basic01-xdp-pass
$ make

Store /proc/vmallocinfo:
# cat /proc/vmallocinfo > /tmp/vmallocinfo-before

Load the program and confirm it is loaded, unload and confirm:
# ./xdp_pass_user -d lo
# xdp-loader status
lo                     xdp_prog_simple   skb      83   d4f8542f2b42fac5
# ./xdp_pass_user -d lo --unload-all
# xdp-loader status
lo                     <No XDP program loaded!>

Compare /proc/vmallocinfo:
# cat /proc/vmallocinfo > /tmp/vmallocinfo-after
# diff -u <(grep bpf_jit_binary_alloc /tmp/vmallocinfo-before) <(grep bpf_jit_binary_alloc /tmp/vmallocinfo-after)
+0x4e292a08-0xae3e49c0    8192 bpf_jit_binary_alloc+0x64/0xfc pages=1 vmalloc

# echo scan > /sys/kernel/debug/kmemleak
# cat /sys/kernel/debug/kmemleak
[see above]

The backtrace corresponds in commit 1d51b370a0f8 to

mm/kmemleak.c:596
mm/vmalloc.c:4093
mm/execmem.c:44
kernel/bpf/core.c:1093
arch/arm/net/bpf_jit_32.c:2241
kernel/bpf/fixups.c:1122
kernel/bpf/verifier.c:20099
kernel/bpf/syscall.c:3083
kernel/bpf/syscall.c:6252
arch/arm/kernel/entry-common.S:44

Each subsequent run introduces an additional leak.
I don't have access to arm32, but it looks like it's completely missing the
ability to do BPF to BPF calls. You would need something like the below
(uncompiled / untested).

It would actually be nice if you have any chance to run through the BPF
selftests to see what else is broken on arm32. Outside of the x86-64, arm64,
riscv64 and s390x BPF JITs, the results might vary quite a lot on what works
and what doesn't. :/

diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index deeb8f292454..89b24fe357aa 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -216,6 +216,12 @@ struct jit_ctx {
#endif
};
+struct arm_jit_data {
+ struct jit_ctx ctx;
+ struct bpf_binary_header *header;
+ u8 *image_ptr;
+};
+
/*
* Wrappers which handle both OABI and EABI and assures Thumb2 interworking
* (where the assembly routines like __aeabi_uidiv could cause problems).
@@ -2146,7 +2152,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
struct bpf_prog *tmp, *orig_prog = prog;
struct bpf_binary_header *header;
+ struct arm_jit_data *jit_data;
bool tmp_blinded = false;
+ bool extra_pass = false;
struct jit_ctx ctx;
unsigned int tmp_idx;
unsigned int image_size;
@@ -2171,6 +2179,24 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
prog = tmp;
}
+ jit_data = prog->aux->jit_data;
+ if (!jit_data) {
+ jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
+ if (!jit_data) {
+ prog = orig_prog;
+ goto out;
+ }
+ prog->aux->jit_data = jit_data;
+ }
+ if (jit_data->ctx.offsets) {
+ ctx = jit_data->ctx;
+ image_ptr = jit_data->image_ptr;
+ header = jit_data->header;
+ extra_pass = true;
+ image_size = sizeof(u32) * ctx.idx;
+ goto skip_init_ctx;
+ }
+
memset(&ctx, 0, sizeof(ctx));
ctx.prog = prog;
ctx.cpu_architecture = cpu_architecture();
@@ -2181,7 +2207,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
ctx.offsets = kcalloc(prog->len, sizeof(int), GFP_KERNEL);
if (ctx.offsets == NULL) {
prog = orig_prog;
- goto out;
+ goto out_off;
}
/* 1) fake pass to find in the length of the JITed code,
@@ -2243,6 +2269,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
goto out_imms;
}
+skip_init_ctx:
/* 2.) Actual pass to generate final JIT code */
ctx.target = (u32 *) image_ptr;
ctx.idx = 0;
@@ -2271,13 +2298,21 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
prog->jited = 1;
prog->jited_len = image_size;
+ if (!prog->is_func || extra_pass) {
out_imms:
#if __LINUX_ARM_ARCH__ < 7
- if (ctx.imm_count)
- kfree(ctx.imms);
+ if (ctx.imm_count)
+ kfree(ctx.imms);
#endif
out_off:
- kfree(ctx.offsets);
+ kfree(ctx.offsets);
+ kfree(jit_data);
+ prog->aux->jit_data = NULL;
+ } else {
+ jit_data->ctx = ctx;
+ jit_data->image_ptr = image_ptr;
+ jit_data->header = header;
+ }
out:
if (tmp_blinded)
bpf_jit_prog_release_other(prog, prog == orig_prog ?
--
2.43.0