[PATCH bpf-next 3/3] bpf: arraymap: Skip bounds check during inlining when possible
From: Daniel Xu
Date: Mon Jan 20 2025 - 23:36:25 EST

For regular arraymaps and percpu arraymaps, if the lookup is known to be
inbounds, the inlined bounds check can be omitted. One fewer jump puts less
pressure on the branch predictor. While it probably won't affect real
workloads much, we basically get this for free, so we might as well take
it.
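
As an illustration (not taken from this patch), the kind of lookup that
benefits is one where the key is a constant the verifier can prove to be
below max_entries. Whether a given lookup is actually treated as inbounds
depends on the earlier patches in this series; the map, program and value
type below are made up:

  #include <linux/bpf.h>
  #include <bpf/bpf_helpers.h>

  /* Hypothetical map for illustration; not the selftest map in the dumps below. */
  struct {
          __uint(type, BPF_MAP_TYPE_ARRAY);
          __uint(max_entries, 2);
          __type(key, __u32);
          __type(value, __u64);
  } arr SEC(".maps");

  SEC("xdp")
  int lookup_const_key(struct xdp_md *ctx)
  {
          __u32 key = 1;  /* constant, provably < max_entries */
          __u64 *val;

          val = bpf_map_lookup_elem(&arr, &key);
          if (val)
                  __sync_fetch_and_add(val, 1);

          return XDP_PASS;
  }

  char LICENSE[] SEC("license") = "GPL";

For such a lookup, the generated code should drop the cmpq/jae pair that
guards max_entries, as in the dumps below.
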
JIT diff for regular arraymap (x86-64):
; val = bpf_map_lookup_elem(&map_array, &key);
- 22: movabsq $-131387164803072, %rdi
+ 22: movabsq $-131387246749696, %rdi
2c: addq $472, %rdi
33: movl (%rsi), %eax
- 36: cmpq $2, %rax
- 3a: jae 0x45
- 3c: imulq $48, %rax, %rax
- 40: addq %rdi, %rax
- 43: jmp 0x47
- 45: xorl %eax, %eax
- 47: movl $4, %edi
+ 36: imulq $48, %rax, %rax
+ 3a: addq %rdi, %rax
+ 3d: jmp 0x41
+ 3f: xorl %eax, %eax
+ 41: movl $4, %edi

JIT diff for percpu arraymap (x86-64):
; val = bpf_map_lookup_elem(&map_array_pcpu, &key);
- 22: movabsq $-131387183532032, %rdi
+ 22: movabsq $-131387273779200, %rdi
2c: addq $472, %rdi
33: movl (%rsi), %eax
- 36: cmpq $2, %rax
- 3a: jae 0x52
- 3c: shlq $3, %rax
- 40: addq %rdi, %rax
- 43: movq (%rax), %rax
- 47: addq %gs:170664, %rax
- 50: jmp 0x54
- 52: xorl %eax, %eax
- 54: movl $4, %edi
+ 36: shlq $3, %rax
+ 3a: addq %rdi, %rax
+ 3d: movq (%rax), %rax
+ 41: addq %gs:170664, %rax
+ 4a: jmp 0x4e
+ 4c: xorl %eax, %eax
+ 4e: movl $4, %edi

Signed-off-by: Daniel Xu <dxu@xxxxxxxxx>
---
kernel/bpf/arraymap.c | 24 ++++++++++++++----------
1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 8dbdceeead95..7385104dc0d0 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -221,11 +221,13 @@ static int array_map_gen_lookup(struct bpf_map *map,
*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
- if (!map->bypass_spec_v1) {
- *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
- *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
- } else {
- *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
+ if (!inbounds) {
+ if (!map->bypass_spec_v1) {
+ *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
+ *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
+ } else {
+ *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
+ }
}
if (is_power_of_2(elem_size)) {
@@ -269,11 +271,13 @@ static int percpu_array_map_gen_lookup(struct bpf_map *map,
*insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct bpf_array, pptrs));
*insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0);
- if (!map->bypass_spec_v1) {
- *insn++ = BPF_JMP_IMM(BPF_JGE, BPF_REG_0, map->max_entries, 6);
- *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_0, array->index_mask);
- } else {
- *insn++ = BPF_JMP_IMM(BPF_JGE, BPF_REG_0, map->max_entries, 5);
+ if (!inbounds) {
+ if (!map->bypass_spec_v1) {
+ *insn++ = BPF_JMP_IMM(BPF_JGE, BPF_REG_0, map->max_entries, 6);
+ *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_0, array->index_mask);
+ } else {
+ *insn++ = BPF_JMP_IMM(BPF_JGE, BPF_REG_0, map->max_entries, 5);
+ }
}
*insn++ = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
--
2.47.1