[PATCH RESEND bpf v2 2/2] x86/bpf: Fix IP for relocating call depth accounting

From: Uros Bizjak
Date: Mon Apr 01 2024 - 14:59:18 EST


From: Joan Bruguera Micó <joanbrugueram@xxxxxxxxx>

The commit:

59bec00ace28 ("x86/percpu: Introduce %rip-relative addressing to PER_CPU_VAR()")

made PER_CPU_VAR() to use rip-relative addressing, hence
INCREMENT_CALL_DEPTH macro and skl_call_thunk_template got rip-relative
asm code inside of it. A follow up commit:

17bce3b2ae2d ("x86/callthunks: Handle %rip-relative relocations in call thunk template")

changed x86_call_depth_emit_accounting() to use apply_relocation(),
but mistakenly assumed that the code is being patched in-place (where
the destination of the relocation matches the address of the code),
using *pprog as the destination ip. This is not true for the call depth
accounting, emitted by the BPF JIT, so the calculated address was wrong,
JIT-ed BPF progs on kernels with call depth tracking got broken and
usually caused a page fault.

Pass the destination IP when the BPF JIT emits call depth accounting.

Fixes: 17bce3b2ae2d ("x86/callthunks: Handle %rip-relative relocations in call thunk template")
Signed-off-by: Joan Bruguera Micó <joanbrugueram@xxxxxxxxx>
Reviewed-by: Uros Bizjak <ubizjak@xxxxxxxxx>
Acked-by: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Alexei Starovoitov <ast@xxxxxxxxxx>
Cc: Daniel Borkmann <daniel@xxxxxxxxxxxxx>
---
v2: Add more details to the commit message.
---
arch/x86/include/asm/alternative.h | 4 ++--
arch/x86/kernel/callthunks.c | 4 ++--
arch/x86/net/bpf_jit_comp.c | 19 ++++++++-----------
3 files changed, 12 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index fcd20c6dc7f9..67b68d0d17d1 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -117,7 +117,7 @@ extern void callthunks_patch_builtin_calls(void);
extern void callthunks_patch_module_calls(struct callthunk_sites *sites,
struct module *mod);
extern void *callthunks_translate_call_dest(void *dest);
-extern int x86_call_depth_emit_accounting(u8 **pprog, void *func);
+extern int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip);
#else
static __always_inline void callthunks_patch_builtin_calls(void) {}
static __always_inline void
@@ -128,7 +128,7 @@ static __always_inline void *callthunks_translate_call_dest(void *dest)
return dest;
}
static __always_inline int x86_call_depth_emit_accounting(u8 **pprog,
- void *func)
+ void *func, void *ip)
{
return 0;
}
diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c
index 30335182b6b0..e92ff0c11db8 100644
--- a/arch/x86/kernel/callthunks.c
+++ b/arch/x86/kernel/callthunks.c
@@ -314,7 +314,7 @@ static bool is_callthunk(void *addr)
return !bcmp(pad, insn_buff, tmpl_size);
}

-int x86_call_depth_emit_accounting(u8 **pprog, void *func)
+int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip)
{
unsigned int tmpl_size = SKL_TMPL_SIZE;
u8 insn_buff[MAX_PATCH_LEN];
@@ -327,7 +327,7 @@ int x86_call_depth_emit_accounting(u8 **pprog, void *func)
return 0;

memcpy(insn_buff, skl_call_thunk_template, tmpl_size);
- apply_relocation(insn_buff, tmpl_size, *pprog,
+ apply_relocation(insn_buff, tmpl_size, ip,
skl_call_thunk_template, tmpl_size);

memcpy(*pprog, insn_buff, tmpl_size);
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index e55745f512e1..df5fac428408 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -480,7 +480,7 @@ static int emit_call(u8 **pprog, void *func, void *ip)
static int emit_rsb_call(u8 **pprog, void *func, void *ip)
{
OPTIMIZER_HIDE_VAR(func);
- ip += x86_call_depth_emit_accounting(pprog, func);
+ ip += x86_call_depth_emit_accounting(pprog, func, ip);
return emit_patch(pprog, func, ip, 0xE8);
}

@@ -1972,20 +1972,17 @@ st: if (is_imm8(insn->off))

/* call */
case BPF_JMP | BPF_CALL: {
- int offs;
+ u8 *ip = image + addrs[i - 1];

func = (u8 *) __bpf_call_base + imm32;
if (tail_call_reachable) {
RESTORE_TAIL_CALL_CNT(bpf_prog->aux->stack_depth);
- if (!imm32)
- return -EINVAL;
- offs = 7 + x86_call_depth_emit_accounting(&prog, func);
- } else {
- if (!imm32)
- return -EINVAL;
- offs = x86_call_depth_emit_accounting(&prog, func);
+ ip += 7;
}
- if (emit_call(&prog, func, image + addrs[i - 1] + offs))
+ if (!imm32)
+ return -EINVAL;
+ ip += x86_call_depth_emit_accounting(&prog, func, ip);
+ if (emit_call(&prog, func, ip))
return -EINVAL;
break;
}
@@ -2835,7 +2832,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
* Direct-call fentry stub, as such it needs accounting for the
* __fentry__ call.
*/
- x86_call_depth_emit_accounting(&prog, NULL);
+ x86_call_depth_emit_accounting(&prog, NULL, image);
}
EMIT1(0x55); /* push rbp */
EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
--
2.42.0