[PATCH 4/9] x86/bpf: Simplify prologue generation

From: Josh Poimboeuf
Date: Thu Jun 13 2019 - 11:21:16 EST


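Simplify the BPF JIT prologue such that it more closely resembles a
typical compiler-generated prologue: the callee-saved registers are
saved with push instructions instead of being stored into a hand-built
auxiliary stack area. This also reduces the prologue size from 37 to
20 bytes.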

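The frame pointer setup instructions at the beginning don't actually
accomplish anything because RBP gets clobbered anyway later in the
prologue. So remove those instructions for now.

The tail call counter is now initialized with a single 'push $0'. This
is done unconditionally, so emit_prologue() no longer takes the
ebpf_from_cbpf argument.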

Signed-off-by: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
---
arch/x86/net/bpf_jit_comp.c | 100 +++++++++++++++++-------------------
1 file changed, 47 insertions(+), 53 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index da8c988b0f0f..485692d4b163 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -186,56 +186,48 @@ struct jit_context {
#define BPF_MAX_INSN_SIZE 128
#define BPF_INSN_SAFETY 64

-#define AUX_STACK_SPACE 40 /* Space for RBX, R13, R14, R15, tailcnt */
-
-#define PROLOGUE_SIZE 37
+#define PROLOGUE_SIZE 20

/*
* Emit x86-64 prologue code for BPF program and check its size.
* bpf_tail_call helper will skip it while jumping into another program
*/
-static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
+static void emit_prologue(u8 **pprog, u32 stack_depth)
{
u8 *prog = *pprog;
int cnt = 0;

+ /* push r15 */
+ EMIT2(0x41, 0x57);
+ /* push r14 */
+ EMIT2(0x41, 0x56);
+ /* push r13 */
+ EMIT2(0x41, 0x55);
/* push rbp */
EMIT1(0x55);
+ /* push rbx */
+ EMIT1(0x53);

- /* mov rbp,rsp */
- EMIT3(0x48, 0x89, 0xE5);
-
- /* sub rsp, rounded_stack_depth + AUX_STACK_SPACE */
- EMIT3_off32(0x48, 0x81, 0xEC,
- round_up(stack_depth, 8) + AUX_STACK_SPACE);
-
- /* sub rbp, AUX_STACK_SPACE */
- EMIT4(0x48, 0x83, 0xED, AUX_STACK_SPACE);
-
- /* mov qword ptr [rbp+0],rbx */
- EMIT4(0x48, 0x89, 0x5D, 0);
- /* mov qword ptr [rbp+8],r13 */
- EMIT4(0x4C, 0x89, 0x6D, 8);
- /* mov qword ptr [rbp+16],r14 */
- EMIT4(0x4C, 0x89, 0x75, 16);
- /* mov qword ptr [rbp+24],r15 */
- EMIT4(0x4C, 0x89, 0x7D, 24);
+ /*
+ * Push the tail call counter (tail_call_cnt) for eBPF tail calls.
+ * Initialized to zero.
+ *
+ * push $0
+ */
+ EMIT2(0x6a, 0x00);

- if (!ebpf_from_cbpf) {
- /*
- * Clear the tail call counter (tail_call_cnt): for eBPF tail
- * calls we need to reset the counter to 0. It's done in two
- * instructions, resetting RAX register to 0, and moving it
- * to the counter location.
- */
+ /*
+ * RBP is used for the BPF program's FP register. It points to the end
+ * of the program's stack area.
+ *
+ * mov rbp, rsp
+ */
+ EMIT3(0x48, 0x89, 0xE5);

- /* xor eax, eax */
- EMIT2(0x31, 0xc0);
- /* mov qword ptr [rbp+32], rax */
- EMIT4(0x48, 0x89, 0x45, 32);
+ /* sub rsp, rounded_stack_depth */
+ EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8));

- BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
- }
+ BUILD_BUG_ON(cnt != PROLOGUE_SIZE);

*pprog = prog;
}
@@ -245,19 +237,22 @@ static void emit_epilogue(u8 **pprog)
u8 *prog = *pprog;
int cnt = 0;

- /* mov rbx, qword ptr [rbp+0] */
- EMIT4(0x48, 0x8B, 0x5D, 0);
- /* mov r13, qword ptr [rbp+8] */
- EMIT4(0x4C, 0x8B, 0x6D, 8);
- /* mov r14, qword ptr [rbp+16] */
- EMIT4(0x4C, 0x8B, 0x75, 16);
- /* mov r15, qword ptr [rbp+24] */
- EMIT4(0x4C, 0x8B, 0x7D, 24);
+ /* lea rsp, [rbp+0x8] */
+ EMIT4(0x48, 0x8D, 0x65, 0x08);
+
+ /* pop rbx */
+ EMIT1(0x5B);
+ /* pop rbp */
+ EMIT1(0x5D);
+ /* pop r13 */
+ EMIT2(0x41, 0x5D);
+ /* pop r14 */
+ EMIT2(0x41, 0x5E);
+ /* pop r15 */
+ EMIT2(0x41, 0x5F);

- /* add rbp, AUX_STACK_SPACE */
- EMIT4(0x48, 0x83, 0xC5, AUX_STACK_SPACE);
- EMIT1(0xC9); /* leave */
- EMIT1(0xC3); /* ret */
+ /* ret */
+ EMIT1(0xC3);

*pprog = prog;
}
@@ -295,7 +290,7 @@ static void emit_bpf_tail_call(u8 **pprog)
EMIT2(0x89, 0xD2); /* mov edx, edx */
EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */
offsetof(struct bpf_array, map.max_entries));
-#define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* Number of bytes to jump */
+#define OFFSET1 (35 + RETPOLINE_RAX_BPF_JIT_SIZE) /* Number of bytes to jump */
EMIT2(X86_JBE, OFFSET1); /* jbe out */
label1 = cnt;

@@ -303,13 +298,13 @@ static void emit_bpf_tail_call(u8 **pprog)
* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
* goto out;
*/
- EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */
+ EMIT3(0x8B, 0x45, 0x04); /* mov eax, dword ptr [rbp + 4] */
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
-#define OFFSET2 (30 + RETPOLINE_RAX_BPF_JIT_SIZE)
+#define OFFSET2 (27 + RETPOLINE_RAX_BPF_JIT_SIZE)
EMIT2(X86_JA, OFFSET2); /* ja out */
label2 = cnt;
EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
- EMIT2_off32(0x89, 0x85, 36); /* mov dword ptr [rbp + 36], eax */
+ EMIT3(0x89, 0x45, 0x04); /* mov dword ptr [rbp + 4], eax */

/* prog = array->ptrs[index]; */
EMIT4_off32(0x48, 0x8B, 0x84, 0xD6, /* mov rax, [rsi + rdx * 8 + offsetof(...)] */
@@ -437,8 +432,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
int proglen = 0;
u8 *prog = temp;

- emit_prologue(&prog, bpf_prog->aux->stack_depth,
- bpf_prog_was_classic(bpf_prog));
+ emit_prologue(&prog, bpf_prog->aux->stack_depth);

for (i = 0; i < insn_cnt; i++, insn++) {
const s32 imm32 = insn->imm;
--
2.20.1