Re: [PATCH v2 4/4] x86/static_call: Add inline static call implementation for x86-64
From: Josh Poimboeuf
Date: Mon Nov 26 2018 - 16:26:38 EST
On Mon, Nov 26, 2018 at 09:08:01PM +0100, Peter Zijlstra wrote:
> On Mon, Nov 26, 2018 at 11:56:24AM -0600, Josh Poimboeuf wrote:
> > diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c
> > index d3869295b88d..8fd6c8556750 100644
> > --- a/arch/x86/kernel/static_call.c
> > +++ b/arch/x86/kernel/static_call.c
> > @@ -7,24 +7,19 @@
> >
> > #define CALL_INSN_SIZE 5
> >
> > +unsigned long bp_handler_call_return_addr;
> >
> > +static void static_call_bp_handler(struct pt_regs *regs)
> > +{
> > #ifdef CONFIG_HAVE_STATIC_CALL_INLINE
> > + /*
> > + * Push the return address on the stack so the "called" function will
> > + * return to immediately after the call site.
> > + */
> > + regs->sp -= sizeof(long);
> > + *(unsigned long *)regs->sp = bp_handler_call_return_addr;
> > #endif
> > +}
> >
> > void arch_static_call_transform(void *site, void *tramp, void *func)
> > {
> > @@ -52,14 +47,12 @@ void arch_static_call_transform(void *site, void *tramp, void *func)
> > opcodes[0] = insn_opcode;
> > memcpy(&opcodes[1], &dest_relative, CALL_INSN_SIZE - 1);
> >
> > if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE))
> > + bp_handler_call_return_addr = insn + CALL_INSN_SIZE;
> >
> > /* Patch the call site: */
> > text_poke_bp((void *)insn, opcodes, CALL_INSN_SIZE,
> > - static_call_bp_handler);
> > + static_call_bp_handler, func);
> >
> > done:
> > mutex_unlock(&text_mutex);
>
>
> like maybe something along the lines of:
>
> struct sc_data {
> unsigned long ret;
> unsigned long ip;
> };
>
> void sc_handler(struct pt_regs *regs, void *data)
> {
> struct sc_data *scd = data;
>
> regs->sp -= sizeof(long);
> *(unsigned long *)regs->sp = scd->ret;
> regs->ip = scd->ip;
> }
>
> arch_static_call_transform()
> {
> ...
>
> scd = (struct sc_data){
> .ret = insn + CALL_INSN_SIZE,
> .ip = (unsigned long)func,
> };
>
> text_poke_bp((void *)insn, opcodes, CALL_INSN_SIZE,
> sc_handler, (void *)&scd);
>
> ...
> }
Yeah, that's probably better. I assume you also mean that we would have
all text_poke_bp() users create a handler callback? That way the
interface is clear and consistent for everybody. Like:
diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index e85ff65c43c3..04d6cf838fb7 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -20,6 +20,8 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
extern void *text_poke_early(void *addr, const void *opcode, size_t len);
+typedef void (*bp_handler_t)(struct pt_regs *regs, void *data);
+
/*
* Clear and restore the kernel write-protection flag on the local CPU.
* Allows the kernel to edit read-only pages.
@@ -36,7 +38,8 @@ extern void *text_poke_early(void *addr, const void *opcode, size_t len);
*/
extern void *text_poke(void *addr, const void *opcode, size_t len);
extern int poke_int3_handler(struct pt_regs *regs);
-extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
+extern void *text_poke_bp(void *addr, const void *opcode, size_t len,
+ bp_handler_t handler, void *data);
extern int after_bootmem;
#endif /* _ASM_X86_TEXT_PATCHING_H */
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index ebeac487a20c..547af714bd60 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -738,7 +738,8 @@ static void do_sync_core(void *info)
}
static bool bp_patching_in_progress;
-static void *bp_int3_handler, *bp_int3_addr;
+static void *bp_int3_data, *bp_int3_addr;
+static bp_handler_t bp_int3_handler;
int poke_int3_handler(struct pt_regs *regs)
{
@@ -746,11 +747,11 @@ int poke_int3_handler(struct pt_regs *regs)
* Having observed our INT3 instruction, we now must observe
* bp_patching_in_progress.
*
- * in_progress = TRUE INT3
- * WMB RMB
- * write INT3 if (in_progress)
+ * in_progress = TRUE INT3
+ * WMB RMB
+ * write INT3 if (in_progress)
*
- * Idem for bp_int3_handler.
+ * Idem for bp_int3_data.
*/
smp_rmb();
@@ -760,8 +761,7 @@ int poke_int3_handler(struct pt_regs *regs)
if (user_mode(regs) || regs->ip != (unsigned long)bp_int3_addr)
return 0;
- /* set up the specified breakpoint handler */
- regs->ip = (unsigned long) bp_int3_handler;
+ bp_int3_handler(regs, bp_int3_data);
return 1;
@@ -772,7 +772,8 @@ int poke_int3_handler(struct pt_regs *regs)
* @addr: address to patch
* @opcode: opcode of new instruction
* @len: length to copy
- * @handler: address to jump to when the temporary breakpoint is hit
+ * @handler: handler to call from int3 context
+ * @data: opaque data passed to handler
*
* Modify multi-byte instruction by using int3 breakpoint on SMP.
* We completely avoid stop_machine() here, and achieve the
@@ -787,11 +788,13 @@ int poke_int3_handler(struct pt_regs *regs)
* replacing opcode
* - sync cores
*/
-void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
+void *text_poke_bp(void *addr, const void *opcode, size_t len,
+ bp_handler_t handler, void *data)
{
unsigned char int3 = 0xcc;
bp_int3_handler = handler;
+ bp_int3_data = data;
bp_int3_addr = (u8 *)addr + sizeof(int3);
bp_patching_in_progress = true;
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index aac0c1f7e354..d4b0abe4912d 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -37,6 +37,11 @@ static void bug_at(unsigned char *ip, int line)
BUG();
}
+static inline void jump_label_bp_handler(struct pt_regs *regs, void *data)
+{
+ regs->ip += JUMP_LABEL_NOP_SIZE - 1;
+}
+
static void __ref __jump_label_transform(struct jump_entry *entry,
enum jump_label_type type,
void *(*poker)(void *, const void *, size_t),
@@ -91,7 +96,7 @@ static void __ref __jump_label_transform(struct jump_entry *entry,
}
text_poke_bp((void *)jump_entry_code(entry), code, JUMP_LABEL_NOP_SIZE,
- (void *)jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
+ jump_label_bp_handler, NULL);
}
void arch_jump_label_transform(struct jump_entry *entry,
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 40b16b270656..b2dffdd6068d 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -424,6 +424,11 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
goto out;
}
+static void kprobes_poke_bp_handler(struct pt_regs *regs, void *data)
+{
+ regs->ip = data;
+}
+
/*
* Replace breakpoints (int3) with relative jumps.
* Caller must call with locking kprobe_mutex and text_mutex.
@@ -447,7 +452,7 @@ void arch_optimize_kprobes(struct list_head *oplist)
*(s32 *)(&insn_buf[1]) = rel;
text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE,
- op->optinsn.insn);
+ kprobes_poke_bp_handler, op->optinsn.insn);
list_del_init(&op->list);
}
@@ -462,7 +467,7 @@ void arch_unoptimize_kprobe(struct optimized_kprobe *op)
insn_buf[0] = BREAKPOINT_INSTRUCTION;
memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE,
- op->optinsn.insn);
+ kprobes_poke_bp_handler, op->optinsn.insn);
}
/*
diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c
index d3869295b88d..e05ebc6d4db5 100644
--- a/arch/x86/kernel/static_call.c
+++ b/arch/x86/kernel/static_call.c
@@ -7,24 +7,30 @@
#define CALL_INSN_SIZE 5
-void static_call_bp_handler(void);
-void *bp_handler_dest;
-void *bp_handler_continue;
-
-asm(".pushsection .text, \"ax\" \n"
- ".globl static_call_bp_handler \n"
- ".type static_call_bp_handler, @function \n"
- "static_call_bp_handler: \n"
-#ifdef CONFIG_HAVE_STATIC_CALL_INLINE
- ANNOTATE_RETPOLINE_SAFE
- "call *bp_handler_dest \n"
- ANNOTATE_RETPOLINE_SAFE
- "jmp *bp_handler_continue \n"
-#else /* !CONFIG_HAVE_STATIC_CALL_INLINE */
- ANNOTATE_RETPOLINE_SAFE
- "jmp *bp_handler_dest \n"
-#endif
- ".popsection \n");
+struct static_call_bp_data {
+ unsigned long func, ret;
+};
+
+static void static_call_bp_handler(struct pt_regs *regs, void *_data)
+{
+ struct static_call_bp_data *data = _data;
+
+ /*
+ * For inline static calls, push the return address on the stack so the
+ * "called" function will return to the location immediately after the
+ * call site.
+ *
+ * NOTE: This code will need to be revisited when kernel CET gets
+ * implemented.
+ */
+ if (data->ret) {
+ regs->sp -= sizeof(long);
+ *(unsigned long *)regs->sp = data->ret;
+ }
+
+ /* The exception handler will 'return' to the destination function. */
+ regs->ip = data->func;
+}
void arch_static_call_transform(void *site, void *tramp, void *func)
{
@@ -32,11 +38,17 @@ void arch_static_call_transform(void *site, void *tramp, void *func)
unsigned long insn;
unsigned char insn_opcode;
unsigned char opcodes[CALL_INSN_SIZE];
+ struct static_call_bp_data handler_data;
+
+ handler_data.func = (unsigned long)func;
- if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE))
+ if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE)) {
insn = (unsigned long)site;
- else
+ handler_data.ret = insn + CALL_INSN_SIZE;
+ } else {
insn = (unsigned long)tramp;
+ handler_data.ret = 0;
+ }
mutex_lock(&text_mutex);
@@ -52,14 +64,9 @@ void arch_static_call_transform(void *site, void *tramp, void *func)
opcodes[0] = insn_opcode;
memcpy(&opcodes[1], &dest_relative, CALL_INSN_SIZE - 1);
- /* Set up the variables for the breakpoint handler: */
- bp_handler_dest = func;
- if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE))
- bp_handler_continue = (void *)(insn + CALL_INSN_SIZE);
-
/* Patch the call site: */
text_poke_bp((void *)insn, opcodes, CALL_INSN_SIZE,
- static_call_bp_handler);
+ static_call_bp_handler, &handler_data);
done:
mutex_unlock(&text_mutex);