Re: [PATCH v2 4/4] x86/static_call: Add inline static call implementation for x86-64

From: Peter Zijlstra
Date: Mon Nov 26 2018 - 11:02:39 EST


On Mon, Nov 26, 2018 at 07:55:00AM -0600, Josh Poimboeuf wrote:
> diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c
> index 8026d176f25c..d3869295b88d 100644
> --- a/arch/x86/kernel/static_call.c
> +++ b/arch/x86/kernel/static_call.c
> @@ -9,13 +9,21 @@
>
> void static_call_bp_handler(void);
> void *bp_handler_dest;
> +void *bp_handler_continue;
>
> asm(".pushsection .text, \"ax\" \n"
> ".globl static_call_bp_handler \n"
> ".type static_call_bp_handler, @function \n"
> "static_call_bp_handler: \n"
> - "ANNOTATE_RETPOLINE_SAFE \n"
> +#ifdef CONFIG_HAVE_STATIC_CALL_INLINE
> + ANNOTATE_RETPOLINE_SAFE
> + "call *bp_handler_dest \n"
> + ANNOTATE_RETPOLINE_SAFE
> + "jmp *bp_handler_continue \n"
> +#else /* !CONFIG_HAVE_STATIC_CALL_INLINE */
> + ANNOTATE_RETPOLINE_SAFE
> "jmp *bp_handler_dest \n"
> +#endif
> ".popsection \n");
>
> void arch_static_call_transform(void *site, void *tramp, void *func)
> @@ -25,7 +33,10 @@ void arch_static_call_transform(void *site, void *tramp, void *func)
> unsigned char insn_opcode;
> unsigned char opcodes[CALL_INSN_SIZE];
>
> - insn = (unsigned long)tramp;
> + if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE))
> + insn = (unsigned long)site;
> + else
> + insn = (unsigned long)tramp;
>
> mutex_lock(&text_mutex);
>
> @@ -41,8 +52,10 @@ void arch_static_call_transform(void *site, void *tramp, void *func)
> opcodes[0] = insn_opcode;
> memcpy(&opcodes[1], &dest_relative, CALL_INSN_SIZE - 1);
>
> - /* Set up the variable for the breakpoint handler: */
> + /* Set up the variables for the breakpoint handler: */
> bp_handler_dest = func;
> + if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE))
> + bp_handler_continue = (void *)(insn + CALL_INSN_SIZE);
>
> /* Patch the call site: */
> text_poke_bp((void *)insn, opcodes, CALL_INSN_SIZE,

OK, so this is where that static_call_bp_handler comes from; you need
that CALL to frob the stack.

But I still think it is broken; consider:

CPU0 CPU1

bp_handler = ponies;

text_poke_bp(, &static_call_bp_handler)
text_poke(&int3);
on_each_cpu(sync)
<IPI>
...
</IPI>

text_poke(/* all but first bytes */)
on_each_cpu(sync)
<IPI>
...
</IPI>

<int3>
pt_regs->ip = &static_call_bp_handler
</int3>

// VCPU takes a nap...
text_poke(/* first byte */)
on_each_cpu(sync)
<IPI>
...
</IPI>

// VCPU sleeps more
bp_handler = unicorn;

CALL unicorn

*whoops*

Now, granted, that is all rather 'unlikely', but that never stopped
Murphy.