Re: [PATCH 2/2] x86/asm: Fix inline asm call constraints for clang

From: Alexander Potapenko
Date: Tue Sep 19 2017 - 17:55:35 EST


On Tue, Sep 19, 2017 at 11:45 AM, Josh Poimboeuf <jpoimboe@xxxxxxxxxx> wrote:
> For inline asm statements which have a CALL instruction, we list the
> stack pointer as a constraint to convince GCC to ensure the frame
> pointer is set up first:
>
> static inline void foo()
> {
> register void *__sp asm(_ASM_SP);
> asm("call bar" : "+r" (__sp))
> }
>
> Unfortunately, that pattern causes clang to corrupt the stack pointer.
>
> There's actually an easier way to achieve the same goal in GCC, without
> causing trouble for clang. If we declare the stack pointer register
> variable as a global variable, and remove the constraint altogether,
> that convinces GCC to always set up the frame pointer before inserting
> *any* inline asm.
>
> It basically acts as if *every* inline asm statement has a CALL
> instruction. It's a bit overkill, but the performance impact should be
> negligible.
>
> Here are the vmlinux .text size differences with the following configs
> on GCC:
>
> - defconfig
> - defconfig without frame pointers
> - Fedora distro config
> - Fedora distro config without frame pointers
>
> defconfig defconfig-nofp distro distro-nofp
> before 9796300 9466764 9076191 8789745
> after 9796941 9462859 9076381 8785325
>
> With frame pointers, the text size increases slightly. Without frame
> pointers, the text size decreases, and a little more significantly.
>
> Reported-by: Matthias Kaehlcke <mka@xxxxxxxxxxxx>
> Signed-off-by: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
> ---
> arch/x86/include/asm/alternative.h | 3 +--
> arch/x86/include/asm/asm.h | 9 ++++++++
> arch/x86/include/asm/mshyperv.h | 27 ++++++++++--------------
> arch/x86/include/asm/paravirt_types.h | 14 ++++++------
> arch/x86/include/asm/preempt.h | 15 +++++--------
> arch/x86/include/asm/processor.h | 6 ++----
> arch/x86/include/asm/rwsem.h | 6 +++---
> arch/x86/include/asm/uaccess.h | 5 ++---
> arch/x86/include/asm/xen/hypercall.h | 5 ++---
> arch/x86/kvm/emulate.c | 3 +--
> arch/x86/kvm/vmx.c | 4 +---
> arch/x86/mm/fault.c | 3 +--
> tools/objtool/Documentation/stack-validation.txt | 20 +++++++++++++-----
> 13 files changed, 60 insertions(+), 60 deletions(-)
>
> diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
> index 1b020381ab38..7aeb1cef4204 100644
> --- a/arch/x86/include/asm/alternative.h
> +++ b/arch/x86/include/asm/alternative.h
> @@ -218,10 +218,9 @@ static inline int alternatives_text_reserved(void *start, void *end)
> #define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2, \
> output, input...) \
> { \
> - register void *__sp asm(_ASM_SP); \
> asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\
> "call %P[new2]", feature2) \
> - : output, "+r" (__sp) \
> + : output \
> : [old] "i" (oldfunc), [new1] "i" (newfunc1), \
> [new2] "i" (newfunc2), ## input); \
> }
> diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
> index 676ee5807d86..ff8921d4615b 100644
> --- a/arch/x86/include/asm/asm.h
> +++ b/arch/x86/include/asm/asm.h
> @@ -132,4 +132,13 @@
> /* For C file, we already have NOKPROBE_SYMBOL macro */
> #endif
>
> +#ifndef __ASSEMBLY__
> +/*
> + * This variable declaration has the side effect of forcing GCC to always set
> + * up the frame pointer before inserting any inline asm. This is necessary
> + * because some inline asm statements have CALL instructions.
> + */
> +register unsigned int __sp_unused asm("esp");
Shouldn't this be "asm(_ASM_SP)"?
> +#endif
> +
> #endif /* _ASM_X86_ASM_H */
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index 63cc96f064dc..1c913ae27f99 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -179,16 +179,15 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
> u64 input_address = input ? virt_to_phys(input) : 0;
> u64 output_address = output ? virt_to_phys(output) : 0;
> u64 hv_status;
> - register void *__sp asm(_ASM_SP);
>
> #ifdef CONFIG_X86_64
> if (!hv_hypercall_pg)
> return U64_MAX;
>
> - __asm__ __volatile__("mov %4, %%r8\n"
> - "call *%5"
> - : "=a" (hv_status), "+r" (__sp),
> - "+c" (control), "+d" (input_address)
> + __asm__ __volatile__("mov %3, %%r8\n"
> + "call *%4"
> + : "=a" (hv_status), "+c" (control),
> + "+d" (input_address)
> : "r" (output_address), "m" (hv_hypercall_pg)
> : "cc", "memory", "r8", "r9", "r10", "r11");
> #else
> @@ -200,9 +199,8 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
> if (!hv_hypercall_pg)
> return U64_MAX;
>
> - __asm__ __volatile__("call *%7"
> - : "=A" (hv_status),
> - "+c" (input_address_lo), "+r" (__sp)
> + __asm__ __volatile__("call *%6"
> + : "=A" (hv_status), "+c" (input_address_lo)
> : "A" (control),
> "b" (input_address_hi),
> "D"(output_address_hi), "S"(output_address_lo),
> @@ -224,13 +222,12 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
> static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
> {
> u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
> - register void *__sp asm(_ASM_SP);
>
> #ifdef CONFIG_X86_64
> {
> - __asm__ __volatile__("call *%4"
> - : "=a" (hv_status), "+r" (__sp),
> - "+c" (control), "+d" (input1)
> + __asm__ __volatile__("call *%3"
> + : "=a" (hv_status), "+c" (control),
> + "+d" (input1)
> : "m" (hv_hypercall_pg)
> : "cc", "r8", "r9", "r10", "r11");
> }
> @@ -239,10 +236,8 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
> u32 input1_hi = upper_32_bits(input1);
> u32 input1_lo = lower_32_bits(input1);
>
> - __asm__ __volatile__ ("call *%5"
> - : "=A"(hv_status),
> - "+c"(input1_lo),
> - "+r"(__sp)
> + __asm__ __volatile__ ("call *%4"
> + : "=A"(hv_status), "+c"(input1_lo)
> : "A" (control),
> "b" (input1_hi),
> "m" (hv_hypercall_pg)
> diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
> index 42873edd9f9d..b8966ed71ba5 100644
> --- a/arch/x86/include/asm/paravirt_types.h
> +++ b/arch/x86/include/asm/paravirt_types.h
> @@ -459,8 +459,8 @@ int paravirt_disable_iospace(void);
> */
> #ifdef CONFIG_X86_32
> #define PVOP_VCALL_ARGS \
> - unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx; \
> - register void *__sp asm("esp")
> + unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx;
> +
> #define PVOP_CALL_ARGS PVOP_VCALL_ARGS
>
> #define PVOP_CALL_ARG1(x) "a" ((unsigned long)(x))
> @@ -480,8 +480,8 @@ int paravirt_disable_iospace(void);
> /* [re]ax isn't an arg, but the return val */
> #define PVOP_VCALL_ARGS \
> unsigned long __edi = __edi, __esi = __esi, \
> - __edx = __edx, __ecx = __ecx, __eax = __eax; \
> - register void *__sp asm("rsp")
> + __edx = __edx, __ecx = __ecx, __eax = __eax;
> +
> #define PVOP_CALL_ARGS PVOP_VCALL_ARGS
>
> #define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x))
> @@ -532,7 +532,7 @@ int paravirt_disable_iospace(void);
> asm volatile(pre \
> paravirt_alt(PARAVIRT_CALL) \
> post \
> - : call_clbr, "+r" (__sp) \
> + : call_clbr \
> : paravirt_type(op), \
> paravirt_clobber(clbr), \
> ##__VA_ARGS__ \
> @@ -542,7 +542,7 @@ int paravirt_disable_iospace(void);
> asm volatile(pre \
> paravirt_alt(PARAVIRT_CALL) \
> post \
> - : call_clbr, "+r" (__sp) \
> + : call_clbr \
> : paravirt_type(op), \
> paravirt_clobber(clbr), \
> ##__VA_ARGS__ \
> @@ -569,7 +569,7 @@ int paravirt_disable_iospace(void);
> asm volatile(pre \
> paravirt_alt(PARAVIRT_CALL) \
> post \
> - : call_clbr, "+r" (__sp) \
> + : call_clbr \
> : paravirt_type(op), \
> paravirt_clobber(clbr), \
> ##__VA_ARGS__ \
> diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
> index ec1f3c651150..f66b189a1142 100644
> --- a/arch/x86/include/asm/preempt.h
> +++ b/arch/x86/include/asm/preempt.h
> @@ -100,19 +100,14 @@ static __always_inline bool should_resched(int preempt_offset)
>
> #ifdef CONFIG_PREEMPT
> extern asmlinkage void ___preempt_schedule(void);
> -# define __preempt_schedule() \
> -({ \
> - register void *__sp asm(_ASM_SP); \
> - asm volatile ("call ___preempt_schedule" : "+r"(__sp)); \
> -})
> +# define __preempt_schedule() \
> + asm volatile ("call ___preempt_schedule")
>
> extern asmlinkage void preempt_schedule(void);
> extern asmlinkage void ___preempt_schedule_notrace(void);
> -# define __preempt_schedule_notrace() \
> -({ \
> - register void *__sp asm(_ASM_SP); \
> - asm volatile ("call ___preempt_schedule_notrace" : "+r"(__sp)); \
> -})
> +# define __preempt_schedule_notrace() \
> + asm volatile ("call ___preempt_schedule_notrace")
> +
> extern asmlinkage void preempt_schedule_notrace(void);
> #endif
>
> diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
> index 3fa26a61eabc..7a7969211527 100644
> --- a/arch/x86/include/asm/processor.h
> +++ b/arch/x86/include/asm/processor.h
> @@ -677,8 +677,6 @@ static inline void sync_core(void)
> * Like all of Linux's memory ordering operations, this is a
> * compiler barrier as well.
> */
> - register void *__sp asm(_ASM_SP);
> -
> #ifdef CONFIG_X86_32
> asm volatile (
> "pushfl\n\t"
> @@ -686,7 +684,7 @@ static inline void sync_core(void)
> "pushl $1f\n\t"
> "iret\n\t"
> "1:"
> - : "+r" (__sp) : : "memory");
> + : : : "memory");
> #else
> unsigned int tmp;
>
> @@ -703,7 +701,7 @@ static inline void sync_core(void)
> "iretq\n\t"
> UNWIND_HINT_RESTORE
> "1:"
> - : "=&r" (tmp), "+r" (__sp) : : "cc", "memory");
> + : "=&r" (tmp) : : "cc", "memory");
> #endif
> }
>
> diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
> index a34e0d4b957d..f8f127a2a300 100644
> --- a/arch/x86/include/asm/rwsem.h
> +++ b/arch/x86/include/asm/rwsem.h
> @@ -103,10 +103,9 @@ static inline bool __down_read_trylock(struct rw_semaphore *sem)
> ({ \
> long tmp; \
> struct rw_semaphore* ret; \
> - register void *__sp asm(_ASM_SP); \
> \
> asm volatile("# beginning down_write\n\t" \
> - LOCK_PREFIX " xadd %1,(%4)\n\t" \
> + LOCK_PREFIX " xadd %1,(%3)\n\t" \
> /* adds 0xffff0001, returns the old value */ \
> " test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t" \
> /* was the active mask 0 before? */\
> @@ -114,7 +113,8 @@ static inline bool __down_read_trylock(struct rw_semaphore *sem)
> " call " slow_path "\n" \
> "1:\n" \
> "# ending down_write" \
> - : "+m" (sem->count), "=d" (tmp), "=a" (ret), "+r" (__sp) \
> + : "+m" (sem->count), "=d" (tmp), \
> + "=a" (ret) \
> : "a" (sem), "1" (RWSEM_ACTIVE_WRITE_BIAS) \
> : "memory", "cc"); \
> ret; \
> diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
> index 184eb9894dae..113ddb8ce0d8 100644
> --- a/arch/x86/include/asm/uaccess.h
> +++ b/arch/x86/include/asm/uaccess.h
> @@ -166,11 +166,10 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
> ({ \
> int __ret_gu; \
> register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX); \
> - register void *__sp asm(_ASM_SP); \
> __chk_user_ptr(ptr); \
> might_fault(); \
> - asm volatile("call __get_user_%P4" \
> - : "=a" (__ret_gu), "=r" (__val_gu), "+r" (__sp) \
> + asm volatile("call __get_user_%P3" \
> + : "=a" (__ret_gu), "=r" (__val_gu) \
> : "0" (ptr), "i" (sizeof(*(ptr)))); \
> (x) = (__force __typeof__(*(ptr))) __val_gu; \
> __builtin_expect(__ret_gu, 0); \
> diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
> index 9606688caa4b..6264dba01896 100644
> --- a/arch/x86/include/asm/xen/hypercall.h
> +++ b/arch/x86/include/asm/xen/hypercall.h
> @@ -113,10 +113,9 @@ extern struct { char _entry[32]; } hypercall_page[];
> register unsigned long __arg2 asm(__HYPERCALL_ARG2REG) = __arg2; \
> register unsigned long __arg3 asm(__HYPERCALL_ARG3REG) = __arg3; \
> register unsigned long __arg4 asm(__HYPERCALL_ARG4REG) = __arg4; \
> - register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5; \
> - register void *__sp asm(_ASM_SP);
> + register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5;
>
> -#define __HYPERCALL_0PARAM "=r" (__res), "+r" (__sp)
> +#define __HYPERCALL_0PARAM "=r" (__res)
> #define __HYPERCALL_1PARAM __HYPERCALL_0PARAM, "+r" (__arg1)
> #define __HYPERCALL_2PARAM __HYPERCALL_1PARAM, "+r" (__arg2)
> #define __HYPERCALL_3PARAM __HYPERCALL_2PARAM, "+r" (__arg3)
> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
> index 16bf6655aa85..5e82e0821c1f 100644
> --- a/arch/x86/kvm/emulate.c
> +++ b/arch/x86/kvm/emulate.c
> @@ -5296,7 +5296,6 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
>
> static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
> {
> - register void *__sp asm(_ASM_SP);
> ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
>
> if (!(ctxt->d & ByteOp))
> @@ -5304,7 +5303,7 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
>
> asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
> : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
> - [fastop]"+S"(fop), "+r"(__sp)
> + [fastop]"+S"(fop)
> : "c"(ctxt->src2.val));
>
> ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 06c0c6d0541e..a693362f41af 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -9036,7 +9036,6 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
> static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
> {
> u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
> - register void *__sp asm(_ASM_SP);
>
> if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
> == (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) {
> @@ -9063,9 +9062,8 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
> "call *%[entry]\n\t"
> :
> #ifdef CONFIG_X86_64
> - [sp]"=&r"(tmp),
> + [sp]"=&r"(tmp)
> #endif
> - "+r"(__sp)
> :
> [entry]"r"(entry),
> [ss]"i"(__KERNEL_DS),
> diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
> index b836a7274e12..4457e41378e4 100644
> --- a/arch/x86/mm/fault.c
> +++ b/arch/x86/mm/fault.c
> @@ -806,7 +806,6 @@ no_context(struct pt_regs *regs, unsigned long error_code,
> if (is_vmalloc_addr((void *)address) &&
> (((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) ||
> address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) {
> - register void *__sp asm("rsp");
> unsigned long stack = this_cpu_read(orig_ist.ist[DOUBLEFAULT_STACK]) - sizeof(void *);
> /*
> * We're likely to be running with very little stack space
> @@ -821,7 +820,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
> asm volatile ("movq %[stack], %%rsp\n\t"
> "call handle_stack_overflow\n\t"
> "1: jmp 1b"
> - : "+r" (__sp)
> + :
> : "D" ("kernel stack overflow (page fault)"),
> "S" (regs), "d" (address),
> [stack] "rm" (stack));
> diff --git a/tools/objtool/Documentation/stack-validation.txt b/tools/objtool/Documentation/stack-validation.txt
> index 6a1af43862df..18af4aa7488d 100644
> --- a/tools/objtool/Documentation/stack-validation.txt
> +++ b/tools/objtool/Documentation/stack-validation.txt
> @@ -192,12 +192,22 @@ they mean, and suggestions for how to fix them.
> use the manual unwind hint macros in asm/unwind_hints.h.
>
> If it's a GCC-compiled .c file, the error may be because the function
> - uses an inline asm() statement which has a "call" instruction. An
> - asm() statement with a call instruction must declare the use of the
> - stack pointer in its output operand. For example, on x86_64:
> + uses an inline asm() statement which has a "call" instruction. To
> + fix this, either:
>
> - register void *__sp asm("rsp");
> - asm volatile("call func" : "+r" (__sp));
> + a) list the stack pointer as an input/output constraint;
> +
> + register void *__sp asm("rsp");
> + asm volatile("call func" : "+r" (__sp));
> +
> + or
> +
> + b) declare a global register variable for the stack pointer.
> +
> + arch/x86/include/asm/asm.h:register unsigned int __sp_unused asm("esp");
> +
> + This is the strategy used on x86_64. It ensures that GCC always
> + sets up the frame pointer before inserting *any* inline asm.
>
> Otherwise the stack frame may not get created before the call.
>
> --
> 2.13.5
>



--
Alexander Potapenko
Software Engineer

Google Germany GmbH
Erika-Mann-StraÃe, 33
80636 MÃnchen

GeschÃftsfÃhrer: Paul Manicle, Halimah DeLaine Prado
Registergericht und -nummer: Hamburg, HRB 86891
Sitz der Gesellschaft: Hamburg