Re: [PATCH, RFC] x86: also CFI-annotate certain inline asm()s
From: Andy Lutomirski
Date: Tue Nov 04 2014 - 14:40:45 EST
On 11/04/2014 01:24 AM, Jan Beulich wrote:
> The main obstacle to having done this long ago was the need to
> determine whether annotations are needed in the first place: They need
> to be avoided when a frame pointer got set up. Since I can't see a way
> to determine this before the compilation phase, this is being achieved
> by inspecting the memory address generated by the compiler in an
> interposed assembler macro. Of course this isn't really nice code, and
> this the main reason I'm posting this as RFC only at this point (with
> the hope that maybe someone has an idea of how to achieve the same
> thing in a more elegant way).
Ask binutils for help?
Is the issue that the CFI annotation you need is different depending on
whether there's a frame pointer or not? If so, can you add some
comments so that mere asm mortals have some prayer of understanding how
your magic works and what the desired output annotations are in the
various cases?
I have enough trouble understanding what the CFA is, and the sorry state
of the docs don't really help.
--Andy
>
> The reason for this being needed are various inline stack pointer
> adjustments. In particular in the context of perf and its use of NMIs,
> Tony observed the unwinder to get confused when an interruption
> happened in the middle of an inline push/pop pair. While the live
> unwinder continues to be of no interest upstream, NMIs being used to
> trigger crash dumps (via external button or watchdog) would appear to
> suffer from the same problem.
>
> With the irqflags generic code generation issue out of the way (see
> https://patchwork.kernel.org/patch/5223561/), the only differences in
> generated code are a number pointless extra frame pointer adjustments
> the compiler decides it needs.
>
> Several pieces of code were intentionally left untouched:
>
> - use site compiled with -fno-omit-frame-pointer:
> arch/x86/include/asm/switch_to.h:switch_to()
>
> - annotations in helper sections (like .fixup) don't work anyway:
> arch/x86/lib/usercopy_32.c
>
> - call stack reaching there unlikely to be of interest:
> arch/x86/boot/
> arch/x86/kernel/cpu/common.c:flag_is_changeable_p()
> arch/x86/include/asm/apm.h
> arch/x86/include/asm/kexec.h:crash_setup_regs()
> arch/x86/pci/pcbios.c:pcibios_get_irq_routing_table()
> arch/x86/xen/enlighten.c:xen_setup_gdt()
> drivers/input/misc/wistron_btns.c:call_bios()
> drivers/pci/hotplug/cpqphp_nvram.c:access_EV()
> drivers/pnp/pnpbios/
> drivers/watchdog/hpwdt.c:asminline_call
>
> - to be done separately (if desired/needed):
> anything pv-ops related (patching as well as wrappers)
> arch/x86/kernel/kprobes/
> arch/x86/kernel/kvm/
> drivers/char/i8k.c:i8k_smm()
> drivers/char/toshiba.c:tosh_smm()
> drivers/lguest/x86/
>
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
> Cc: Tony Jones <tonyj@xxxxxxx>
> ---
> arch/x86/Makefile | 22 ++++++++
> arch/x86/include/asm/frame.h | 103 ++++++++++++++++++++++++++++++++++++++-
> arch/x86/include/asm/irqflags.h | 18 ++++--
> arch/x86/include/asm/processor.h | 22 +++++---
> arch/x86/kernel/irq_32.c | 15 ++++-
> drivers/cpufreq/speedstep-smi.c | 32 ++++++------
> drivers/crypto/padlock-aes.c | 14 +++--
> 7 files changed, 188 insertions(+), 38 deletions(-)
>
> --- 3.18-rc3/arch/x86/Makefile
> +++ 3.18-rc3-x86-inline-unwind-info/arch/x86/Makefile
> @@ -131,6 +131,28 @@ ifdef CONFIG_X86_X32
> endif
> export CONFIG_X86_X32_ABI
>
> +ifeq ($(CONFIG_UNWIND_INFO),y)
> +# Does gcc support generating CFI annotations via .cfi_* directives?
> +ifeq ($(call cc-option,-fdwarf2-cfi-asm),-fdwarf2-cfi-asm)
> +cfi-directives := $(call try-run,\
> + /bin/echo '\
> + void test(void) { \
> + CFI_DECL; \
> + asm volatile(".cfi_undefined 0"); \
> + asm volatile(CFI_ADJUST_CFA_OFFSET(1) :: CFI_INPUTS()); \
> + }' | \
> + $(CC) -DCONFIG_CC_AS_CFI -include $(srctree)/arch/x86/include/asm/frame.h -c -x c -o "$$TMP" - \
> + ,y,n)
> +ifeq ($(cfi-directives),y)
> +KBUILD_CFLAGS += -DCONFIG_CC_AS_CFI
> +else
> +$(warning Inline function CFI annotations not usable with $(CC))
> +endif
> +else
> +$(warning Inline function CFI annotations not supported by $(CC))
> +endif
> +endif
> +
> # Don't unroll struct assignments with kmemcheck enabled
> ifeq ($(CONFIG_KMEMCHECK),y)
> KBUILD_CFLAGS += $(call cc-option,-fno-builtin-memcpy)
> --- 3.18-rc3/arch/x86/include/asm/frame.h
> +++ 3.18-rc3-x86-inline-unwind-info/arch/x86/include/asm/frame.h
> @@ -1,3 +1,8 @@
> +#if 0
> +.if 0
> +#elif !defined(_ASM_X86_FRAME_H)
> +#define _ASM_X86_FRAME_H
> +
> #ifdef __ASSEMBLY__
>
> #include <asm/asm.h>
> @@ -23,4 +28,100 @@
> .endm
> #endif
>
> -#endif /* __ASSEMBLY__ */
> +#elif defined(CONFIG_CC_AS_CFI)
> +
> +#include <linux/stringify.h>
> +
> +asm(".include \"" __FILE__ "\"");
> +
> +#define CFI_DECL volatile struct {} cfi_var
> +/* Helper: Even inside .if 0 blocks gas warns when strings start a line. */
> +#define CFI_BLANK
> +#define CFI_PREFIX "; maybe_cfi_annotate$ %[cfi_var],%c[cfi_sz],"
> +#define CFI_ADJUST_CFA_OFFSET(nr) CFI_PREFIX "adjust_cfa_offset" \
> + CFI_BLANK " (" __stringify(nr) ") * (%c[cfi_sz])"
> +#define CFI_DEF_CFA_REGISTER(reg) CFI_PREFIX "def_cfa_register " #reg
> +#define CFI_INPUTS(more...) [cfi_var] "m" (cfi_var), \
> + [cfi_sz] "i" (sizeof(void *)), ## more
> +
> +#else
> +
> +#define CFI_DECL struct cfi_dummy
> +#define CFI_ADJUST_CFA_OFFSET(nr)
> +#define CFI_DEF_CFA_REGISTER(reg)
> +#define CFI_INPUTS(more...) more
> +
> +#endif /* __ASSEMBLY__ / CONFIG_CC_AS_CFI */
> +
> +#elif 0
> +.endif
> +.macro maybe_cfi_annotate$ mem:req, sz:req, annot:vararg
> + nest$ = 0
> + state$ = 0
> + skip$ = 0
> + .irpc c,\mem
> + .ifeqs "(", "\c"
> + nest$ = nest$ + 1
> + state$ = 0
> + .endif
> +
> + .ifeqs ")", "\c"
> + nest$ = nest$ - 1
> + .elseif (nest$ == 1) && (skip$ == 0)
> + .if state$ == 0
> + .ifeqs "%", "\c"
> + state$ = 1
> + .endif
> + .elseif state$ == 1
> + .if \sz == 4
> + .ifeqs "e", "\c"
> + state$ = 2
> + .endif
> + .endif
> +
> + .if \sz == 8
> + .ifeqs "r", "\c"
> + state$ = 2
> + .endif
> + .endif
> +
> + .if state$ <> 2
> + .exitm
> + .endif
> + .elseif state$ == 2
> + .ifeqs "s", "\c"
> + state$ = state$ | 0x04
> + .endif
> +
> + .ifeqs "b", "\c"
> + state$ = state$ | 0x08
> + .endif
> +
> + .if (state$ & 0x0c) == 0
> + .exitm
> + .endif
> + .elseif ((state$ & 3) == 2) && ((state$ & 0x0c) <> 0)
> + .ifeqs "p", "\c"
> + state$ = state$ | 0x10
> + .else
> + .exitm
> + .endif
> + .else
> + .ifeqs ",", "\c"
> + skip$ = 1
> + .else
> + .exitm
> + .endif
> + .endif
> + .endif
> + .endr
> +
> + .if (nest$ <> 0) && (state$ == 0)
> + .error "unmatched parentheses in '\mem\(')"
> + .elseif (nest$ == 0) && (state$ == 0x16)
> + .cfi_\annot
> + .elseif (nest$ <> 0) || (state$ <> 0x1a)
> + .error "unsupported memory expression '\mem\(')"
> + .endif
> +.endm
> +#endif
> --- 3.18-rc3/arch/x86/include/asm/irqflags.h
> +++ 3.18-rc3-x86-inline-unwind-info/arch/x86/include/asm/irqflags.h
> @@ -4,6 +4,9 @@
> #include <asm/processor-flags.h>
>
> #ifndef __ASSEMBLY__
> +
> +#include <asm/frame.h>
> +
> /*
> * Interrupt control:
> */
> @@ -11,6 +14,7 @@
> static inline unsigned long native_save_fl(void)
> {
> unsigned long flags;
> + CFI_DECL;
>
> /*
> * "=rm" is safe here, because "pop" adjusts the stack before
> @@ -18,9 +22,10 @@ static inline unsigned long native_save_
> * documented behavior of the "pop" instruction.
> */
> asm volatile("# __raw_save_flags\n\t"
> - "pushf ; pop %0"
> + "pushf" CFI_ADJUST_CFA_OFFSET(1) "\n\t"
> + "pop %0" CFI_ADJUST_CFA_OFFSET(-1)
> : "=rm" (flags)
> - : /* no input */
> + : CFI_INPUTS()
> : "memory");
>
> return flags;
> @@ -28,10 +33,13 @@ static inline unsigned long native_save_
>
> static inline void native_restore_fl(unsigned long flags)
> {
> - asm volatile("push %0 ; popf"
> + CFI_DECL;
> +
> + asm volatile("push %[flags]" CFI_ADJUST_CFA_OFFSET(1) "\n\t"
> + "popf" CFI_ADJUST_CFA_OFFSET(-1)
> : /* no output */
> - :"g" (flags)
> - :"memory", "cc");
> + : CFI_INPUTS([flags] "g" (flags))
> + : "memory", "cc");
> }
>
> static inline void native_irq_disable(void)
> --- 3.18-rc3/arch/x86/include/asm/processor.h
> +++ 3.18-rc3-x86-inline-unwind-info/arch/x86/include/asm/processor.h
> @@ -8,7 +8,6 @@ struct task_struct;
> struct mm_struct;
>
> #include <asm/vm86.h>
> -#include <asm/math_emu.h>
> #include <asm/segment.h>
> #include <asm/types.h>
> #include <asm/sigcontext.h>
> @@ -22,6 +21,11 @@ struct mm_struct;
> #include <asm/nops.h>
> #include <asm/special_insns.h>
>
> +#ifdef CONFIG_X86_32
> +#include <asm/math_emu.h>
> +#include <asm/frame.h>
> +#endif
> +
> #include <linux/personality.h>
> #include <linux/cpumask.h>
> #include <linux/cache.h>
> @@ -531,15 +535,17 @@ static inline void native_set_iopl_mask(
> {
> #ifdef CONFIG_X86_32
> unsigned int reg;
> + CFI_DECL;
>
> - asm volatile ("pushfl;"
> - "popl %0;"
> - "andl %1, %0;"
> - "orl %2, %0;"
> - "pushl %0;"
> - "popfl"
> + asm volatile ("pushfl" CFI_ADJUST_CFA_OFFSET(1) "\n\t"
> + "popl %0" CFI_ADJUST_CFA_OFFSET(-1) "\n\t"
> + "andl %[invmask], %0\n\t"
> + "orl %[mask], %0\n\t"
> + "pushl %0" CFI_ADJUST_CFA_OFFSET(1) "\n\t"
> + "popfl" CFI_ADJUST_CFA_OFFSET(-1)
> : "=&r" (reg)
> - : "i" (~X86_EFLAGS_IOPL), "r" (mask));
> + : CFI_INPUTS([invmask] "i" (~X86_EFLAGS_IOPL),
> + [mask] "r" (mask)));
> #endif
> }
>
> --- 3.18-rc3/arch/x86/kernel/irq_32.c
> +++ 3.18-rc3-x86-inline-unwind-info/arch/x86/kernel/irq_32.c
> @@ -20,6 +20,7 @@
> #include <linux/mm.h>
>
> #include <asm/apic.h>
> +#include <asm/frame.h>
>
> DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
> EXPORT_PER_CPU_SYMBOL(irq_stat);
> @@ -60,12 +61,15 @@ DEFINE_PER_CPU(struct irq_stack *, softi
>
> static void call_on_stack(void *func, void *stack)
> {
> + CFI_DECL;
> +
> asm volatile("xchgl %%ebx,%%esp \n"
> + CFI_DEF_CFA_REGISTER(ebx) "\n"
> "call *%%edi \n"
> "movl %%ebx,%%esp \n"
> + CFI_DEF_CFA_REGISTER(esp)
> : "=b" (stack)
> - : "0" (stack),
> - "D"(func)
> + : CFI_INPUTS("0" (stack), "D" (func))
> : "memory", "cc", "edx", "ecx", "eax");
> }
>
> @@ -86,6 +90,7 @@ execute_on_irq_stack(int overflow, struc
> {
> struct irq_stack *curstk, *irqstk;
> u32 *isp, *prev_esp, arg1, arg2;
> + CFI_DECL;
>
> curstk = (struct irq_stack *) current_stack();
> irqstk = __this_cpu_read(hardirq_stack);
> @@ -109,11 +114,13 @@ execute_on_irq_stack(int overflow, struc
> call_on_stack(print_stack_overflow, isp);
>
> asm volatile("xchgl %%ebx,%%esp \n"
> + CFI_DEF_CFA_REGISTER(ebx) "\n"
> "call *%%edi \n"
> "movl %%ebx,%%esp \n"
> + CFI_DEF_CFA_REGISTER(esp)
> : "=a" (arg1), "=d" (arg2), "=b" (isp)
> - : "0" (irq), "1" (desc), "2" (isp),
> - "D" (desc->handle_irq)
> + : CFI_INPUTS("0" (irq), "1" (desc), "2" (isp),
> + "D" (desc->handle_irq))
> : "memory", "cc", "ecx");
> return 1;
> }
> --- 3.18-rc3/drivers/cpufreq/speedstep-smi.c
> +++ 3.18-rc3-x86-inline-unwind-info/drivers/cpufreq/speedstep-smi.c
> @@ -19,6 +19,7 @@
> #include <linux/cpufreq.h>
> #include <linux/delay.h>
> #include <linux/io.h>
> +#include <asm/frame.h>
> #include <asm/ist.h>
> #include <asm/cpu_device_id.h>
>
> @@ -64,6 +65,7 @@ static int speedstep_smi_ownership(void)
> u32 command, result, magic, dummy;
> u32 function = GET_SPEEDSTEP_OWNER;
> unsigned char magic_data[] = "Copyright (c) 1999 Intel Corporation";
> + CFI_DECL;
>
> command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
> magic = virt_to_phys(magic_data);
> @@ -72,14 +74,14 @@ static int speedstep_smi_ownership(void)
> command, smi_port);
>
> __asm__ __volatile__(
> - "push %%ebp\n"
> + "push %%ebp" CFI_ADJUST_CFA_OFFSET(1) "\n\t"
> "out %%al, (%%dx)\n"
> - "pop %%ebp\n"
> + "pop %%ebp" CFI_ADJUST_CFA_OFFSET(-1)
> : "=D" (result),
> "=a" (dummy), "=b" (dummy), "=c" (dummy), "=d" (dummy),
> "=S" (dummy)
> - : "a" (command), "b" (function), "c" (0), "d" (smi_port),
> - "D" (0), "S" (magic)
> + : CFI_INPUTS("a" (command), "b" (function), "c" (0),
> + "d" (smi_port), "D" (0), "S" (magic))
> : "memory"
> );
>
> @@ -102,6 +104,7 @@ static int speedstep_smi_get_freqs(unsig
> u32 command, result = 0, edi, high_mhz, low_mhz, dummy;
> u32 state = 0;
> u32 function = GET_SPEEDSTEP_FREQS;
> + CFI_DECL;
>
> if (!(ist_info.event & 0xFFFF)) {
> pr_debug("bug #1422 -- can't read freqs from BIOS\n");
> @@ -114,17 +117,15 @@ static int speedstep_smi_get_freqs(unsig
> command, smi_port);
>
> __asm__ __volatile__(
> - "push %%ebp\n"
> + "push %%ebp" CFI_ADJUST_CFA_OFFSET(1) "\n\t"
> "out %%al, (%%dx)\n"
> - "pop %%ebp"
> + "pop %%ebp" CFI_ADJUST_CFA_OFFSET(-1)
> : "=a" (result),
> "=b" (high_mhz),
> "=c" (low_mhz),
> "=d" (state), "=D" (edi), "=S" (dummy)
> - : "a" (command),
> - "b" (function),
> - "c" (state),
> - "d" (smi_port), "S" (0), "D" (0)
> + : CFI_INPUTS("a" (command), "b" (function), "c" (state),
> + "d" (smi_port), "S" (0), "D" (0))
> );
>
> pr_debug("result %x, low_freq %u, high_freq %u\n",
> @@ -165,6 +166,8 @@ static void speedstep_set_state(unsigned
> state, command, smi_port);
>
> do {
> + CFI_DECL;
> +
> if (retry) {
> pr_debug("retry %u, previous result %u, waiting...\n",
> retry, result);
> @@ -172,14 +175,15 @@ static void speedstep_set_state(unsigned
> }
> retry++;
> __asm__ __volatile__(
> - "push %%ebp\n"
> + "push %%ebp" CFI_ADJUST_CFA_OFFSET(1) "\n\t"
> "out %%al, (%%dx)\n"
> - "pop %%ebp"
> + "pop %%ebp" CFI_ADJUST_CFA_OFFSET(-1)
> : "=b" (new_state), "=D" (result),
> "=c" (dummy), "=a" (dummy),
> "=d" (dummy), "=S" (dummy)
> - : "a" (command), "b" (function), "c" (state),
> - "d" (smi_port), "S" (0), "D" (0)
> + : CFI_INPUTS("a" (command), "b" (function),
> + "c" (state), "d" (smi_port),
> + "S" (0), "D" (0))
> );
> } while ((new_state != state) && (retry <= SMI_TRIES));
>
> --- 3.18-rc3/drivers/crypto/padlock-aes.c
> +++ 3.18-rc3-x86-inline-unwind-info/drivers/crypto/padlock-aes.c
> @@ -21,6 +21,7 @@
> #include <linux/slab.h>
> #include <asm/cpu_device_id.h>
> #include <asm/byteorder.h>
> +#include <asm/frame.h>
> #include <asm/processor.h>
> #include <asm/i387.h>
>
> @@ -168,12 +169,13 @@ static inline void padlock_reset_key(str
> {
> int cpu = raw_smp_processor_id();
>
> - if (cword != per_cpu(paes_last_cword, cpu))
> -#ifndef CONFIG_X86_64
> - asm volatile ("pushfl; popfl");
> -#else
> - asm volatile ("pushfq; popfq");
> -#endif
> + if (cword != per_cpu(paes_last_cword, cpu)) {
> + CFI_DECL;
> +
> + asm volatile ("pushf" CFI_ADJUST_CFA_OFFSET(1) "\n\t"
> + "popf" CFI_ADJUST_CFA_OFFSET(-1)
> + :: CFI_INPUTS());
> + }
> }
>
> static inline void padlock_store_cword(struct cword *cword)
>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/