Re: [PATCH] [RFC] tracing xsave operations

From: Steven Rostedt
Date: Thu Apr 23 2015 - 17:10:50 EST


On Wed, 22 Apr 2015 16:42:12 -0700
Dave Hansen <dave@xxxxxxxx> wrote:

>
> There has been a glut of tough debug issues lately that all come
> back to the xsave code. This trace point helped us find and fix
> a long-lived bug today, just from the data it showed.
>
> But, the xsave operations are in a header and they're inlined all
> over the place. Including a tracepoint header in to another header
> is causing some interesting issues.
>
> For instance arch/x86/kvm/x86.c's CREATE_TRACE_POINTS comes before
> the last #include, so it tried to create *MY* tracepoints too. It
> also looks like we need to be extra careful to #undef our
> TRACE_SYSTEM, TRACE_INCLUDE_FILE, TRACE_INCLUDE_PATH #defines for
> each trace header, or risk seeing them double-defined.

Yeah, tracepoints really should not be used in header files, or for
static inlines for that matter.

A "trace_foo()" expands into a bit of code, that should not be called
all over the place. It just bloats the kernel.

One solution is to hard code a jump label and do:

extern struct tracepoint __tracepoint_foo;

[...]

if (static_key_false(&__tracepoint_foo.key)) {
call_trace_foo(x);

Where call_trace_foo(x) is a function that does the tracepoint for foo:

void call_trace_foo(arg x)
{
trace_foo(x);
}

This way you still get the benefit of the "zero" overhead of a disabled
tracepoint, but you also avoid include hell and keep all tracepoint
code out of headers.

-- Steve

>
> I'm also seeing some odd redefinition warnings. When I go look at
> ftrace_event.h:609, I don't see anything remotely xsave or FPU
> related:
>
> arch/x86/mm/built-in.o: In function `perf_trace_buf_submit':
> include/linux/ftrace_event.h:609: multiple definition of `__tracepoint_cpu_state_xsave'
> arch/x86/kernel/built-in.o:(__tracepoints+0x5c0): first defined here
> arch/x86/mm/built-in.o: In function `perf_fetch_caller_regs':
> include/linux/perf_event.h:688: multiple definition of `__tracepoint_cpu_state_xrstor'
> arch/x86/kernel/built-in.o:linux.git/arch/x86/kernel/process_64.c:256: first defined here
> make[2]: *** [arch/x86/built-in.o] Error 1
>
> Am I doing something wrong? Are there good alternatives for when
> we want to trace something that's done in a header?
>
> Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
> Cc: Ingo Molnar <mingo@xxxxxxxxxx>
> Cc: ak@xxxxxxxxxxxxxxx
>
> ---
>
> b/arch/x86/include/asm/fpu-internal.h | 8 +-
> b/arch/x86/include/asm/trace/cpustate.h | 105 ++++++++++++++++++++++++++++++++
> b/arch/x86/include/asm/trace/mpx.h | 1
> b/arch/x86/include/asm/xsave.h | 15 +++-
> b/arch/x86/kernel/xsave.c | 24 +++++++
> b/arch/x86/kvm/x86.c | 6 -
> b/include/trace/events/syscalls.h | 3
> 7 files changed, 155 insertions(+), 7 deletions(-)
>
> diff -puN arch/x86/include/asm/fpu-internal.h~xsave-trace-0 arch/x86/include/asm/fpu-internal.h
> --- a/arch/x86/include/asm/fpu-internal.h~xsave-trace-0 2015-04-22 16:30:57.151296546 -0700
> +++ b/arch/x86/include/asm/fpu-internal.h 2015-04-22 16:31:19.482303717 -0700
> @@ -22,6 +22,7 @@
> #include <asm/uaccess.h>
> #include <asm/xsave.h>
> #include <asm/smap.h>
> +#include <asm/trace/cpustate.h>
>
> #ifdef CONFIG_X86_64
> # include <asm/sigcontext32.h>
> @@ -532,10 +533,13 @@ static inline void user_fpu_begin(void)
> static inline void __save_fpu(struct task_struct *tsk)
> {
> if (use_xsave()) {
> + u64 requested_features = -1;
> + struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave;
> if (unlikely(system_state == SYSTEM_BOOTING))
> - xsave_state_booting(&tsk->thread.fpu.state->xsave, -1);
> + xsave_state_booting(xsave, requested_features);
> else
> - xsave_state(&tsk->thread.fpu.state->xsave, -1);
> + xsave_state(xsave, requested_features);
> + trace_cpu_state_xsave(xsave, requested_features);
> } else
> fpu_fxsave(&tsk->thread.fpu);
> }
> diff -puN /dev/null arch/x86/include/asm/trace/cpustate.h
> --- /dev/null 2014-10-10 16:10:57.316716958 -0700
> +++ b/arch/x86/include/asm/trace/cpustate.h 2015-04-22 16:30:57.181297899 -0700
> @@ -0,0 +1,105 @@
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM cpu_state
> +
> +#ifdef CREATE_TRACE_POINTS
> +#warning defining trace points foo
> +#endif
> +
> +#if !defined(_TRACE_CPU_STATE_H) || defined(TRACE_HEADER_MULTI_READ)
> +#define _TRACE_CPU_STATE_H
> +
> +#include <linux/tracepoint.h>
> +
> +#ifndef _TRACE_CPU_STATE_H_HELPERS
> +#define _TRACE_CPU_STATE_H_HELPERS
> +enum xsave_mode {
> + XSAVE_MODE_XSAVE,
> + XSAVE_MODE_XSAVEOPT,
> + XSAVE_MODE_XSAVES,
> +};
> +
> +static inline enum xsave_mode get_xsave_mode(void)
> +{
> + if (system_state == SYSTEM_BOOTING) {
> + if (boot_cpu_has(X86_FEATURE_XSAVES))
> + return XSAVE_MODE_XSAVES;
> + return XSAVE_MODE_XSAVE;
> + }
> + if (boot_cpu_has(X86_FEATURE_XSAVES))
> + return XSAVE_MODE_XSAVES;
> + if (boot_cpu_has(X86_FEATURE_XSAVEOPT))
> + return XSAVE_MODE_XSAVEOPT;
> + return XSAVE_MODE_XSAVE;
> +}
> +#endif /* _TRACE_CPU_STATE_H_HELPERS */
> +
> +extern int xstate_bytes_saved(u64 xstate_bv);
> +
> +TRACE_EVENT(cpu_state_xsave,
> +
> + TP_PROTO(struct xsave_struct *xsave, u64 req_features),
> + TP_ARGS(xsave, req_features),
> +
> + TP_STRUCT__entry(
> + __field(u64, xstate_bv)
> + __field(u64, xcomp_bv)
> + __field(u64, req_features)
> + __field(enum xsave_mode, mode)
> + ),
> +
> + TP_fast_assign(
> + __entry->xstate_bv = xsave->xsave_hdr.xstate_bv;
> + __entry->xcomp_bv = xsave->xsave_hdr.xcomp_bv;
> + __entry->req_features = req_features;
> + __entry->mode = get_xsave_mode();
> + ),
> +
> + TP_printk("mode: %d xstate_bv: 0x%llx xcomp_bv: 0x%llx size: %d requested features: %llx",
> + __entry->mode,
> + __entry->xstate_bv,
> + __entry->xcomp_bv,
> + xstate_bytes_saved(__entry->xstate_bv),
> + __entry->req_features
> + )
> +);
> +
> +TRACE_EVENT(cpu_state_xrstor,
> +
> + TP_PROTO(struct xsave_struct *xsave, u64 mask, int ret),
> + TP_ARGS(xsave, mask, ret),
> +
> + TP_STRUCT__entry(
> + __field(u64, xstate_bv)
> + __field(u64, xcomp_bv)
> + __field(enum xsave_mode, mode)
> + __field(int, ret)
> + __field(u64, mask)
> + ),
> +
> + TP_fast_assign(
> + __entry->xstate_bv = xsave->xsave_hdr.xstate_bv;
> + __entry->xcomp_bv = xsave->xsave_hdr.xcomp_bv;
> + __entry->mode = get_xsave_mode();
> + __entry->ret = ret;
> + __entry->mask = mask;
> + ),
> +
> + TP_printk("mode: %d xstate_bv: 0x%llx xcomp_bv: 0x%llx size: %d ret: %d mask: %llx",
> + __entry->mode,
> + __entry->xstate_bv,
> + __entry->xcomp_bv,
> + xstate_bytes_saved(__entry->xstate_bv),
> + __entry->ret,
> + __entry->mask
> + )
> +);
> +
> +
> +#undef TRACE_INCLUDE_PATH
> +#undef TRACE_INCLUDE_FILE
> +#define TRACE_INCLUDE_PATH asm/trace/
> +#define TRACE_INCLUDE_FILE cpustate
> +#endif /* _TRACE_CPU_STATE_H */
> +
> +/* This part must be outside protection */
> +#include <trace/define_trace.h>
> diff -puN include/trace/events/syscalls.h~xsave-trace-0 include/trace/events/syscalls.h
> --- a/include/trace/events/syscalls.h~xsave-trace-0 2015-04-22 16:30:57.153296636 -0700
> +++ b/include/trace/events/syscalls.h 2015-04-22 16:30:57.181297899 -0700
> @@ -1,6 +1,9 @@
> #undef TRACE_SYSTEM
> #define TRACE_SYSTEM raw_syscalls
> +#undef TRACE_INCLUDE_FILE
> #define TRACE_INCLUDE_FILE syscalls
> +#undef TRACE_INCLUDE_PATH
> +#define TRACE_INCLUDE_PATH trace/events
>
> #if !defined(_TRACE_EVENTS_SYSCALLS_H) || defined(TRACE_HEADER_MULTI_READ)
> #define _TRACE_EVENTS_SYSCALLS_H
> diff -puN arch/x86/kernel/xsave.c~xsave-trace-0 arch/x86/kernel/xsave.c
> --- a/arch/x86/kernel/xsave.c~xsave-trace-0 2015-04-22 16:30:57.155296727 -0700
> +++ b/arch/x86/kernel/xsave.c 2015-04-22 16:30:57.182297945 -0700
> @@ -15,6 +15,10 @@
> #include <asm/tlbflush.h>
> #include <asm/xcr.h>
>
> +#define CREATE_DAVE_TRACE_POINTS
> +#define CREATE_TRACE_POINTS
> +#include <asm/trace/cpustate.h>
> +
> /*
> * Supported feature mask by the CPU and the kernel.
> */
> @@ -555,6 +559,8 @@ static void __init setup_xstate_features
>
> xstate_offsets[leaf] = ebx;
> xstate_sizes[leaf] = eax;
> + printk("xstate[%2d]: offset: %4d size: %3d align: %d\n",
> + leaf, ebx, eax, ecx);
> }
> }
>
> @@ -638,6 +644,7 @@ static void __init setup_init_fpu_buf(vo
> * of any feature which is not represented by all zero's.
> */
> xsave_state_booting(init_xstate_buf, -1);
> + //trace_cpu_state_xsave(init_xstate_buf, -1);
> }
>
> static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO;
> @@ -838,3 +845,20 @@ void *get_xsave_field(int xsave_field)
>
> return get_xsave_addr(&xstate->xsave, xsave_field);
> }
> +
> +int xstate_bytes_saved(u64 xstate_bv)
> +{
> + int i;
> + int ret;
> +
> + if (!cpu_has_xsaves)
> + return user_xstate_size;
> +
> + ret = FXSAVE_SIZE + XSAVE_HDR_SIZE;
> +
> + for (i = 2; i < xstate_features; i++) {
> + if (xstate_bv & (1 << i))
> + ret += xstate_sizes[i];
> + }
> + return ret;
> +}
> diff -puN arch/x86/kvm/x86.c~xsave-trace-0 arch/x86/kvm/x86.c
> --- a/arch/x86/kvm/x86.c~xsave-trace-0 2015-04-22 16:30:57.159296906 -0700
> +++ b/arch/x86/kvm/x86.c 2015-04-22 16:30:57.184298034 -0700
> @@ -51,9 +51,6 @@
> #include <linux/pvclock_gtod.h>
> #include <trace/events/kvm.h>
>
> -#define CREATE_TRACE_POINTS
> -#include "trace.h"
> -
> #include <asm/debugreg.h>
> #include <asm/msr.h>
> #include <asm/desc.h>
> @@ -65,6 +62,9 @@
> #include <asm/pvclock.h>
> #include <asm/div64.h>
>
> +#define CREATE_TRACE_POINTS
> +#include "trace.h"
> +
> #define MAX_IO_MSRS 256
> #define KVM_MAX_MCE_BANKS 32
> #define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P)
> diff -puN arch/x86/include/asm/xsave.h~xsave-trace-0 arch/x86/include/asm/xsave.h
> --- a/arch/x86/include/asm/xsave.h~xsave-trace-0 2015-04-22 16:30:57.161296997 -0700
> +++ b/arch/x86/include/asm/xsave.h 2015-04-22 16:30:57.185298079 -0700
> @@ -3,6 +3,7 @@
>
> #include <linux/types.h>
> #include <asm/processor.h>
> +#include <asm/trace/cpustate.h>
>
> #define XSTATE_CPUID 0x0000000d
>
> @@ -91,6 +92,7 @@ static inline int xsave_state_booting(st
> xstate_fault
> : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
> : "memory");
> +
> return err;
> }
>
> @@ -182,6 +184,8 @@ static inline int xrstor_state(struct xs
> : "0" (0)
> : "memory");
>
> + trace_cpu_state_xrstor(fx, mask, err);
> +
> return err;
> }
>
> @@ -190,7 +194,10 @@ static inline int xrstor_state(struct xs
> */
> static inline void fpu_xsave(struct fpu *fpu)
> {
> - xsave_state(&fpu->state->xsave, -1);
> + /* Requested Feature BitMap */
> + u64 rfbm = -1;
> + xsave_state(&fpu->state->xsave, rfbm);
> + trace_cpu_state_xsave(&fpu->state->xsave, rfbm);
> }
>
> /*
> @@ -198,7 +205,11 @@ static inline void fpu_xsave(struct fpu
> */
> static inline int fpu_xrstor_checking(struct xsave_struct *fx)
> {
> - return xrstor_state(fx, -1);
> + /* Requested Feature BitMap */
> + u64 rfbm = -1;
> + int ret = xrstor_state(fx, rfbm);
> + trace_cpu_state_xrstor(fx, rfbm, ret);
> + return ret;
> }
>
> /*
> diff -puN arch/x86/include/asm/trace/mpx.h~xsave-trace-0 arch/x86/include/asm/trace/mpx.h
> --- a/arch/x86/include/asm/trace/mpx.h~xsave-trace-0 2015-04-22 16:30:57.172297493 -0700
> +++ b/arch/x86/include/asm/trace/mpx.h 2015-04-22 16:30:57.185298079 -0700
> @@ -114,6 +114,7 @@ static inline void trace_bounds_exceptio
>
> #undef TRACE_INCLUDE_PATH
> #define TRACE_INCLUDE_PATH asm/trace/
> +#undef TRACE_INCLUDE_FILE
> #define TRACE_INCLUDE_FILE mpx
> #endif /* _TRACE_MPX_H */
>
> _

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/