[RFC][PATCH] x86, fpu: trace points

From: Dave Hansen
Date: Tue Nov 10 2015 - 15:44:30 EST



From: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>

I've been carrying this patch around for a bit and it's helped me
solve at least a couple FPU-related issues. It's a _bit_ of a
hack and probably too indiscriminate for mainline.

But, I'd be really interested to get something similar in to
mainline.

How do folks feel about this as it stands? Could we do something
more structured?

Signed-off-by: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
---

b/arch/x86/include/asm/fpu/internal.h | 5 +
b/arch/x86/include/asm/trace/fpu.h | 115 ++++++++++++++++++++++++++++++++++
b/arch/x86/kernel/fpu/core.c | 18 +++++
b/arch/x86/kernel/fpu/signal.c | 2
b/arch/x86/kvm/x86.c | 6 -
5 files changed, 143 insertions(+), 3 deletions(-)

diff -puN /dev/null arch/x86/include/asm/trace/fpu.h
--- /dev/null 2015-07-13 14:24:11.435656502 -0700
+++ b/arch/x86/include/asm/trace/fpu.h 2015-11-10 12:43:24.809347944 -0800
@@ -0,0 +1,115 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM fpu
+
+#if !defined(_TRACE_FPU_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_FPU_H
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(fpu_state_event,
+
+ TP_PROTO(struct fpu *fpu),
+ TP_ARGS(fpu),
+
+ TP_STRUCT__entry(
+ __field(struct fpu *, fpu)
+ __field(bool, fpregs_active)
+ __field(bool, fpstate_active)
+ __field(int, counter)
+ __field(u64, xfeatures)
+ __field(u64, xcomp_bv)
+ ),
+
+ TP_fast_assign(
+ __entry->fpu = fpu;
+ __entry->fpregs_active = fpu->fpregs_active;
+ __entry->fpstate_active = fpu->fpstate_active;
+ __entry->counter = fpu->counter;
+ if (cpu_has_xsave) {
+ __entry->xfeatures = fpu->state.xsave.header.xfeatures;
+ __entry->xcomp_bv = fpu->state.xsave.header.xcomp_bv;
+ }
+ ),
+ TP_printk("fpu: %p fpregs_active: %d fpstate_active: %d counter: %d xfeatures: %llx xcomp_bv: %llx",
+ __entry->fpu,
+ __entry->fpregs_active,
+ __entry->fpstate_active,
+ __entry->counter,
+ __entry->xfeatures,
+ __entry->xcomp_bv
+ )
+);
+
+DEFINE_EVENT(fpu_state_event, fpu_state,
+ TP_PROTO(struct fpu *fpu),
+ TP_ARGS(fpu)
+);
+
+DEFINE_EVENT(fpu_state_event, fpu_before_save,
+ TP_PROTO(struct fpu *fpu),
+ TP_ARGS(fpu)
+);
+
+DEFINE_EVENT(fpu_state_event, fpu_after_save,
+ TP_PROTO(struct fpu *fpu),
+ TP_ARGS(fpu)
+);
+
+DEFINE_EVENT(fpu_state_event, fpu_before_restore,
+ TP_PROTO(struct fpu *fpu),
+ TP_ARGS(fpu)
+);
+
+DEFINE_EVENT(fpu_state_event, fpu_after_restore,
+ TP_PROTO(struct fpu *fpu),
+ TP_ARGS(fpu)
+);
+
+DEFINE_EVENT(fpu_state_event, fpu_regs_activated,
+ TP_PROTO(struct fpu *fpu),
+ TP_ARGS(fpu)
+);
+
+DEFINE_EVENT(fpu_state_event, fpu_regs_deactivated,
+ TP_PROTO(struct fpu *fpu),
+ TP_ARGS(fpu)
+);
+
+DEFINE_EVENT(fpu_state_event, fpu_activate_state,
+ TP_PROTO(struct fpu *fpu),
+ TP_ARGS(fpu)
+);
+
+DEFINE_EVENT(fpu_state_event, fpu_deactivate_state,
+ TP_PROTO(struct fpu *fpu),
+ TP_ARGS(fpu)
+);
+
+DEFINE_EVENT(fpu_state_event, fpu_init_state,
+ TP_PROTO(struct fpu *fpu),
+ TP_ARGS(fpu)
+);
+
+DEFINE_EVENT(fpu_state_event, fpu_dropped,
+ TP_PROTO(struct fpu *fpu),
+ TP_ARGS(fpu)
+);
+
+DEFINE_EVENT(fpu_state_event, fpu_copy_src,
+ TP_PROTO(struct fpu *fpu),
+ TP_ARGS(fpu)
+);
+
+DEFINE_EVENT(fpu_state_event, fpu_copy_dst,
+ TP_PROTO(struct fpu *fpu),
+ TP_ARGS(fpu)
+);
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH asm/trace/
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE fpu
+#endif /* _TRACE_FPU_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff -puN arch/x86/kernel/fpu/core.c~fpu-trace-0 arch/x86/kernel/fpu/core.c
--- a/arch/x86/kernel/fpu/core.c~fpu-trace-0 2015-11-10 12:39:05.188577854 -0800
+++ b/arch/x86/kernel/fpu/core.c 2015-11-10 12:40:14.009697934 -0800
@@ -12,6 +12,9 @@

#include <linux/hardirq.h>

+#define CREATE_TRACE_POINTS
+#include <asm/trace/fpu.h>
+
/*
* Represents the initial FPU state. It's mostly (but not completely) zeroes,
* depending on the FPU hardware format:
@@ -188,10 +191,12 @@ void fpu__save(struct fpu *fpu)
WARN_ON_FPU(fpu != &current->thread.fpu);

preempt_disable();
+ trace_fpu_before_save(fpu);
if (fpu->fpregs_active) {
if (!copy_fpregs_to_fpstate(fpu))
fpregs_deactivate(fpu);
}
+ trace_fpu_after_save(fpu);
preempt_enable();
}
EXPORT_SYMBOL_GPL(fpu__save);
@@ -273,6 +278,9 @@ int fpu__copy(struct fpu *dst_fpu, struc
if (src_fpu->fpstate_active && cpu_has_fpu)
fpu_copy(dst_fpu, src_fpu);

+ trace_fpu_copy_src(src_fpu);
+ trace_fpu_copy_dst(dst_fpu);
+
return 0;
}

@@ -286,7 +294,9 @@ void fpu__activate_curr(struct fpu *fpu)

if (!fpu->fpstate_active) {
fpstate_init(&fpu->state);
+ trace_fpu_init_state(fpu);

+ trace_fpu_activate_state(fpu);
/* Safe to do for the current task: */
fpu->fpstate_active = 1;
}
@@ -312,7 +322,9 @@ void fpu__activate_fpstate_read(struct f
} else {
if (!fpu->fpstate_active) {
fpstate_init(&fpu->state);
+ trace_fpu_init_state(fpu);

+ trace_fpu_activate_state(fpu);
/* Safe to do for current and for stopped child tasks: */
fpu->fpstate_active = 1;
}
@@ -345,7 +357,9 @@ void fpu__activate_fpstate_write(struct
fpu->last_cpu = -1;
} else {
fpstate_init(&fpu->state);
+ trace_fpu_init_state(fpu);

+ trace_fpu_activate_state(fpu);
/* Safe to do for stopped child tasks: */
fpu->fpstate_active = 1;
}
@@ -367,9 +381,11 @@ void fpu__restore(struct fpu *fpu)

/* Avoid __kernel_fpu_begin() right after fpregs_activate() */
kernel_fpu_disable();
+ trace_fpu_before_restore(fpu);
fpregs_activate(fpu);
copy_kernel_to_fpregs(&fpu->state);
fpu->counter++;
+ trace_fpu_after_restore(fpu);
kernel_fpu_enable();
}
EXPORT_SYMBOL_GPL(fpu__restore);
@@ -398,6 +414,8 @@ void fpu__drop(struct fpu *fpu)

fpu->fpstate_active = 0;

+ trace_fpu_dropped(fpu);
+
preempt_enable();
}

diff -puN arch/x86/include/asm/fpu/internal.h~fpu-trace-0 arch/x86/include/asm/fpu/internal.h
--- a/arch/x86/include/asm/fpu/internal.h~fpu-trace-0 2015-11-10 12:39:05.193578081 -0800
+++ b/arch/x86/include/asm/fpu/internal.h 2015-11-10 12:39:05.208578761 -0800
@@ -17,6 +17,7 @@
#include <asm/user.h>
#include <asm/fpu/api.h>
#include <asm/fpu/xstate.h>
+#include <asm/trace/fpu.h>

/*
* High level FPU state handling functions:
@@ -527,6 +528,7 @@ static inline void __fpregs_deactivate(s

fpu->fpregs_active = 0;
this_cpu_write(fpu_fpregs_owner_ctx, NULL);
+ trace_fpu_regs_deactivated(fpu);
}

/* Must be paired with a 'clts' (fpregs_activate_hw()) before! */
@@ -536,6 +538,7 @@ static inline void __fpregs_activate(str

fpu->fpregs_active = 1;
this_cpu_write(fpu_fpregs_owner_ctx, fpu);
+ trace_fpu_regs_activated(fpu);
}

/*
@@ -606,11 +609,13 @@ switch_fpu_prepare(struct fpu *old_fpu,

/* But leave fpu_fpregs_owner_ctx! */
old_fpu->fpregs_active = 0;
+ trace_fpu_regs_deactivated(old_fpu);

/* Don't change CR0.TS if we just switch! */
if (fpu.preload) {
new_fpu->counter++;
__fpregs_activate(new_fpu);
+ trace_fpu_regs_activated(new_fpu);
prefetch(&new_fpu->state);
} else {
__fpregs_deactivate_hw();
diff -puN arch/x86/kernel/fpu/signal.c~fpu-trace-0 arch/x86/kernel/fpu/signal.c
--- a/arch/x86/kernel/fpu/signal.c~fpu-trace-0 2015-11-10 12:39:05.195578171 -0800
+++ b/arch/x86/kernel/fpu/signal.c 2015-11-10 12:39:05.208578761 -0800
@@ -10,6 +10,7 @@
#include <asm/fpu/regset.h>

#include <asm/sigframe.h>
+#include <asm/trace/fpu.h>

static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32;

@@ -311,6 +312,7 @@ static int __fpu__restore_sig(void __use
if (__copy_from_user(&fpu->state.xsave, buf_fx, state_size) ||
__copy_from_user(&env, buf, sizeof(env))) {
fpstate_init(&fpu->state);
+ trace_fpu_init_state(fpu);
err = -1;
} else {
sanitize_restored_xstate(tsk, &env, xfeatures, fx_only);
diff -puN arch/x86/kvm/x86.c~fpu-trace-0 arch/x86/kvm/x86.c
--- a/arch/x86/kvm/x86.c~fpu-trace-0 2015-11-10 12:39:05.202578489 -0800
+++ b/arch/x86/kvm/x86.c 2015-11-10 12:39:05.211578897 -0800
@@ -55,9 +55,6 @@
#include <linux/irqbypass.h>
#include <trace/events/kvm.h>

-#define CREATE_TRACE_POINTS
-#include "trace.h"
-
#include <asm/debugreg.h>
#include <asm/msr.h>
#include <asm/desc.h>
@@ -68,6 +65,9 @@
#include <asm/div64.h>
#include <asm/irq_remapping.h>

+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
#define MAX_IO_MSRS 256
#define KVM_MAX_MCE_BANKS 32
#define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P)
_
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/