Re: [RFC,PATCH 2/2] perf, x86: Utilize the LBRs for machine/oops debugging

From: Stephane Eranian
Date: Mon Mar 29 2010 - 08:47:34 EST


On Mon, Mar 29, 2010 at 1:20 PM, Peter Zijlstra <a.p.zijlstra@xxxxxxxxx> wrote:
> The LBRs are relatively cheap to keep enabled and provide some history
> to OOPSen, also some CPUs are reported to keep them over soft-reset,
> which allows us to use them to debug things like triple faults.
>
> Therefore introduce a boot option: lbr_debug=on, which always enables
> the LBRs and will print the LBRs on CPU init and die().
>

When this is enabled, it will prevent changing the LBR configuration to
record only selected branches, unless you are willing to accept filtered
content in the kernel dump.


> Requested-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
> Signed-off-by: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
> ---
>  arch/x86/include/asm/perf_event.h          |    7 ++
>  arch/x86/kernel/cpu/perf_event_intel.c     |    5 -
>  arch/x86/kernel/cpu/perf_event_intel_lbr.c |   86 +++++++++++++++++++++++++++--
>  arch/x86/kernel/dumpstack.c                |    5 +
>  4 files changed, 95 insertions(+), 8 deletions(-)
>
> Index: linux-2.6/arch/x86/include/asm/perf_event.h
> ===================================================================
> --- linux-2.6.orig/arch/x86/include/asm/perf_event.h
> +++ linux-2.6/arch/x86/include/asm/perf_event.h
> @@ -155,9 +155,14 @@ extern void perf_events_lapic_init(void)
>
> Â#define perf_instruction_pointer(regs) ((regs)->ip)
>
> +void dump_lbr_state(void);
> +void lbr_off(void);
> +
>  #else
>  static inline void init_hw_perf_events(void)            { }
> -static inline void perf_events_lapic_init(void)         { }
> +static inline void perf_events_lapic_init(void)                 { }
> +static inline void dump_lbr_state(void)                         { }
> +static inline void lbr_off(void)                        { }
>  #endif
>
> Â#endif /* _ASM_X86_PERF_EVENT_H */
> Index: linux-2.6/arch/x86/kernel/cpu/perf_event_intel.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/cpu/perf_event_intel.c
> +++ linux-2.6/arch/x86/kernel/cpu/perf_event_intel.c
> @@ -804,10 +804,7 @@ static __initconst const struct x86_pmu
> Âstatic void intel_pmu_cpu_starting(int cpu)
> Â{
> Â Â Â Âinit_debug_store_on_cpu(cpu);
> - Â Â Â /*
> - Â Â Â Â* Deal with CPUs that don't clear their LBRs on power-up.
> - Â Â Â Â*/
> - Â Â Â intel_pmu_lbr_reset();
> + Â Â Â intel_pmu_lbr_starting();
> Â}
>
> Âstatic void intel_pmu_cpu_dying(int cpu)
> Index: linux-2.6/arch/x86/kernel/cpu/perf_event_intel_lbr.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/cpu/perf_event_intel_lbr.c
> +++ linux-2.6/arch/x86/kernel/cpu/perf_event_intel_lbr.c
> @@ -1,12 +1,32 @@
> Â#ifdef CONFIG_CPU_SUP_INTEL
>
> Âenum {
> + Â Â Â LBR_DEBUG_OFF Â Â Â Â Â = 0,
> + Â Â Â LBR_DEBUG_ON Â Â Â Â Â Â= 1,
> +};
> +
> +static int lbr_debug_state __read_mostly;
> +
> +static int __init setup_lbr_debug(char *str)
> +{
> + Â Â Â if (!strcmp(str, "on"))
> + Â Â Â Â Â Â Â lbr_debug_state = LBR_DEBUG_ON;
> + Â Â Â else
> + Â Â Â Â Â Â Â return 0;
> + Â Â Â return 1;
> +}
> +
> +__setup("lbr_debug=", setup_lbr_debug);
> +
> +enum {
> Â Â Â ÂLBR_FORMAT_32 Â Â Â Â Â = 0x00,
> Â Â Â ÂLBR_FORMAT_LIP Â Â Â Â Â= 0x01,
> Â Â Â ÂLBR_FORMAT_EIP Â Â Â Â Â= 0x02,
> Â Â Â ÂLBR_FORMAT_EIP_FLAGS Â Â= 0x03,
> Â};
>
> +static DEFINE_PER_CPU(int, lbr_print_done);
> +
> Â/*
> Â* We only support LBR implementations that have FREEZE_LBRS_ON_PMI
> Â* otherwise it becomes near impossible to get a reliable stack.
> @@ -50,9 +70,6 @@ static void intel_pmu_lbr_reset_64(void)
>
> Âstatic void intel_pmu_lbr_reset(void)
> Â{
> - Â Â Â if (!x86_pmu.lbr_nr)
> - Â Â Â Â Â Â Â return;
> -
> Â Â Â Âif (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
> Â Â Â Â Â Â Â Âintel_pmu_lbr_reset_32();
> Â Â Â Âelse
> @@ -182,6 +199,8 @@ static void intel_pmu_lbr_read(void)
> Â{
> Â Â Â Âstruct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
>
> + Â Â Â cpuc->lbr_stack.nr = 0;
> +
> Â Â Â Âif (!cpuc->lbr_users)
> Â Â Â Â Â Â Â Âreturn;
>
> @@ -215,4 +234,65 @@ static void intel_pmu_lbr_init_atom(void
>    Âx86_pmu.lbr_to   = 0x60;
> Â}
>
> +static void __dump_lbr_state(void)
> +{
> + Â Â Â struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
> + Â Â Â int i;
> +
> + Â Â Â intel_pmu_lbr_read();
> + Â Â Â for (i = 0; i < cpuc->lbr_stack.nr; i++) {
> + Â Â Â Â Â Â Â printk(KERN_DEBUG "CPU%d LBR%d: %pF -> %pF\n",
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â smp_processor_id(), i,
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â (void *)cpuc->lbr_entries[i].from,
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â (void *)cpuc->lbr_entries[i].to);
> + Â Â Â }
> +}
> +
> +static void intel_pmu_lbr_starting(void)
> +{
> + Â Â Â struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
> +
> + Â Â Â if (!x86_pmu.lbr_nr)
> + Â Â Â Â Â Â Â return;
> +
> + Â Â Â cpuc->lbr_users = 1;
> + Â Â Â if (lbr_debug_state && !__get_cpu_var(lbr_print_done)) {
> + Â Â Â Â Â Â Â __get_cpu_var(lbr_print_done) = 1;
> + Â Â Â Â Â Â Â __dump_lbr_state();
> + Â Â Â }
> +
> + Â Â Â intel_pmu_lbr_reset();
> +
> + Â Â Â if (lbr_debug_state)
> + Â Â Â Â Â Â Â __intel_pmu_lbr_enable();
> + Â Â Â else
> + Â Â Â Â Â Â Â cpuc->lbr_users = 0;
> +}
> +
> +void dump_lbr_state(void)
> +{
> + Â Â Â if (!lbr_debug_state)
> + Â Â Â Â Â Â Â return;
> +
> + Â Â Â __dump_lbr_state();
> +}
> +
> +void lbr_off(void)
> +{
> + Â Â Â if (!lbr_debug_state)
> + Â Â Â Â Â Â Â return;
> +
> + Â Â Â __intel_pmu_lbr_disable();
> +}
> +
> +#else
> +
> +void dump_lbr_state(void)
> +{
> +}
> +
> +void lbr_off(void)
> +{
> +}
> +
> Â#endif /* CONFIG_CPU_SUP_INTEL */
> Index: linux-2.6/arch/x86/kernel/dumpstack.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/dumpstack.c
> +++ linux-2.6/arch/x86/kernel/dumpstack.c
> @@ -17,6 +17,7 @@
> Â#include <linux/sysfs.h>
>
> Â#include <asm/stacktrace.h>
> +#include <asm/perf_event.h>
>
> Â#include "dumpstack.h"
>
> @@ -224,6 +225,8 @@ unsigned __kprobes long oops_begin(void)
> Â Â Â Âint cpu;
> Â Â Â Âunsigned long flags;
>
> + Â Â Â lbr_off();
> +
> Â Â Â Âoops_enter();
>
> Â Â Â Â/* racy, but better than risking deadlock. */
> @@ -306,6 +309,7 @@ int __kprobes __die(const char *str, str
> Â Â Â Âprintk_address(regs->ip, 1);
> Â Â Â Âprintk(" RSP <%016lx>\n", regs->sp);
> Â#endif
> + Â Â Â dump_lbr_state();
> Â Â Â Âreturn 0;
> Â}
>
> @@ -343,6 +347,7 @@ die_nmi(char *str, struct pt_regs *regs,
> Â Â Â Âprintk(" on CPU%d, ip %08lx, registers:\n",
> Â Â Â Â Â Â Â Âsmp_processor_id(), regs->ip);
> Â Â Â Âshow_registers(regs);
> + Â Â Â dump_lbr_state();
> Â Â Â Âoops_end(flags, regs, 0);
> Â Â Â Âif (do_panic || panic_on_oops)
> Â Â Â Â Â Â Â Âpanic("Non maskable interrupt");
>
>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/