Re: [RFC] perf_events: how to add Intel LBR support

From: Stephane Eranian
Date: Mon Feb 22 2010 - 09:07:52 EST


Hi,

On Thu, Feb 18, 2010 at 11:25 PM, Peter Zijlstra <peterz@xxxxxxxxxxxxx> wrote:
> On Sun, 2010-02-14 at 11:12 +0100, Peter Zijlstra wrote:
>>
>> Dealing with context switches is also going to be tricky, where we have
>> to safe and 'restore' LBR stacks for per-task counters.
>
> OK, so I poked at the LBR hardware a bit, sadly the TOS really doesn't
> count beyond the few bits it requires :-(
>

The TOS is also a read-only MSR.

> I had hopes it would, since that would make it easier to share the LBR,
> simply take a TOS snapshot when you schedule the counter in, and never
> roll back further for that particular counter.
>
> As it stands we'll have to wipe the full LBR state every time we 'touch'
> it, which makes it less useful for cpu-bound counters.
>
Yes, you need to clean it up each time you snapshot it and each time
you restore it.

The patch does not seem to handle LBR context switches.
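
To make this concrete: on sched-in of a per-task event you would at least
have to wipe the stack, because with a read-only TOS a saved stack cannot
be written back in place. Something along these lines (a rough sketch only,
reusing the lbr_* fields from your patch; the function name is made up and
whether the FROM/TO MSRs are writable everywhere needs checking):

static void intel_pmu_lbr_reset(void)
{
	int i;

	/* wipe the stack so the incoming task does not see stale branches */
	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		wrmsrl(x86_pmu.lbr_from + i, 0);
		if (x86_pmu.lbr_format != LBR_FORMAT_32)
			wrmsrl(x86_pmu.lbr_to + i, 0);
	}
}

The snapshot side on sched-out could reuse intel_pmu_read_lbr(), but then
you also need per-task storage for the saved entries.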

> Also, not all hw (core and pentium-m) supports the freeze_lbrs_on_pmi
> bit, what we could do for those is stick an unconditional LBR disable
> very early in the NMI path and simply roll back the stack until we hit a
> branch into the NMI vector, that should leave a few usable LBR entries.
>
You need to be consistent across the CPUs. If a CPU does not provide
freeze_on_pmi, then I would simply not support it as a first approach.
Same thing if the LBR is less than 4-deep. I don't think you'll get anything
useful out of it.
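
As a sketch of what I mean, the init path could simply zero lbr_nr when the
conditions are not met (again reusing the fields from your patch):

	/*
	 * Sketch: do not advertise LBR when it cannot be frozen on PMI
	 * or is too shallow to be useful once polluted by the handler.
	 */
	if (!(x86_pmu.lbr_ctl & X86_DEBUGCTL_FREEZE_LBRS_ON_PMI) ||
	    x86_pmu.lbr_nr < 4)
		x86_pmu.lbr_nr = 0;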


> For AMD and P6 there is only a single LBR record, AMD seems to freeze
> the thing on #DB traps but the PMI isn't qualified as one afaict,
> rendering the single entry useless (didn't look at the P6 details).
>
> hackery below..

The patch does not address the configuration options available on Intel
Nehalem/Westmere, i.e., LBR_SELECT (see Vol 3a, Table 16-9). We can
handle the priv level separately as it can be derived from the event's
exclude_* attributes. But if you want to allow multiple events in a group
to use PERF_SAMPLE_LBR, then you need to ensure LBR_SELECT is set to the
same value for all of them, priv levels included.
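
For the priv levels, something along these lines would do (just a sketch,
names invented; if I read Table 16-9 correctly, bit 0 set drops branches
ending in ring 0 and bit 1 set drops branches ending in ring >0):

#define LBR_SEL_CPL_EQ_0	(1ULL << 0)	/* drop branches ending in ring 0  */
#define LBR_SEL_CPL_NEQ_0	(1ULL << 1)	/* drop branches ending in ring >0 */

static u64 lbr_select_from_event(struct perf_event *event)
{
	u64 sel = 0;

	/* derive the LBR_SELECT priv filter from the event attributes */
	if (event->attr.exclude_kernel)
		sel |= LBR_SEL_CPL_EQ_0;
	if (event->attr.exclude_user)
		sel |= LBR_SEL_CPL_NEQ_0;

	return sel;
}

Then, when a second PERF_SAMPLE_LBR event is added to a group, its derived
value has to match what is already programmed, otherwise the group must be
rejected.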

Furthermore, LBR_SELECT is shared between HT threads. We need to either
add another field in perf_event_attr or encode this in the config field,
though that is ugly because it relates to the sample_type rather than to
the event itself.

The patch is missing the sampling part, i.e., dump of the LBR (in sequential
order) into the sampling buffer.
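
I would expect something modeled on the PERF_SAMPLE_CALLCHAIN output,
roughly like this in the sample writeout path (sketch only; header.size
would also have to account for it):

	if (event->attr.sample_type & PERF_SAMPLE_LBR) {
		u64 nr = cpuc->lbr_entries;
		int i;

		perf_output_put(&handle, nr);
		/* emit oldest to newest so the buffer reads in program order */
		for (i = nr - 1; i >= 0; i--)
			perf_output_put(&handle, cpuc->lbr_stack[i]);
	}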

I would also select a better name than PERF_SAMPLE_LBR. LBR is an
Intel thing. Maybe PERF_SAMPLE_TAKEN_BRANCH.

> ---
>  arch/x86/include/asm/perf_event.h |   24 +++
>  arch/x86/kernel/cpu/perf_event.c  |  233 +++++++++++++++++++++++++++++++++++---
>  arch/x86/kernel/traps.c           |    3
>  include/linux/perf_event.h        |    7 -
>  4 files changed, 251 insertions(+), 16 deletions(-)
>
> Index: linux-2.6/arch/x86/kernel/cpu/perf_event.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/cpu/perf_event.c
> +++ linux-2.6/arch/x86/kernel/cpu/perf_event.c
> @@ -104,6 +104,10 @@ struct amd_nb {
>  	struct event_constraint event_constraints[X86_PMC_IDX_MAX];
>  };
>
> +struct lbr_entry {
> +	u64 from, to, flags;
> +};
> +
>  struct cpu_hw_events {
>  	struct perf_event	*events[X86_PMC_IDX_MAX]; /* in counter order */
>  	unsigned long		active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
> @@ -117,6 +121,10 @@ struct cpu_hw_events {
>  	u64			tags[X86_PMC_IDX_MAX];
>  	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
>  	struct amd_nb		*amd_nb;
> +
> +	int			lbr_users;
> +	int			lbr_entries;
> +	struct lbr_entry	lbr_stack[16];
>  };
>
>  #define __EVENT_CONSTRAINT(c, n, m, w) {\
> @@ -187,6 +195,19 @@ struct x86_pmu {
>  	void		(*put_event_constraints)(struct cpu_hw_events *cpuc,
>  						 struct perf_event *event);
>  	struct event_constraint *event_constraints;
> +
> +	unsigned long	lbr_tos;
> +	unsigned long	lbr_from, lbr_to;
> +	int		lbr_nr;
> +	int		lbr_ctl;
> +	int		lbr_format;
> +};
> +
> +enum {
> +	LBR_FORMAT_32		= 0x00,
> +	LBR_FORMAT_LIP		= 0x01,
> +	LBR_FORMAT_EIP		= 0x02,
> +	LBR_FORMAT_EIP_FLAGS	= 0x03,
>  };
>
>  static struct x86_pmu x86_pmu __read_mostly;
> @@ -1203,6 +1224,52 @@ static void intel_pmu_disable_bts(void)
>  	update_debugctlmsr(debugctlmsr);
>  }
>
> +static void __intel_pmu_enable_lbr(void)
> +{
> +	u64 debugctl;
> +
> +	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
> +	debugctl |= x86_pmu.lbr_ctl;
> +	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
> +}
> +
> +static void intel_pmu_enable_lbr(void)
> +{
> +	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
> +
> +	if (!x86_pmu.lbr_nr)
> +		return;
> +
> +	if (!cpuc->lbr_users)
> +		__intel_pmu_enable_lbr();
> +
> +	cpuc->lbr_users++;
> +}
> +
> +static void __intel_pmu_disable_lbr(void)
> +{
> +	u64 debugctl;
> +
> +	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
> +	debugctl &= ~x86_pmu.lbr_ctl;
> +	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
> +}
> +
> +static void intel_pmu_disable_lbr(void)
> +{
> +	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
> +
> +	if (!x86_pmu.lbr_nr)
> +		return;
> +
> +	cpuc->lbr_users--;
> +
> +	BUG_ON(cpuc->lbr_users < 0);
> +
> +	if (!cpuc->lbr_users)
> +		__intel_pmu_disable_lbr();
> +}
> +
>  static void intel_pmu_pebs_enable(struct hw_perf_event *hwc)
>  {
>  	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
> @@ -1402,6 +1469,9 @@ void hw_perf_disable(void)
>  	cpuc->enabled = 0;
>  	barrier();
>
> +	if (cpuc->lbr_users)
> +		__intel_pmu_disable_lbr();
> +
>  	x86_pmu.disable_all();
>  }
>
> @@ -1703,6 +1773,10 @@ void hw_perf_enable(void)
>  	barrier();
>
>  	x86_pmu.enable_all();
> +
> +	// XXX
> +	if (cpuc->lbr_users = 1)
> +		__intel_pmu_enable_lbr();
>  }
>
>  static inline u64 intel_pmu_get_status(void)
> @@ -2094,7 +2168,6 @@ static void intel_pmu_drain_pebs_core(st
>  	struct perf_event_header header;
>  	struct perf_sample_data data;
>  	struct pt_regs regs;
> -	u64
>
>  	if (!event || !ds || !x86_pmu.pebs)
>  		return;
> @@ -2114,7 +2187,7 @@ static void intel_pmu_drain_pebs_core(st
>
>  	perf_prepare_sample(&header, &data, event, &regs);
>
> -	event.hw.interrupts += (top - at);
> +	event->hw.interrupts += (top - at);
>  	atomic64_add((top - at) * event->hw.last_period, &event->count);
>
>  	if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1))
> @@ -2188,6 +2261,84 @@ static void intel_pmu_drain_pebs_nhm(str
>  	}
>  }
>
> +static inline u64 intel_pmu_lbr_tos(void)
> +{
> +	u64 tos;
> +
> +	rdmsrl(x86_pmu.lbr_tos, tos);
> +	return tos;
> +}
> +
> +static void
> +intel_pmu_read_lbr_32(struct cpu_hw_events *cpuc, struct perf_event *event)
> +{
> +	struct hw_perf_event *hwc = &event->hw;
> +	unsigned long mask = x86_pmu.lbr_nr - 1;
> +	u64 tos = intel_pmu_lbr_tos();
> +	int i;
> +
> +	for (i = 0; tos > hwc->lbr_tos && i < x86_pmu.lbr_nr; i++, tos--) {
> +		unsigned long lbr_idx = (tos - i) & mask;
> +		union {
> +			struct {
> +				u32 from;
> +				u32 to;
> +			};
> +			u64     lbr;
> +		} msr_lastbranch;
> +
> +		rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
> +
> +		cpuc->lbr_stack[i].from  = msr_lastbranch.from;
> +		cpuc->lbr_stack[i].to    = msr_lastbranch.to;
> +		cpuc->lbr_stack[i].flags = 0;
> +	}
> +	cpuc->lbr_entries = i;
> +}
> +
> +#define LBR_FROM_FLAG_MISPRED	(1ULL << 63)
> +
> +/*
> + * Due to lack of segmentation in Linux the effective address (offset)
> + * is the same as the linear address, allowing us to merge the LIP and EIP
> + * LBR formats.
> + */
> +static void
> +intel_pmu_read_lbr_64(struct cpu_hw_events *cpuc, struct perf_event *event)
> +{
> +	struct hw_perf_event *hwc = &event->hw;
> +	unsigned long mask = x86_pmu.lbr_nr - 1;
> +	u64 tos = intel_pmu_lbr_tos();
> +	int i;
> +
> +	for (i = 0; tos > hwc->lbr_tos && i < x86_pmu.lbr_nr; i++, tos--) {
> +		unsigned long lbr_idx = (tos - i) & mask;
> +		u64 from, to, flags = 0;
> +
> +		rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
> +		rdmsrl(x86_pmu.lbr_to   + lbr_idx, to);
> +
> +		if (x86_pmu.lbr_format == LBR_FORMAT_EIP_FLAGS) {
> +			flags = !!(from & LBR_FROM_FLAG_MISPRED);
> +			from = (u64)((((s64)from) << 1) >> 1);
> +		}
> +
> +		cpuc->lbr_stack[i].from  = from;
> +		cpuc->lbr_stack[i].to    = to;
> +		cpuc->lbr_stack[i].flags = flags;
> +	}
> +	cpuc->lbr_entries = i;
> +}
> +
> +static void
> +intel_pmu_read_lbr(struct cpu_hw_events *cpuc, struct perf_event *event)
> +{
> +	if (x86_pmu.lbr_format == LBR_FORMAT_32)
> +		intel_pmu_read_lbr_32(cpuc, event);
> +	else
> +		intel_pmu_read_lbr_64(cpuc, event);
> +}
> +
>  static void x86_pmu_stop(struct perf_event *event)
>  {
>  	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
> @@ -2456,11 +2607,26 @@ perf_event_nmi_handler(struct notifier_b
>  	 * If the first NMI handles both, the latter will be empty and daze
>  	 * the CPU.
>  	 */
> +	trace_printk("LBR TOS: %Ld\n", intel_pmu_lbr_tos());
>  	x86_pmu.handle_irq(regs);
>
>  	return NOTIFY_STOP;
>  }
>
> +static __read_mostly struct notifier_block perf_event_nmi_notifier = {
> +	.notifier_call		= perf_event_nmi_handler,
> +	.next			= NULL,
> +	.priority		= 1
> +};
> +
> +void perf_nmi_exit(void)
> +{
> +	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
> +
> +	if (cpuc->lbr_users)
> +		__intel_pmu_enable_lbr();
> +}
> +
>  static struct event_constraint unconstrained;	/* can schedule */
>  static struct event_constraint null_constraint; /* can't schedule */
>  static struct event_constraint bts_constraint =
> @@ -2761,12 +2927,6 @@ undo:
>  	return ret;
>  }
>
> -static __read_mostly struct notifier_block perf_event_nmi_notifier = {
> -	.notifier_call		= perf_event_nmi_handler,
> -	.next			= NULL,
> -	.priority		= 1
> -};
> -
>  static __initconst struct x86_pmu p6_pmu = {
>  	.name			= "p6",
>  	.handle_irq		= x86_pmu_handle_irq,
> @@ -2793,7 +2953,7 @@ static __initconst struct x86_pmu p6_pmu
>  	.event_bits		= 32,
>  	.event_mask		= (1ULL << 32) - 1,
>  	.get_event_constraints	= intel_get_event_constraints,
> -	.event_constraints	= intel_p6_event_constraints
> +	.event_constraints	= intel_p6_event_constraints,
>  };
>
>  static __initconst struct x86_pmu core_pmu = {
> @@ -2873,18 +3033,26 @@ static __init int p6_pmu_init(void)
>  	case 7:
>  	case 8:
>  	case 11: /* Pentium III */
> +		x86_pmu = p6_pmu;
> +
> +		break;
>  	case 9:
> -	case 13:
> -		/* Pentium M */
> +	case 13: /* Pentium M */
> +		x86_pmu = p6_pmu;
> +
> +		x86_pmu.lbr_nr = 8;
> +		x86_pmu.lbr_tos = 0x01c9;
> +		x86_pmu.lbr_ctl = X86_DEBUGCTL_LBR;
> +		x86_pmu.lbr_from = 0x40;
> +
>  		break;
> +
>  	default:
>  		pr_cont("unsupported p6 CPU model %d ",
>  			boot_cpu_data.x86_model);
>  		return -ENODEV;
>  	}
>
> -	x86_pmu = p6_pmu;
> -
>  	return 0;
>  }
>
> @@ -2925,6 +3093,9 @@ static __init int intel_pmu_init(void)
>  	x86_pmu.event_bits		= eax.split.bit_width;
>  	x86_pmu.event_mask		= (1ULL << eax.split.bit_width) - 1;
>
> +	rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
> +	x86_pmu.lbr_format = capabilities & 0x1f;
> +
>  	/*
>  	 * Quirk: v2 perfmon does not report fixed-purpose events, so
>  	 * assume at least 3 events:
> @@ -2973,6 +3144,10 @@ no_datastore:
>  	 */
>  	switch (boot_cpu_data.x86_model) {
>  	case 14: /* 65 nm core solo/duo, "Yonah" */
> +		x86_pmu.lbr_nr = 8;
> +		x86_pmu.lbr_tos = 0x01c9;
> +		x86_pmu.lbr_ctl = X86_DEBUGCTL_LBR;
> +		x86_pmu.lbr_from = 0x40;
>  		pr_cont("Core events, ");
>  		break;
>
> @@ -2980,6 +3155,13 @@ no_datastore:
>  	case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
>  	case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
>  	case 29: /* six-core 45 nm xeon "Dunnington" */
> +		x86_pmu.lbr_nr = 4;
> +		x86_pmu.lbr_tos = 0x01c9;
> +		x86_pmu.lbr_ctl = X86_DEBUGCTL_LBR |
> +				  X86_DEBUGCTL_FREEZE_LBRS_ON_PMI;
> +		x86_pmu.lbr_from = 0x40;
> +		x86_pmu.lbr_to = 0x60;
> +
>  		memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
>  		       sizeof(hw_cache_event_ids));
>
> @@ -2989,13 +3171,28 @@ no_datastore:
>
>  	case 26: /* 45 nm nehalem, "Bloomfield" */
>  	case 30: /* 45 nm nehalem, "Lynnfield" */
> +		x86_pmu.lbr_nr = 16;
> +		x86_pmu.lbr_tos = 0x01c9;
> +		x86_pmu.lbr_ctl = X86_DEBUGCTL_LBR |
> +				  X86_DEBUGCTL_FREEZE_LBRS_ON_PMI;
> +		x86_pmu.lbr_from = 0x680;
> +		x86_pmu.lbr_to = 0x6c0;
> +
>  		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
>  		       sizeof(hw_cache_event_ids));
>
>  		x86_pmu.event_constraints = intel_nehalem_event_constraints;
>  		pr_cont("Nehalem/Corei7 events, ");
>  		break;
> -	case 28:
> +
> +	case 28: /* Atom */
> +		x86_pmu.lbr_nr = 8;
> +		x86_pmu.lbr_tos = 0x01c9;
> +		x86_pmu.lbr_ctl = X86_DEBUGCTL_LBR |
> +				  X86_DEBUGCTL_FREEZE_LBRS_ON_PMI;
> +		x86_pmu.lbr_from = 0x40;
> +		x86_pmu.lbr_to = 0x60;
> +
>  		memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
>  		       sizeof(hw_cache_event_ids));
>
> @@ -3005,12 +3202,20 @@ no_datastore:
>
>  	case 37: /* 32 nm nehalem, "Clarkdale" */
>  	case 44: /* 32 nm nehalem, "Gulftown" */
> +		x86_pmu.lbr_nr = 16;
> +		x86_pmu.lbr_tos = 0x01c9;
> +		x86_pmu.lbr_ctl = X86_DEBUGCTL_LBR |
> +				  X86_DEBUGCTL_FREEZE_LBRS_ON_PMI;
> +		x86_pmu.lbr_from = 0x680;
> +		x86_pmu.lbr_to = 0x6c0;
> +
>  		memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
>  		       sizeof(hw_cache_event_ids));
>
>  		x86_pmu.event_constraints = intel_westmere_event_constraints;
>  		pr_cont("Westmere events, ");
>  		break;
> +
>  	default:
>  		/*
>  		 * default constraints for v2 and up
> Index: linux-2.6/arch/x86/include/asm/perf_event.h
> ===================================================================
> --- linux-2.6.orig/arch/x86/include/asm/perf_event.h
> +++ linux-2.6/arch/x86/include/asm/perf_event.h
> @@ -1,6 +1,8 @@
>  #ifndef _ASM_X86_PERF_EVENT_H
>  #define _ASM_X86_PERF_EVENT_H
>
> +#include <asm/msr.h>
> +
>  /*
>   * Performance event hw details:
>   */
> @@ -122,11 +124,31 @@ union cpuid10_edx {
>  extern void init_hw_perf_events(void);
>  extern void perf_events_lapic_init(void);
>
> +#define X86_DEBUGCTL_LBR		(1 << 0)
> +#define X86_DEBUGCTL_FREEZE_LBRS_ON_PMI	(1 << 11)
> +
> +static __always_inline void perf_nmi_enter(void)
> +{
> +	u64 debugctl;
> +
> +	/*
> +	 * Unconditionally disable LBR so as to minimally pollute the LBR stack.
> +	 * XXX: paravirt will screw us over massive
> +	 */
> +	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
> +	debugctl &= ~X86_DEBUGCTL_LBR;
> +	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
> +}
> +
> +extern void perf_nmi_exit(void);
> +
>  #define PERF_EVENT_INDEX_OFFSET			0
>
>  #else
>  static inline void init_hw_perf_events(void)		{ }
> -static inline void perf_events_lapic_init(void)	{ }
> +static inline void perf_events_lapic_init(void)	{ }
> +static inline void perf_nmi_enter(void)		{ }
> +static inline void perf_nmi_exit(void)			{ }
>  #endif
>
>  #endif /* _ASM_X86_PERF_EVENT_H */
> Index: linux-2.6/arch/x86/kernel/traps.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/traps.c
> +++ linux-2.6/arch/x86/kernel/traps.c
> @@ -45,6 +45,7 @@
>  #endif
>
>  #include <asm/kmemcheck.h>
> +#include <asm/perf_event.h>
>  #include <asm/stacktrace.h>
>  #include <asm/processor.h>
>  #include <asm/debugreg.h>
> @@ -442,6 +443,7 @@ static notrace __kprobes void default_do
>  dotraplinkage notrace __kprobes void
>  do_nmi(struct pt_regs *regs, long error_code)
>  {
> +	perf_nmi_enter();
>  	nmi_enter();
>
>  	inc_irq_stat(__nmi_count);
> @@ -450,6 +452,7 @@ do_nmi(struct pt_regs *regs, long error_
>  		default_do_nmi(regs);
>
>  	nmi_exit();
> +	perf_nmi_exit();
>  }
>
>  void stop_nmi(void)
> Index: linux-2.6/include/linux/perf_event.h
> ===================================================================
> --- linux-2.6.orig/include/linux/perf_event.h
> +++ linux-2.6/include/linux/perf_event.h
> @@ -125,8 +125,9 @@ enum perf_event_sample_format {
>  	PERF_SAMPLE_PERIOD			= 1U << 8,
>  	PERF_SAMPLE_STREAM_ID			= 1U << 9,
>  	PERF_SAMPLE_RAW				= 1U << 10,
> +	PERF_SAMPLE_LBR				= 1U << 11,
>
> -	PERF_SAMPLE_MAX = 1U << 11,		/* non-ABI */
> +	PERF_SAMPLE_MAX = 1U << 12,		/* non-ABI */
>  };
>
>  /*
> @@ -396,6 +397,9 @@ enum perf_event_type {
>  	 *	{ u64			nr,
>  	 *	  u64			ips[nr];  } && PERF_SAMPLE_CALLCHAIN
>  	 *
> +	 *	{ u64			nr;
> +	 *	  struct lbr_format	lbr[nr];  } && PERF_SAMPLE_LBR
> +	 *
>  	 *	#
>  	 *	# The RAW record below is opaque data wrt the ABI
>  	 *	#
> @@ -483,6 +487,7 @@ struct hw_perf_event {
>  			int		idx;
>  			int		last_cpu;
>  			int		pebs;
> +			u64		lbr_tos;
>  		};
>  		struct { /* software */
>  			s64		remaining;
>
>
>