Re: [PATCH 1/2] perf-events: Add support for supplementary eventregisters v2

From: Stephane Eranian
Date: Fri Nov 12 2010 - 12:17:56 EST


I looked at this patch thinking how could this be reused for LBR_SELECT.

I am wondering if storing the extra MSR value in attr.config is really the way
to go now as opposed to adding/overloading a field.

For OFFCORE_RESPONSE, it makes sense to use attr.config because this is
a refinement of the event (a sub-sub event if you want).

For LBR_SELECT, you also need to pass a value, but there is no specific event
associated with it. You can enable LBR with any event you want. Thus,
storing the
LBR_SELECT value independently would also make sense. And if we have this
field for LBR_SELECT then we may as well use it for OFFCORE_REPONSE.

The alternative would be to consider LBR_SELECT also as a refinement of
the event being measured. Though by itself, it wouldn't do anything, it would
have to be combine with a PERF_SAMPLE_BRANCH_STACK to attr.sample_type.

What do you think?

On Fri, Nov 12, 2010 at 5:55 PM, Andi Kleen <andi@xxxxxxxxxxxxxx> wrote:
> From: Andi Kleen <ak@xxxxxxxxxxxxxxx>
>
> Intel Nehalem/Westmere have a special OFFCORE_RESPONSE event
> that can be used to monitor any offcore accesses from a core.
> This is a very useful event for various tunings, and it's
> also needed to implement the generic LLC-* events correctly.
>
> Unfortunately this event requires programming a mask in a separate
> register. And worse this separate register is per core, not per
> CPU thread.
>
> This patch adds:
> - Teaches perf_events that OFFCORE_RESPONSE need extra parameters.
> The extra parameters are passed by user space in the unused upper
> 32bits of the config word.
> - Add support to the Intel perf_event core to schedule the per
> core resource. I tried to add generic infrastructure for this
> that could be also used for other core resources.
> The basic code has is patterned after the similar AMD northbridge
> constraints code.
>
> Thanks to Stephane Eranian who pointed out some problems
> in the original version and suggested improvements.
>
> Full git tree:
> git://git.kernel.org/pub/scm/linux/kernel/git/ak/linux-misc-2.6.git perf-offcore2
> Cc: eranian@xxxxxxxxxx
> v2: Lots of updates based on review feedback. Also fixes some issues
> Signed-off-by: Andi Kleen <ak@xxxxxxxxxxxxxxx>
> ---
> Âarch/x86/kernel/cpu/perf_event.c    |  70 ++++++++++++++
> Âarch/x86/kernel/cpu/perf_event_intel.c | Â159 ++++++++++++++++++++++++++++++++
> Âinclude/linux/perf_event.h       |  Â2 +
> Â3 files changed, 231 insertions(+), 0 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
> index ed63101..346006a 100644
> --- a/arch/x86/kernel/cpu/perf_event.c
> +++ b/arch/x86/kernel/cpu/perf_event.c
> @@ -93,6 +93,8 @@ struct amd_nb {
> Â Â Â Âstruct event_constraint event_constraints[X86_PMC_IDX_MAX];
> Â};
>
> +struct intel_percore;
> +
> Â#define MAX_LBR_ENTRIES Â Â Â Â Â Â Â Â16
>
> Âstruct cpu_hw_events {
> @@ -127,6 +129,13 @@ struct cpu_hw_events {
>    Âstruct perf_branch_stack    Âlbr_stack;
>    Âstruct perf_branch_entry    Âlbr_entries[MAX_LBR_ENTRIES];
>
> + Â Â Â /*
> + Â Â Â Â* Intel percore register state.
> + Â Â Â Â* Coordinate shared resources between HT threads.
> + Â Â Â Â*/
> +    int               percore_used; /* Used by this CPU? */
> +    struct intel_percore      Â*per_core;
> +
> Â Â Â Â/*
> Â Â Â Â * AMD specific bits
> Â Â Â Â */
> @@ -175,6 +184,32 @@ struct cpu_hw_events {
> Â#define for_each_event_constraint(e, c) Â Â Â Â\
> Â Â Â Âfor ((e) = (c); (e)->weight; (e)++)
>
> +/*
> + * Extra registers for specific events.
> + * Some events need large masks and require external MSRs.
> + * Define a mapping to these extra registers.
> + * The actual contents are still encoded in unused parts of the
> + * original config u64.
> + */
> +struct extra_reg {
> +    unsigned int      Âevent;
> +    unsigned int      Âmsr;
> +    unsigned int      Âextra_shift;
> + Â Â Â u64 Â Â Â Â Â Â Â Â Â Â config_mask;
> + Â Â Â u64 Â Â Â Â Â Â Â Â Â Â valid_mask;
> +};
> +
> +#define EVENT_EXTRA_REG(e, ms, m, vm, es) { Â Â\
> + Â Â Â .event = (e), Â Â Â Â Â \
> + Â Â Â .msr = (ms), Â Â Â Â Â Â\
> + Â Â Â .config_mask = (m), Â Â \
> + Â Â Â .valid_mask = (vm), Â Â \
> + Â Â Â .extra_shift = (es), Â Â\
> + Â Â Â }
> +#define INTEL_EVENT_EXTRA_REG(event, msr, vm, es) Â Â Â\
> + Â Â Â EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, es)
> +#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, 0)
> +
> Âunion perf_capabilities {
> Â Â Â Âstruct {
>        Âu64   lbr_format  Â: 6;
> @@ -219,6 +254,7 @@ struct x86_pmu {
>    Âvoid      Â(*put_event_constraints)(struct cpu_hw_events *cpuc,
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â struct perf_event *event);
> Â Â Â Âstruct event_constraint *event_constraints;
> + Â Â Â struct event_constraint *percore_constraints;
>    Âvoid      Â(*quirks)(void);
>    Âint       perfctr_second_write;
>
> @@ -247,6 +283,11 @@ struct x86_pmu {
> Â Â Â Â */
>    Âunsigned long  lbr_tos, lbr_from, lbr_to; /* MSR base regs    */
>    Âint       lbr_nr;          Â/* hardware stack size */
> +
> + Â Â Â /*
> + Â Â Â Â* Extra registers for events
> + Â Â Â Â*/
> + Â Â Â struct extra_reg *extra_regs;
> Â};
>
> Âstatic struct x86_pmu x86_pmu __read_mostly;
> @@ -321,6 +362,33 @@ again:
> Â Â Â Âreturn new_raw_count;
> Â}
>
> +/*
> + * Find and validate any extra registers to set up.
> + */
> +static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
> +{
> + Â Â Â struct extra_reg *er;
> + Â Â Â u64 extra;
> +
> + Â Â Â event->hw.extra_reg = 0;
> + Â Â Â event->hw.extra_config = 0;
> +
> + Â Â Â if (!x86_pmu.extra_regs)
> + Â Â Â Â Â Â Â return 0;
> +
> + Â Â Â for (er = x86_pmu.extra_regs; er->msr; er++) {
> + Â Â Â Â Â Â Â if (er->event != (config & er->config_mask))
> + Â Â Â Â Â Â Â Â Â Â Â continue;
> + Â Â Â Â Â Â Â event->hw.extra_reg = er->msr;
> + Â Â Â Â Â Â Â extra = config >> er->extra_shift;
> + Â Â Â Â Â Â Â if (extra & ~er->valid_mask)
> + Â Â Â Â Â Â Â Â Â Â Â return -EINVAL;
> + Â Â Â Â Â Â Â event->hw.extra_config = extra;
> + Â Â Â Â Â Â Â break;
> + Â Â Â }
> + Â Â Â return 0;
> +}
> +
> Âstatic atomic_t active_events;
> Âstatic DEFINE_MUTEX(pmc_reserve_mutex);
>
> @@ -876,6 +944,8 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âu64 enable_mask)
> Â{
> Â Â Â Âwrmsrl(hwc->config_base + hwc->idx, hwc->config | enable_mask);
> + Â Â Â if (hwc->extra_reg)
> + Â Â Â Â Â Â Â wrmsrl(hwc->extra_reg, hwc->extra_config);
> Â}
>
> Âstatic inline void x86_pmu_disable_event(struct perf_event *event)
> diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
> index c8f5c08..a84de7e 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel.c
> @@ -1,5 +1,20 @@
> Â#ifdef CONFIG_CPU_SUP_INTEL
>
> +/*
> + * Per core state
> + * This used to coordinate shared resources for HT threads.
> + * Exists per CPU, but only the entry for the first CPU
> + * in the core is used.
> + */
> +struct intel_percore {
> +    raw_spinlock_t     Âlock;      /* protect structure */
> +    int           ref;      Â/* reference count */
> + Â Â Â u64 Â Â Â Â Â Â Â Â Â Â config; Â Â Â Â /* main counter config */
> +    unsigned int      Âextra_reg;   Â/* extra MSR number */
> + Â Â Â u64 Â Â Â Â Â Â Â Â Â Â extra_config; Â /* extra MSR config */
> +};
> +static struct intel_percore __percpu *intel_percore;
> +
> Â/*
> Â* Intel PerfMon, used on Core and later.
> Â*/
> @@ -64,6 +79,18 @@ static struct event_constraint intel_nehalem_event_constraints[] =
> Â Â Â ÂEVENT_CONSTRAINT_END
> Â};
>
> +static struct extra_reg intel_nehalem_extra_regs[] =
> +{
> + Â Â Â INTEL_EVENT_EXTRA_REG(0xb7, 0x1a6, 0xffff, 32), /* OFFCORE_RESPONSE_0 */
> + Â Â Â EVENT_EXTRA_END
> +};
> +
> +static struct event_constraint intel_nehalem_percore_constraints[] =
> +{
> + Â Â Â INTEL_EVENT_CONSTRAINT(0xb7, 0),
> + Â Â Â EVENT_CONSTRAINT_END
> +};
> +
> Âstatic struct event_constraint intel_westmere_event_constraints[] =
> Â{
> Â Â Â ÂFIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
> @@ -76,6 +103,20 @@ static struct event_constraint intel_westmere_event_constraints[] =
> Â Â Â ÂEVENT_CONSTRAINT_END
> Â};
>
> +static struct extra_reg intel_westmere_extra_regs[] =
> +{
> + Â Â Â INTEL_EVENT_EXTRA_REG(0xb7, 0x1a6, 0xffff, 32), /* OFFCORE_RESPONSE_0 */
> + Â Â Â INTEL_EVENT_EXTRA_REG(0xbb, 0x1a7, 0xffff, 32), /* OFFCORE_RESPONSE_1 */
> + Â Â Â EVENT_EXTRA_END
> +};
> +
> +static struct event_constraint intel_westmere_percore_constraints[] =
> +{
> + Â Â Â INTEL_EVENT_CONSTRAINT(0xb7, 0),
> + Â Â Â INTEL_EVENT_CONSTRAINT(0xbb, 0),
> + Â Â Â EVENT_CONSTRAINT_END
> +};
> +
> Âstatic struct event_constraint intel_gen_event_constraints[] =
> Â{
> Â Â Â ÂFIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
> @@ -794,6 +835,56 @@ intel_bts_constraints(struct perf_event *event)
> Â}
>
> Âstatic struct event_constraint *
> +intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
> +{
> + Â Â Â struct hw_perf_event *hwc = &event->hw;
> + Â Â Â unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT;
> + Â Â Â struct event_constraint *c;
> + Â Â Â struct intel_percore *pc;
> +
> + Â Â Â if (!x86_pmu.percore_constraints)
> + Â Â Â Â Â Â Â return NULL;
> +
> + Â Â Â for (c = x86_pmu.percore_constraints; c->cmask; c++) {
> + Â Â Â Â Â Â Â if (e != c->code)
> + Â Â Â Â Â Â Â Â Â Â Â continue;
> +
> + Â Â Â Â Â Â Â c = NULL;
> +
> + Â Â Â Â Â Â Â /*
> + Â Â Â Â Â Â Â Â* Allocate resource per core.
> + Â Â Â Â Â Â Â Â* Currently only one such per core resource can be allocated.
> + Â Â Â Â Â Â Â Â*/
> + Â Â Â Â Â Â Â pc = cpuc->per_core;
> + Â Â Â Â Â Â Â if (!pc)
> + Â Â Â Â Â Â Â Â Â Â Â break;
> + Â Â Â Â Â Â Â raw_spin_lock(&pc->lock);
> + Â Â Â Â Â Â Â if (pc->ref > 0) {
> + Â Â Â Â Â Â Â Â Â Â Â /* Allow identical settings */
> + Â Â Â Â Â Â Â Â Â Â Â if (hwc->config == pc->config &&
> + Â Â Â Â Â Â Â Â Â Â Â Â Â hwc->extra_reg == pc->extra_reg &&
> + Â Â Â Â Â Â Â Â Â Â Â Â Â hwc->extra_config == pc->extra_config) {
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â pc->ref++;
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â cpuc->percore_used = 1;
> + Â Â Â Â Â Â Â Â Â Â Â } else {
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â /* Deny due to conflict */
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â c = &emptyconstraint;
> + Â Â Â Â Â Â Â Â Â Â Â }
> + Â Â Â Â Â Â Â } else {
> + Â Â Â Â Â Â Â Â Â Â Â pc->config = hwc->config;
> + Â Â Â Â Â Â Â Â Â Â Â pc->extra_reg = hwc->extra_reg;
> + Â Â Â Â Â Â Â Â Â Â Â pc->extra_config = hwc->extra_config;
> + Â Â Â Â Â Â Â Â Â Â Â pc->ref = 1;
> + Â Â Â Â Â Â Â Â Â Â Â cpuc->percore_used = 1;
> + Â Â Â Â Â Â Â }
> + Â Â Â Â Â Â Â raw_spin_unlock(&pc->lock);
> + Â Â Â Â Â Â Â return c;
> + Â Â Â }
> +
> + Â Â Â return NULL;
> +}
> +
> +static struct event_constraint *
> Âintel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
> Â{
> Â Â Â Âstruct event_constraint *c;
> @@ -806,9 +897,38 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event
> Â Â Â Âif (c)
> Â Â Â Â Â Â Â Âreturn c;
>
> + Â Â Â c = intel_percore_constraints(cpuc, event);
> + Â Â Â if (c)
> + Â Â Â Â Â Â Â return c;
> +
> Â Â Â Âreturn x86_get_event_constraints(cpuc, event);
> Â}
>
> +static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â struct perf_event *event)
> +{
> + Â Â Â struct event_constraint *c;
> + Â Â Â struct intel_percore *pc;
> + Â Â Â struct hw_perf_event *hwc = &event->hw;
> + Â Â Â unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT;
> +
> + Â Â Â if (!cpuc->percore_used)
> + Â Â Â Â Â Â Â return;
> +
> + Â Â Â for (c = x86_pmu.percore_constraints; c->cmask; c++) {
> + Â Â Â Â Â Â Â if (e != c->code)
> + Â Â Â Â Â Â Â Â Â Â Â continue;
> +
> + Â Â Â Â Â Â Â pc = cpuc->per_core;
> + Â Â Â Â Â Â Â raw_spin_lock(&pc->lock);
> + Â Â Â Â Â Â Â pc->ref--;
> + Â Â Â Â Â Â Â BUG_ON(pc->ref < 0);
> + Â Â Â Â Â Â Â raw_spin_unlock(&pc->lock);
> + Â Â Â Â Â Â Â cpuc->percore_used = 0;
> + Â Â Â Â Â Â Â break;
> + Â Â Â }
> +}
> +
> Âstatic int intel_pmu_hw_config(struct perf_event *event)
> Â{
> Â Â Â Âint ret = x86_pmu_hw_config(event);
> @@ -854,6 +974,7 @@ static __initconst const struct x86_pmu core_pmu = {
> Â Â Â Â */
>    Â.max_period       = (1ULL << 31) - 1,
> Â Â Â Â.get_event_constraints Â= intel_get_event_constraints,
> + Â Â Â .put_event_constraints Â= intel_put_event_constraints,
>    Â.event_constraints   Â= intel_core_event_constraints,
> Â};
>
> @@ -892,6 +1013,7 @@ static __initconst const struct x86_pmu intel_pmu = {
> Â Â Â Â */
>    Â.max_period       = (1ULL << 31) - 1,
> Â Â Â Â.get_event_constraints Â= intel_get_event_constraints,
> + Â Â Â .put_event_constraints Â= intel_put_event_constraints,
>
>    Â.cpu_starting      = intel_pmu_cpu_starting,
>    Â.cpu_dying       Â= intel_pmu_cpu_dying,
> @@ -923,6 +1045,35 @@ static void intel_clovertown_quirks(void)
> Â Â Â Âx86_pmu.pebs_constraints = NULL;
> Â}
>
> +static __initdata int needs_percore = 1; /* CHANGEME */
> +
> +static __init int init_intel_percore(void)
> +{
> + Â Â Â int cpu;
> +
> + Â Â Â if (!needs_percore)
> + Â Â Â Â Â Â Â return 0;
> +
> + Â Â Â intel_percore = alloc_percpu(struct intel_percore);
> + Â Â Â if (!intel_percore)
> + Â Â Â Â Â Â Â return -ENOMEM;
> +
> + Â Â Â for_each_possible_cpu(cpu) {
> + Â Â Â Â Â Â Â raw_spin_lock_init(&per_cpu_ptr(intel_percore, cpu)->lock);
> + Â Â Â Â Â Â Â per_cpu(cpu_hw_events, cpu).per_core =
> + Â Â Â Â Â Â Â Â Â Â Â per_cpu_ptr(intel_percore,
> + Â Â Â Â Â Â Â Â Â Â Â Â Â cpumask_first(topology_thread_cpumask(cpu)));
> + Â Â Â Â Â Â Â printk("cpu %d core %d\n",
> + Â Â Â Â Â Â Â Â Â Â Âcpu, cpumask_first(topology_thread_cpumask(cpu)));
> + Â Â Â }
> +
> + Â Â Â return 0;
> +}
> +/*
> + * Runs later because per cpu allocations don't work early on.
> + */
> +__initcall(init_intel_percore);
> +
> Âstatic __init int intel_pmu_init(void)
> Â{
> Â Â Â Âunion cpuid10_edx edx;
> @@ -1010,7 +1161,11 @@ static __init int intel_pmu_init(void)
> Â Â Â Â Â Â Â Âintel_pmu_lbr_init_nhm();
>
> Â Â Â Â Â Â Â Âx86_pmu.event_constraints = intel_nehalem_event_constraints;
> + Â Â Â Â Â Â Â x86_pmu.percore_constraints =
> + Â Â Â Â Â Â Â Â Â Â Â intel_nehalem_percore_constraints;
> Â Â Â Â Â Â Â Âx86_pmu.enable_all = intel_pmu_nhm_enable_all;
> + Â Â Â Â Â Â Â x86_pmu.extra_regs = intel_nehalem_extra_regs;
> + Â Â Â Â Â Â Â needs_percore = 1;
> Â Â Â Â Â Â Â Âpr_cont("Nehalem events, ");
> Â Â Â Â Â Â Â Âbreak;
>
> @@ -1032,7 +1187,11 @@ static __init int intel_pmu_init(void)
> Â Â Â Â Â Â Â Âintel_pmu_lbr_init_nhm();
>
> Â Â Â Â Â Â Â Âx86_pmu.event_constraints = intel_westmere_event_constraints;
> + Â Â Â Â Â Â Â x86_pmu.percore_constraints =
> + Â Â Â Â Â Â Â Â Â Â Â intel_westmere_percore_constraints;
> Â Â Â Â Â Â Â Âx86_pmu.enable_all = intel_pmu_nhm_enable_all;
> + Â Â Â Â Â Â Â x86_pmu.extra_regs = intel_westmere_extra_regs;
> + Â Â Â Â Â Â Â needs_percore = 1;
> Â Â Â Â Â Â Â Âpr_cont("Westmere events, ");
> Â Â Â Â Â Â Â Âbreak;
>
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 057bf22..6124e93 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -529,6 +529,8 @@ struct hw_perf_event {
>            Âunsigned long  event_base;
>            Âint       idx;
>            Âint       last_cpu;
> +            unsigned int  Âextra_reg;
> + Â Â Â Â Â Â Â Â Â Â Â u64 Â Â Â Â Â Â extra_config;
> Â Â Â Â Â Â Â Â};
> Â Â Â Â Â Â Â Âstruct { /* software */
> Â Â Â Â Â Â Â Â Â Â Â Âstruct hrtimer Âhrtimer;
> --
> 1.7.1
>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/