[PATCH 1/3] perf-events: Add support for supplementary event registers

From: Andi Kleen
Date: Thu Nov 11 2010 - 11:15:37 EST


From: Andi Kleen <ak@xxxxxxxxxxxxxxx>

Intel Nehalem/Westmere have a special OFFCORE_RESPONSE event
that can be used to monitor any offcore accesses from a core.
This is a very useful event for various tunings, and it's
also needed to implement the generic LLC-* events correctly.

Unfortunately, this event requires programming a mask in a separate
register. Worse, this separate register is per core, not per CPU
thread.

This patch:
- Teaches perf_events that OFFCORE_RESPONSE events need extra parameters.
- Adds a new field to the user interface to pass the extra mask.
This reuses one of the currently unused fields in perf_event_attr. The
change is ABI neutral because no one is likely to have used
OFFCORE_RESPONSE before (with a zero mask it wouldn't count anything).
A minimal usage sketch follows this list.
- Adds support to the Intel perf_event core to schedule the per-core
resource. I tried to add generic infrastructure for this that could
also be used for other core resources. The basic code is patterned
after the similar AMD northbridge constraints code.
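
For illustration only (not part of the patch), here is a minimal
userspace sketch of the new attr field. The 0xb7 event code and MSR
0x1a6 come from the tables below; the 0x01 umask and the 0x4001
request/response mask are illustrative assumptions, see the SDM for
the real bit definitions.

	#include <linux/perf_event.h>
	#include <asm/unistd.h>
	#include <string.h>
	#include <unistd.h>

	static int open_offcore_counter(int cpu)
	{
		struct perf_event_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.type = PERF_TYPE_RAW;
		attr.config = 0x01b7;		/* OFFCORE_RESPONSE_0 (assumed umask 0x01) */
		attr.event_extra = 0x4001;	/* illustrative request/response mask */
		attr.disabled = 1;

		/* system-wide counting on one CPU: pid == -1, group_fd == -1 */
		return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
	}

The kernel writes event_extra to the OFFCORE_RSP MSR when the event is
scheduled, and rejects masks outside valid_mask with -EINVAL.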

Thanks to Stephane Eranian, who pointed out some problems
in the original version and suggested improvements.

Cc: eranian@xxxxxxxxxx
Signed-off-by: Andi Kleen <ak@xxxxxxxxxxxxxxx>
---
arch/x86/kernel/cpu/perf_event.c | 56 +++++++++++++++
arch/x86/kernel/cpu/perf_event_intel.c | 120 ++++++++++++++++++++++++++++++++
include/linux/perf_event.h | 7 ++-
3 files changed, 182 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index ed63101..97133ec 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -93,6 +93,8 @@ struct amd_nb {
struct event_constraint event_constraints[X86_PMC_IDX_MAX];
};

+struct intel_percore;
+
#define MAX_LBR_ENTRIES 16

struct cpu_hw_events {
@@ -126,6 +128,8 @@ struct cpu_hw_events {
void *lbr_context;
struct perf_branch_stack lbr_stack;
struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
+ int percore_used;
+ struct intel_percore *per_core;

/*
* AMD specific bits
@@ -175,6 +179,24 @@ struct cpu_hw_events {
#define for_each_event_constraint(e, c) \
for ((e) = (c); (e)->weight; (e)++)

+/*
+ * Extra registers for specific events.
+ * Some events need large masks and require external MSRs.
+ * Define a mapping to these extra registers.
+ */
+
+struct extra_reg {
+ unsigned event;
+ unsigned msr;
+ u64 config_mask;
+ u64 valid_mask;
+};
+
+#define EVENT_EXTRA_REG(event, msr, m, vm) { event, msr, m, vm }
+#define INTEL_EVENT_EXTRA_REG(event, msr, vm) \
+ EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm)
+#define EVENT_EXTRA_END {}
+
union perf_capabilities {
struct {
u64 lbr_format : 6;
@@ -219,6 +241,7 @@ struct x86_pmu {
void (*put_event_constraints)(struct cpu_hw_events *cpuc,
struct perf_event *event);
struct event_constraint *event_constraints;
+ struct event_constraint *percore_constraints;
void (*quirks)(void);
int perfctr_second_write;

@@ -247,6 +270,11 @@ struct x86_pmu {
*/
unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */
int lbr_nr; /* hardware stack size */
+
+ /*
+ * Extra registers for events
+ */
+ struct extra_reg *extra_regs;
};

static struct x86_pmu x86_pmu __read_mostly;
@@ -530,6 +558,28 @@ static int x86_pmu_hw_config(struct perf_event *event)
}

/*
+ * Find and validate any extra registers to set up.
+ */
+static int x86_pmu_extra_regs(struct perf_event *event)
+{
+ struct extra_reg *er;
+
+ if (!x86_pmu.extra_regs)
+ return 0;
+
+ for (er = x86_pmu.extra_regs; er->msr; er++) {
+ if (er->event != (event->attr.config & er->config_mask))
+ continue;
+ if (event->attr.event_extra & ~er->valid_mask)
+ return -EINVAL;
+ event->hw.extra_reg = er->msr;
+ event->hw.extra_config = event->attr.event_extra;
+ break;
+ }
+ return 0;
+}
+
+/*
* Setup the hardware configuration for a given attr_type
*/
static int __x86_pmu_event_init(struct perf_event *event)
@@ -561,6 +611,10 @@ static int __x86_pmu_event_init(struct perf_event *event)
event->hw.last_cpu = -1;
event->hw.last_tag = ~0ULL;

+ err = x86_pmu_extra_regs(event);
+ if (err)
+ return err;
+
return x86_pmu.hw_config(event);
}

@@ -876,6 +930,8 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
u64 enable_mask)
{
wrmsrl(hwc->config_base + hwc->idx, hwc->config | enable_mask);
+ if (hwc->extra_reg)
+ wrmsrl(hwc->extra_reg, hwc->extra_config);
}

static inline void x86_pmu_disable_event(struct perf_event *event)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index c8f5c08..bbe7fba 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1,5 +1,14 @@
#ifdef CONFIG_CPU_SUP_INTEL

+struct intel_percore {
+ raw_spinlock_t lock;
+ int ref;
+ u64 config;
+ unsigned extra_reg;
+ u64 extra_config;
+};
+static DEFINE_PER_CPU(struct intel_percore, intel_percore);
+
/*
* Intel PerfMon, used on Core and later.
*/
@@ -64,6 +73,18 @@ static struct event_constraint intel_nehalem_event_constraints[] =
EVENT_CONSTRAINT_END
};

+static struct extra_reg intel_nehalem_extra_regs[] =
+{
+ INTEL_EVENT_EXTRA_REG(0xb7, 0x1a6, 0xffff), /* OFFCORE_RESPONSE1 */
+ EVENT_EXTRA_END
+};
+
+static struct event_constraint intel_nehalem_percore_constraints[] =
+{
+ INTEL_EVENT_CONSTRAINT(0xb7, 0),
+ EVENT_CONSTRAINT_END
+};
+
static struct event_constraint intel_westmere_event_constraints[] =
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
@@ -76,6 +97,20 @@ static struct event_constraint intel_westmere_event_constraints[] =
EVENT_CONSTRAINT_END
};

+static struct extra_reg intel_westmere_extra_regs[] =
+{
+ INTEL_EVENT_EXTRA_REG(0xb7, 0x1a6, 0xffff), /* OFFCORE_RESPONSE1 */
+ INTEL_EVENT_EXTRA_REG(0xbb, 0x1a7, 0xffff), /* OFFCORE_RESPONSE2 */
+ EVENT_EXTRA_END
+};
+
+static struct event_constraint intel_westmere_percore_constraints[] =
+{
+ INTEL_EVENT_CONSTRAINT(0xb7, 0),
+ INTEL_EVENT_CONSTRAINT(0xbb, 0),
+ EVENT_CONSTRAINT_END
+};
+
static struct event_constraint intel_gen_event_constraints[] =
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
@@ -794,6 +829,56 @@ intel_bts_constraints(struct perf_event *event)
}

static struct event_constraint *
+intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT;
+ struct event_constraint *c;
+ struct intel_percore *pc;
+
+ if (!x86_pmu.percore_constraints)
+ return NULL;
+
+ for (c = x86_pmu.percore_constraints; c->cmask; c++) {
+ if (e != c->code)
+ continue;
+
+ c = NULL;
+
+ /*
+ * Allocate resource per core.
+ * Currently only one such per core resource can be allocated.
+ */
+ pc = cpuc->per_core;
+ if (!pc)
+ break;
+ raw_spin_lock(&pc->lock);
+ if (pc->ref > 0) {
+ /* Allow identical settings */
+ if (hwc->config == pc->config &&
+ hwc->extra_reg == pc->extra_reg &&
+ hwc->extra_config == pc->extra_config) {
+ pc->ref++;
+ cpuc->percore_used = 1;
+ } else {
+ /* Deny due to conflict */
+ c = &emptyconstraint;
+ }
+ } else {
+ pc->config = hwc->config;
+ pc->extra_reg = hwc->extra_reg;
+ pc->extra_config = hwc->extra_config;
+ pc->ref = 1;
+ cpuc->percore_used = 1;
+ }
+ raw_spin_unlock(&pc->lock);
+ return c;
+ }
+
+ return NULL;
+}
+
+static struct event_constraint *
intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
{
struct event_constraint *c;
@@ -806,9 +891,29 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event
if (c)
return c;

+ c = intel_percore_constraints(cpuc, event);
+ if (c)
+ return c;
+
return x86_get_event_constraints(cpuc, event);
}

+static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ struct intel_percore *pc;
+
+ if (!cpuc->percore_used)
+ return;
+
+ pc = cpuc->per_core;
+ raw_spin_lock(&pc->lock);
+ pc->ref--;
+ BUG_ON(pc->ref < 0);
+ raw_spin_unlock(&pc->lock);
+ cpuc->percore_used = 0;
+}
+
static int intel_pmu_hw_config(struct perf_event *event)
{
int ret = x86_pmu_hw_config(event);
@@ -854,6 +959,7 @@ static __initconst const struct x86_pmu core_pmu = {
*/
.max_period = (1ULL << 31) - 1,
.get_event_constraints = intel_get_event_constraints,
+ .put_event_constraints = intel_put_event_constraints,
.event_constraints = intel_core_event_constraints,
};

@@ -929,6 +1035,7 @@ static __init int intel_pmu_init(void)
union cpuid10_eax eax;
unsigned int unused;
unsigned int ebx;
+ int cpu;
int version;

if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
@@ -1010,7 +1117,10 @@ static __init int intel_pmu_init(void)
intel_pmu_lbr_init_nhm();

x86_pmu.event_constraints = intel_nehalem_event_constraints;
+ x86_pmu.percore_constraints =
+ intel_nehalem_percore_constraints;
x86_pmu.enable_all = intel_pmu_nhm_enable_all;
+ x86_pmu.extra_regs = intel_nehalem_extra_regs;
pr_cont("Nehalem events, ");
break;

@@ -1032,7 +1142,10 @@ static __init int intel_pmu_init(void)
intel_pmu_lbr_init_nhm();

x86_pmu.event_constraints = intel_westmere_event_constraints;
+ x86_pmu.percore_constraints =
+ intel_westmere_percore_constraints;
x86_pmu.enable_all = intel_pmu_nhm_enable_all;
+ x86_pmu.extra_regs = intel_westmere_extra_regs;
pr_cont("Westmere events, ");
break;

@@ -1043,6 +1156,13 @@ static __init int intel_pmu_init(void)
x86_pmu.event_constraints = intel_gen_event_constraints;
pr_cont("generic architected perfmon, ");
}
+
+ for_each_possible_cpu(cpu) {
+ raw_spin_lock_init(&per_cpu(intel_percore, cpu).lock);
+ per_cpu(cpu_hw_events, cpu).per_core =
+ &per_cpu(intel_percore,
+ cpumask_first(topology_core_cpumask(cpu)));
+ }
return 0;
}

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 057bf22..a353594 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -224,7 +224,10 @@ struct perf_event_attr {
};

__u32 bp_type;
- __u64 bp_addr;
+ union {
+ __u64 bp_addr;
+ __u64 event_extra; /* Extra for some events */
+ };
__u64 bp_len;
};

@@ -529,6 +532,8 @@ struct hw_perf_event {
unsigned long event_base;
int idx;
int last_cpu;
+ unsigned extra_reg;
+ u64 extra_config;
};
struct { /* software */
struct hrtimer hrtimer;
--
1.7.1
