[RFC] perf, x86: Segregate PMU workaraunds into x86_pmu_quirk_opsstructure

From: Cyrill Gorcunov
Date: Sat May 29 2010 - 14:24:35 EST


Hi,

I would appreciate comments/complains on the following patch. The idea is to implement
PMU quirks with minimal impact. At the moment two quirks are addressed -
PEBS disabling on Clovertown and P4 performance counter double write.
PEBS disabling already was there only moved to x86_pmu_quirk_ops. Note
that I didn't use pointer to the structure intensionally but embed it into
x86_pmu, if the structure grow we will need to use a pointer to the structure.

Comments, complains please? Perhaps there is some idea which allow
to handle this all more gentle?

-- Cyrill
---
perf, x86: Segregate PMU workaraunds into x86_pmu_quirk_ops structure

This allow us to handle both Clovertown and Netburst quirks in
a general fashion.

Signed-off-by: Cyrill Gorcunov <gorcunov@xxxxxxxxx>
---
arch/x86/kernel/cpu/perf_event.c | 18 ++++++++--
arch/x86/kernel/cpu/perf_event_intel.c | 55 ++++++++++++++++++---------------
arch/x86/kernel/cpu/perf_event_p4.c | 21 ++++++++++++
3 files changed, 64 insertions(+), 30 deletions(-)

Index: linux-2.6.git/arch/x86/kernel/cpu/perf_event.c
=====================================================================
--- linux-2.6.git.orig/arch/x86/kernel/cpu/perf_event.c
+++ linux-2.6.git/arch/x86/kernel/cpu/perf_event.c
@@ -185,6 +185,11 @@ union perf_capabilities {
u64 capabilities;
};

+struct x86_pmu_quirk_ops {
+ void (*pmu_init)(void);
+ void (*perfctr_write)(unsigned long addr, u64 value);
+};
+
/*
* struct x86_pmu - generic x86 pmu
*/
@@ -218,7 +223,8 @@ struct x86_pmu {
void (*put_event_constraints)(struct cpu_hw_events *cpuc,
struct perf_event *event);
struct event_constraint *event_constraints;
- void (*quirks)(void);
+
+ struct x86_pmu_quirk_ops quirks;

int (*cpu_prepare)(int cpu);
void (*cpu_starting)(int cpu);
@@ -924,7 +930,11 @@ x86_perf_event_set_period(struct perf_ev
*/
atomic64_set(&hwc->prev_count, (u64)-left);

- wrmsrl(hwc->event_base + idx,
+ if (x86_pmu.quirks.perfctr_write)
+ x86_pmu.quirks.perfctr_write(hwc->event_base + idx,
+ (u64)(-left) & x86_pmu.cntval_mask);
+ else
+ wrmsrl(hwc->event_base + idx,
(u64)(-left) & x86_pmu.cntval_mask);

perf_event_update_userpage(event);
@@ -1316,8 +1326,8 @@ void __init init_hw_perf_events(void)

pr_cont("%s PMU driver.\n", x86_pmu.name);

- if (x86_pmu.quirks)
- x86_pmu.quirks();
+ if (x86_pmu.quirks.pmu_init)
+ x86_pmu.quirks.pmu_init();

if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
Index: linux-2.6.git/arch/x86/kernel/cpu/perf_event_intel.c
=====================================================================
--- linux-2.6.git.orig/arch/x86/kernel/cpu/perf_event_intel.c
+++ linux-2.6.git/arch/x86/kernel/cpu/perf_event_intel.c
@@ -822,6 +822,36 @@ static void intel_pmu_cpu_dying(int cpu)
fini_debug_store_on_cpu(cpu);
}

+static void intel_clovertown_pmu_init(void)
+{
+ /*
+ * PEBS is unreliable due to:
+ *
+ * AJ67 - PEBS may experience CPL leaks
+ * AJ68 - PEBS PMI may be delayed by one event
+ * AJ69 - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12]
+ * AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS
+ *
+ * AJ67 could be worked around by restricting the OS/USR flags.
+ * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI.
+ *
+ * AJ106 could possibly be worked around by not allowing LBR
+ * usage from PEBS, including the fixup.
+ * AJ68 could possibly be worked around by always programming
+ * a pebs_event_reset[0] value and coping with the lost events.
+ *
+ * But taken together it might just make sense to not enable PEBS on
+ * these chips.
+ */
+ printk(KERN_WARNING "PEBS disabled due to CPU errata.\n");
+ x86_pmu.pebs = 0;
+ x86_pmu.pebs_constraints = NULL;
+}
+
+static __initconst const struct x86_pmu_quirk_ops intel_clovertown_quirks = {
+ .pmu_init = intel_clovertown_pmu_init,
+};
+
static __initconst const struct x86_pmu intel_pmu = {
.name = "Intel",
.handle_irq = intel_pmu_handle_irq,
@@ -848,31 +878,6 @@ static __initconst const struct x86_pmu
.cpu_dying = intel_pmu_cpu_dying,
};

-static void intel_clovertown_quirks(void)
-{
- /*
- * PEBS is unreliable due to:
- *
- * AJ67 - PEBS may experience CPL leaks
- * AJ68 - PEBS PMI may be delayed by one event
- * AJ69 - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12]
- * AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS
- *
- * AJ67 could be worked around by restricting the OS/USR flags.
- * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI.
- *
- * AJ106 could possibly be worked around by not allowing LBR
- * usage from PEBS, including the fixup.
- * AJ68 could possibly be worked around by always programming
- * a pebs_event_reset[0] value and coping with the lost events.
- *
- * But taken together it might just make sense to not enable PEBS on
- * these chips.
- */
- printk(KERN_WARNING "PEBS disabled due to CPU errata.\n");
- x86_pmu.pebs = 0;
- x86_pmu.pebs_constraints = NULL;
-}

static __init int intel_pmu_init(void)
{
Index: linux-2.6.git/arch/x86/kernel/cpu/perf_event_p4.c
=====================================================================
--- linux-2.6.git.orig/arch/x86/kernel/cpu/perf_event_p4.c
+++ linux-2.6.git/arch/x86/kernel/cpu/perf_event_p4.c
@@ -804,6 +804,24 @@ done:
return num ? -ENOSPC : 0;
}

+/*
+ * This handles errata N15 in intel doc 249199-029,
+ * the counter may not be updated correctly on write
+ * so we repeat write operation twice to do the trick
+ * (the official workaround didn't work)
+ *
+ * the former idea is taken from OProfile code
+ */
+static void p4_perfctr_write(unsigned long addr, u64 value)
+{
+ wrmsrl(addr, value);
+ wrmsrl(addr, value);
+}
+
+static __initconst const struct x86_pmu_quirk_ops p4_pmu_quirks = {
+ .perfctr_write = p4_perfctr_write,
+};
+
static __initconst const struct x86_pmu p4_pmu = {
.name = "Netburst P4/Xeon",
.handle_irq = p4_pmu_handle_irq,
@@ -850,7 +868,8 @@ static __init int p4_pmu_init(void)

pr_cont("Netburst events, ");

- x86_pmu = p4_pmu;
+ x86_pmu = p4_pmu;
+ x86_pmu.quirks = p4_pmu_quirks;

return 0;
}
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/