[PATCH v1 09/13] perf/x86/amd: add idle hooks for branch sampling

From: Stephane Eranian
Date: Thu Sep 09 2021 - 03:59:25 EST


On AMD Fam19h Zen3, the branch sampling (BRS) feature must be disabled before entering low power
and re-enabled (if it was active) when returning from low power. Otherwise, the NMI interrupt may
be held up for too long and cause problems. Stopping BRS will cause the NMI to be delivered if it
was held up.

This patch connects the branch sampling code to the perf_events idle callbacks.

Signed-off-by: Stephane Eranian <eranian@xxxxxxxxxx>
---
arch/x86/events/amd/brs.c | 25 +++++++++++++++++++++++++
arch/x86/events/amd/core.c | 4 ++++
arch/x86/events/perf_event.h | 1 +
3 files changed, 30 insertions(+)

diff --git a/arch/x86/events/amd/brs.c b/arch/x86/events/amd/brs.c
index 23b969001857..7d27591ba537 100644
--- a/arch/x86/events/amd/brs.c
+++ b/arch/x86/events/amd/brs.c
@@ -146,6 +146,7 @@ void amd_brs_enable(void)

/* Set enable bit */
set_debug_extn_cfg(cfg.val);
+ perf_lopwr_active_inc();
}

void amd_brs_disable(void)
@@ -175,6 +176,7 @@ void amd_brs_disable(void)
cfg.brsmen = 0;
set_debug_extn_cfg(cfg.val);
}
+ perf_lopwr_active_dec();
}

static bool amd_brs_match_plm(struct perf_event *event, u64 to)
@@ -292,6 +294,29 @@ static void amd_brs_poison_buffer(void)
wrmsrl(brs_to(idx), BRS_POISON);
}

+/*
+ * called indirectly with irqs masked from mwait_idle_*()
+ */
+void amd_pmu_brs_lopwr_cb(bool lopwr_in)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ union amd_debug_extn_cfg cfg;
+
+ /*
+ * on mwait in, we may end up in non C0 state.
+ * we must disable branch sampling to avoid holding the NMI
+ * for too long. We disable it in hardware but we
+ * keep the state in cpuc, so we can re-enable.
+ *
+ * The hardware will deliver the NMI if needed when brsmen cleared
+ */
+ if (cpuc->brs_active) {
+ cfg.val = get_debug_extn_cfg();
+ cfg.brsmen = !lopwr_in;
+ set_debug_extn_cfg(cfg.val);
+ }
+}
+
/*
* On context switch in, we need to make sure no samples from previous user
* are left in the BRS.
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index d6d5119260a9..3e1985cd414d 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -1184,12 +1184,16 @@ static int __init amd_core_pmu_init(void)
* invoked on context-switch in via sched_task_in(), so enable only when necessary
*/
if (!amd_brs_init()) {
+ struct pmu *pmu = x86_get_pmu(smp_processor_id());
x86_pmu.get_event_constraints = amd_get_event_constraints_f19h;
x86_pmu.sched_task = amd_pmu_sched_task;
/*
* The put_event_constraints callback is shared with
* Fam17h, set above
*/
+
+ /* branch sampling must be stopped when entering low power */
+ perf_register_lopwr_cb(pmu, amd_pmu_brs_lopwr_cb);
}
}

diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index a275553e78b9..73eac9d34bd9 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -1242,6 +1242,7 @@ static inline void amd_pmu_brs_del(struct perf_event *event)
}

void amd_pmu_brs_sched_task(struct perf_event_context *ctx, bool sched_in);
+void amd_pmu_brs_lopwr_cb(bool lopwr_in);

/*
* check if BRS is activated on the CPU
--
2.33.0.153.gba50c8fa24-goog