On Fri, Jun 19, 2020 at 07:04:00AM -0700, kan.liang@xxxxxxxxxxxxxxx wrote:
+static void intel_pmu_arch_lbr_enable(bool pmi)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ u64 debugctl, lbr_ctl = 0, orig_debugctl;
+
+ if (pmi)
+ return;
+
+ if (cpuc->lbr_ctl)
+ lbr_ctl = cpuc->lbr_ctl->config & x86_pmu.lbr_ctl_mask;
+ /*
+ * LBR callstack does not work well with FREEZE_LBRS_ON_PMI.
+ * If FREEZE_LBRS_ON_PMI is set, a PMI near call/return instructions
+ * may be missed, which can lead to confusing results.
+ */
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+ orig_debugctl = debugctl;
+ if (lbr_ctl & ARCH_LBR_CALL_STACK)
+ debugctl &= ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
+ else
+ debugctl |= DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
+ if (orig_debugctl != debugctl)
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+
+ wrmsrl(MSR_ARCH_LBR_CTL, lbr_ctl | ARCH_LBR_CTL_LBREN);
+}
This is nearly a duplicate of the old one; surely we can do better?
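Perhaps pull the FREEZE_LBRS_ON_PMI dance into a common helper and have
both enable paths share it; something like the below (untested sketch,
the helper name is made up):

static void intel_pmu_lbr_update_freeze(bool call_stack)
{
	u64 debugctl, orig_debugctl;

	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
	orig_debugctl = debugctl;

	/*
	 * LBR callstack does not work well with FREEZE_LBRS_ON_PMI;
	 * see the comment in the patch above.
	 */
	if (call_stack)
		debugctl &= ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
	else
		debugctl |= DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;

	if (orig_debugctl != debugctl)
		wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
}

Then the legacy and arch variants only differ in the final enable write
(DEBUGCTLMSR_LBR vs MSR_ARCH_LBR_CTL).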
+static void intel_pmu_arch_lbr_restore(void *ctx)
+{
+ struct x86_perf_task_context_arch_lbr *task_ctx = ctx;
+ struct x86_perf_arch_lbr_entry *entries = task_ctx->entries;
+ int i;
+
+ /* Fast reset the LBRs before restore if the call stack is not full. */
+ if (!entries[x86_pmu.lbr_nr - 1].lbr_from)
+ intel_pmu_arch_lbr_reset();
+
+ for (i = 0; i < x86_pmu.lbr_nr; i++) {
+ if (!entries[i].lbr_from)
+ break;
+ wrlbr_from(i, entries[i].lbr_from);
+ wrlbr_to(i, entries[i].lbr_to);
+ wrmsrl(MSR_ARCH_LBR_INFO_0 + i, entries[i].lbr_info);
+ }
+}
This too looks very much like the old one.
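If both flavours saved their state in a common { from, to, info } entry
layout, a single restore loop would cover them. Rough sketch; struct
lbr_entry, the x86_pmu.lbr_info MSR base and wrlbr_info() are all
invented here for illustration (lbr_info would be MSR_LBR_INFO_0 or
MSR_ARCH_LBR_INFO_0 depending on the flavour):

static inline void wrlbr_info(unsigned int idx, u64 info)
{
	wrmsrl(x86_pmu.lbr_info + idx, info);
}

static void intel_pmu_lbr_restore_common(struct lbr_entry *entries)
{
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		/* Stop at the first invalid (zeroed) entry. */
		if (!entries[i].from)
			break;
		wrlbr_from(i, entries[i].from);
		wrlbr_to(i, entries[i].to);
		wrlbr_info(i, entries[i].info);
	}
}

The fast-reset-on-partial-stack logic can stay in the per-flavour
wrapper.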
+static void intel_pmu_arch_lbr_save(void *ctx)
+{
+ struct x86_perf_task_context_arch_lbr *task_ctx = ctx;
+ struct x86_perf_arch_lbr_entry *entries = task_ctx->entries;
+ int i;
+
+ for (i = 0; i < x86_pmu.lbr_nr; i++) {
+ entries[i].lbr_from = rdlbr_from(i);
+ /* Only save valid branches. */
+ if (!entries[i].lbr_from)
+ break;
+ entries[i].lbr_to = rdlbr_to(i);
+ rdmsrl(MSR_ARCH_LBR_INFO_0 + i, entries[i].lbr_info);
+ }
+
+ /* The LBR call stack is not full. A reset is required on restore. */
+ if (i < x86_pmu.lbr_nr)
+ entries[x86_pmu.lbr_nr - 1].lbr_from = 0;
+}
And again..
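Same story; with that common entry layout the save side collapses to
something like this (again just a sketch, rdlbr_info() invented like
wrlbr_info() above):

static void intel_pmu_lbr_save_common(struct lbr_entry *entries)
{
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		entries[i].from = rdlbr_from(i);
		/* Only save valid branches. */
		if (!entries[i].from)
			break;
		entries[i].to = rdlbr_to(i);
		entries[i].info = rdlbr_info(i);
	}

	/* Mark a partial stack so restore knows to do a fast reset. */
	if (i < x86_pmu.lbr_nr)
		entries[x86_pmu.lbr_nr - 1].from = 0;
}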
+static void __intel_pmu_arch_lbr_read(struct cpu_hw_events *cpuc, int index,
+ u64 from, u64 to, u64 info)
+{
+ u64 mis = 0, pred = 0, in_tx = 0, abort = 0, type = 0;
+ u32 br_type, to_plm;
+ u16 cycles = 0;
+
+ if (x86_pmu.arch_lbr_mispred) {
+ mis = !!(info & ARCH_LBR_INFO_MISPRED);
+ pred = !mis;
+ }
+ in_tx = !!(info & ARCH_LBR_INFO_IN_TSX);
+ abort = !!(info & ARCH_LBR_INFO_TSX_ABORT);
+ if (x86_pmu.arch_lbr_timed_lbr &&
+ (info & ARCH_LBR_INFO_CYC_CNT_VALID))
+ cycles = (info & ARCH_LBR_INFO_CYC_CNT);
+
+ /*
+ * Parse the branch type recorded in the LBR_x_INFO MSR.
+ * OTHER_BRANCH decoding is not supported for now; that branch
+ * type still relies on software decoding.
+ */
+ if (x86_pmu.arch_lbr_br_type) {
+ br_type = (info & ARCH_LBR_INFO_BR_TYPE) >> ARCH_LBR_INFO_BR_TYPE_OFFSET;
+
+ if (br_type <= ARCH_LBR_BR_TYPE_KNOWN_MAX) {
+ to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
+ type = arch_lbr_br_type_map[br_type] | to_plm;
+ }
+ }
+
+ cpuc->lbr_entries[index].from = from;
+ cpuc->lbr_entries[index].to = to;
+ cpuc->lbr_entries[index].mispred = mis;
+ cpuc->lbr_entries[index].predicted = pred;
+ cpuc->lbr_entries[index].in_tx = in_tx;
+ cpuc->lbr_entries[index].abort = abort;
+ cpuc->lbr_entries[index].cycles = cycles;
+ cpuc->lbr_entries[index].type = type;
+ cpuc->lbr_entries[index].reserved = 0;
+}
+
+static void intel_pmu_arch_lbr_read(struct cpu_hw_events *cpuc)
+{
+ u64 from, to, info;
+ int i;
+
+ for (i = 0; i < x86_pmu.lbr_nr; i++) {
+ from = rdlbr_from(i);
+ to = rdlbr_to(i);
+
+ /*
+ * Read LBR entries until an invalid (zeroed) entry is detected.
+ */
+ if (!from)
+ break;
+
+ rdmsrl(MSR_ARCH_LBR_INFO_0 + i, info);
+
+ __intel_pmu_arch_lbr_read(cpuc, i, from, to, info);
+ }
+
+ cpuc->lbr_stack.nr = i;
+}
+
+static void intel_pmu_store_pebs_arch_lbrs(struct pebs_lbr *pebs_lbr,
+ struct cpu_hw_events *cpuc)
+{
+ struct pebs_lbr_entry *lbr;
+ int i;
+
+ for (i = 0; i < x86_pmu.lbr_nr; i++) {
+ lbr = &pebs_lbr->lbr[i];
+
+ /*
+ * Read LBR entries until an invalid (zeroed) entry is detected.
+ */
+ if (!lbr->from)
+ break;
+
+ __intel_pmu_arch_lbr_read(cpuc, i, lbr->from,
+ lbr->to, lbr->info);
+ }
+
+ cpuc->lbr_stack.nr = i;
+ intel_pmu_lbr_filter(cpuc);
+}
Unless I'm reading cross-eyed again, that too is very similar to what we
already had.
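These two loops differ only in where the raw { from, to, info } triple
comes from; if struct pebs_lbr_entry and the MSR read shared that
layout, one common loop would do (illustrative only, struct lbr_entry
as above):

static void intel_pmu_arch_lbr_read_entries(struct cpu_hw_events *cpuc,
					    struct lbr_entry *entries)
{
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		/* Stop at the first invalid (zeroed) entry. */
		if (!entries[i].from)
			break;
		__intel_pmu_arch_lbr_read(cpuc, i, entries[i].from,
					  entries[i].to, entries[i].info);
	}

	cpuc->lbr_stack.nr = i;
}

The PEBS path could then pass the record in directly (and call
intel_pmu_lbr_filter() after), while the MSR path hands in a buffer
filled via rdlbr_*().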