Re: [RESEND PATCH 04/12] perf/x86/intel: Support new data source for Lunar Lake

From: Liang, Kan
Date: Thu Jun 20 2024 - 12:10:10 EST




On 2024-06-20 3:34 a.m., Peter Zijlstra wrote:
> On Tue, Jun 18, 2024 at 08:10:36AM -0700, kan.liang@xxxxxxxxxxxxxxx wrote:
>
>> @@ -77,7 +86,7 @@ union intel_x86_pebs_dse {
>> #define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
>>
>> /* Version for Sandy Bridge and later */
>> -static u64 pebs_data_source[] = {
>> +static u64 pebs_data_source[PERF_PEBS_DATA_SOURCE_MAX] = {
>> P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
>> OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* 0x01: L1 local */
>> OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */
>> @@ -173,6 +182,40 @@ void __init intel_pmu_pebs_data_source_cmt(void)
>> __intel_pmu_pebs_data_source_cmt(pebs_data_source);
>> }
>>
>> +/* Version for Lunar Lake p-core and later */
>> +static u64 lnc_pebs_data_source[PERF_PEBS_DATA_SOURCE_MAX] = {
>
> Comment and naming are inconsistent, please use lion-cove and lnc
>
>> + P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA), /* 0x00: ukn L3 */
>> + OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* 0x01: L1 hit */
>> + OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* 0x02: L1 hit */
>> + OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x03: LFB/L1 Miss Handling Buffer hit */
>> + 0, /* 0x04: Reserved */
>> + OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* 0x05: L2 Hit */
>> + OP_LH | LEVEL(L2_MHB) | P(SNOOP, NONE), /* 0x06: L2 Miss Handling Buffer Hit */
>> + 0, /* 0x07: Reserved */
>> + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* 0x08: L3 Hit */
>> + 0, /* 0x09: Reserved */
>> + 0, /* 0x0a: Reserved */
>> + 0, /* 0x0b: Reserved */
>> + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD), /* 0x0c: L3 Hit Snoop Fwd */
>> + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* 0x0d: L3 Hit Snoop HitM */
>> + 0, /* 0x0e: Reserved */
>> + P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* 0x0f: L3 Miss Snoop HitM */
>> + OP_LH | LEVEL(MSC) | P(SNOOP, NONE), /* 0x10: Memory-side Cache Hit */
>> + OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, NONE), /* 0x11: Local Memory Hit */
>> +};
>> +
>> +void __init intel_pmu_pebs_data_source_lnl(void)
>> +{
>> + u64 *data_source;
>> +
>> + data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].pebs_data_source;
>> + memcpy(data_source, lnc_pebs_data_source, sizeof(lnc_pebs_data_source));
>> +
>> + data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source;
>> + memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
>> + __intel_pmu_pebs_data_source_cmt(data_source);
>> +}
>> +
>> static u64 precise_store_data(u64 status)
>> {
>> union intel_x86_pebs_dse dse;
>> @@ -264,7 +307,7 @@ static u64 __adl_latency_data_small(struct perf_event *event, u64 status,
>>
>> WARN_ON_ONCE(hybrid_pmu(event->pmu)->pmu_type == hybrid_big);
>>
>> - dse &= PERF_PEBS_DATA_SOURCE_MASK;
>> + dse &= PERF_PEBS_DATA_SOURCE_GRT_MASK;
>> val = hybrid_var(event->pmu, pebs_data_source)[dse];
>>
>> pebs_set_tlb_lock(&val, tlb, lock);
>> @@ -300,6 +343,45 @@ u64 mtl_latency_data_small(struct perf_event *event, u64 status)
>> dse.mtl_fwd_blk);
>> }
>>
>> +u64 lnl_latency_data(struct perf_event *event, u64 status)
>> +{
>> + struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
>> + union intel_x86_pebs_dse dse;
>> + union perf_mem_data_src src;
>> + u64 val;
>> +
>> + if (pmu->pmu_type == hybrid_small)
>> + return mtl_latency_data_small(event, status);
>
> argh,.. can you please go and rename this mtl_small nonsense to .. /me
> googles.. crestmont / crm ?
>
> Oh gawd, and the adl_small things to gracemont / gcm ?

Sure, I will add a cleanup patch to use the specific code name.

Thanks,
Kan

>
>> +
> return lnc_latency_data();
> }
>
>
> u64 lnc_latency_data()
> {
>> + dse.val = status;
>> +
>> + /* LNC core latency data */
>> + val = hybrid_var(event->pmu, pebs_data_source)[status & PERF_PEBS_DATA_SOURCE_MASK];
>> + if (!val)
>> + val = P(OP, LOAD) | LEVEL(NA) | P(SNOOP, NA);
>> +
>> + if (dse.lnc_stlb_miss)
>> + val |= P(TLB, MISS) | P(TLB, L2);
>> + else
>> + val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
>> +
>> + if (dse.lnc_locked)
>> + val |= P(LOCK, LOCKED);
>> +
>> + if (dse.lnc_data_blk)
>> + val |= P(BLK, DATA);
>> + if (dse.lnc_addr_blk)
>> + val |= P(BLK, ADDR);
>> + if (!dse.lnc_data_blk && !dse.lnc_addr_blk)
>> + val |= P(BLK, NA);
>> +
>> + src.val = val;
>> + if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
>> + src.mem_op = P(OP, STORE);
>> +
>> + return src.val;
>> +}
>