[PATCH v1 08/20] perf jevents: Add L2 metrics for Intel

From: Ian Rogers
Date: Wed Feb 28 2024 - 19:21:09 EST

Next message: Ian Rogers: "[PATCH v1 09/20] perf jevents: Add load store breakdown metrics ldst for Intel"
Previous message: Ian Rogers: "[PATCH v1 07/20] perf jevents: Add ports metric group giving utilization on Intel"
In reply to: Ian Rogers: "[PATCH v1 07/20] perf jevents: Add ports metric group giving utilization on Intel"
Next in thread: Ian Rogers: "[PATCH v1 09/20] perf jevents: Add load store breakdown metrics ldst for Intel"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]

Give a breakdown of various L2 counters as metrics, including totals,
reads, hardware prefetcher, RFO, code and evictions.

Signed-off-by: Ian Rogers <irogers@xxxxxxxxxx>
---
tools/perf/pmu-events/intel_metrics.py | 158 +++++++++++++++++++++++++
1 file changed, 158 insertions(+)

diff --git a/tools/perf/pmu-events/intel_metrics.py b/tools/perf/pmu-events/intel_metrics.py
index 63d46ee1dca9..d22a1abca8d9 100755
--- a/tools/perf/pmu-events/intel_metrics.py
+++ b/tools/perf/pmu-events/intel_metrics.py
@@ -271,6 +271,163 @@ def IntelBr():
description="breakdown of retired branch instructions")

+def IntelL2() -> Optional[MetricGroup]:
+    """Build the "l2" metric group of L2 cache metrics for Intel.
+
+    Covers totals, demand data reads, hardware prefetch, RFO, code reads and
+    evictions. Returns None when L2_RQSTS.DEMAND_DATA_RD_HIT cannot be created;
+    metrics whose optional events cannot be created are passed as None.
+    NOTE(review): assumes Event() raises when no given name exists for the
+    model and otherwise binds the first usable name, and that `max` is the
+    metric-expression helper, not the builtin -- confirm against metric.py.
+    """
+    try:
+        DC_HIT = Event("L2_RQSTS.DEMAND_DATA_RD_HIT")
+    except Exception:
+        return None
+    try:
+        DC_MISS = Event("L2_RQSTS.DEMAND_DATA_RD_MISS")
+        l2_dmnd_miss = DC_MISS
+        l2_dmnd_rd_all = DC_MISS + DC_HIT
+    except Exception:
+        # No dedicated miss event: derive misses as all demand reads minus hits.
+        DC_ALL = Event("L2_RQSTS.ALL_DEMAND_DATA_RD")
+        l2_dmnd_miss = DC_ALL - DC_HIT
+        l2_dmnd_rd_all = DC_ALL
+    l2_dmnd_mrate = d_ratio(l2_dmnd_miss, interval_sec)
+    l2_dmnd_rrate = d_ratio(l2_dmnd_rd_all, interval_sec)
+
+    # Optional: unless both prefetch events exist the l2_hwpf group is omitted.
+    DC_PFH = DC_PFM = l2_pf_all = l2_pf_mrate = l2_pf_rrate = None
+    try:
+        DC_PFH = Event("L2_RQSTS.PF_HIT")
+        DC_PFM = Event("L2_RQSTS.PF_MISS")
+        l2_pf_all = DC_PFH + DC_PFM
+        l2_pf_mrate = d_ratio(DC_PFM, interval_sec)
+        l2_pf_rrate = d_ratio(l2_pf_all, interval_sec)
+    except Exception:
+        pass
+
+    # Not guarded: if Event() raises for these, the error reaches the caller.
+    DC_RFOH = Event("L2_RQSTS.RFO_HIT")
+    DC_RFOM = Event("L2_RQSTS.RFO_MISS")
+    l2_rfo_all = DC_RFOH + DC_RFOM
+    l2_rfo_mrate = d_ratio(DC_RFOM, interval_sec)
+    l2_rfo_rrate = d_ratio(l2_rfo_all, interval_sec)
+
+    DC_CH = Event("L2_RQSTS.CODE_RD_HIT")
+    DC_CM = Event("L2_RQSTS.CODE_RD_MISS")
+    DC_IN = Event("L2_LINES_IN.ALL")
+    l2_code_all = DC_CH + DC_CM
+    l2_code_rate = d_ratio(l2_code_all, interval_sec)
+    l2_code_miss_rate = d_ratio(DC_CM, interval_sec)
+    l2_in_rate = d_ratio(DC_IN, interval_sec)
+
+    # Created before the lines-out block below, which subtracts these counts.
+    DC_WB_U = DC_WB_D = wbu = wbd = None
+    try:
+        DC_WB_U = Event("IDI_MISC.WB_UPGRADE")
+        DC_WB_D = Event("IDI_MISC.WB_DOWNGRADE")
+        wbu = d_ratio(DC_WB_U, interval_sec)
+        wbd = d_ratio(DC_WB_D, interval_sec)
+    except Exception:
+        pass
+
+    l2_out_rate = wbn = isd = None
+    try:
+        DC_OUT_NS = Event("L2_LINES_OUT.NON_SILENT",
+                          "L2_LINES_OUT.DEMAND_DIRTY",
+                          "L2_LINES_IN.S")
+        DC_OUT_S = Event("L2_LINES_OUT.SILENT",
+                         "L2_LINES_OUT.DEMAND_CLEAN",
+                         "L2_LINES_IN.I")
+        if DC_OUT_S.name == "L2_LINES_OUT.SILENT" and (
+                args.model.startswith("skylake") or
+                args.model == "cascadelakex"):
+            DC_OUT_S.name = "L2_LINES_OUT.SILENT/any/"
+        # bring it back to per-CPU
+        l2_s = Select(DC_OUT_S / 2, Literal("#smt_on"), DC_OUT_S)
+        l2_out_rate = d_ratio(l2_s + DC_OUT_NS, interval_sec)
+        isd = d_ratio(l2_s, interval_sec)
+        if DC_WB_D is not None:
+            # Non-silent lines out minus both write-back counts, floored at 0.
+            wbn = d_ratio(max(DC_OUT_NS - DC_WB_U - DC_WB_D, 0), interval_sec)
+    except Exception:
+        pass
+
+    # Optional useless-prefetch count, reported inside the l2_hwpf group.
+    l2_useless_rate = None
+    try:
+        l2_useless_rate = d_ratio(Event("L2_LINES_OUT.USELESS_HWPF"), interval_sec)
+    except Exception:
+        pass
+
+    return MetricGroup("l2", [
+        MetricGroup("l2_totals", [
+            Metric("l2_totals_in", "L2 cache total in per second",
+                   l2_in_rate, "In/s"),
+            Metric("l2_totals_out", "L2 cache total out per second",
+                   l2_out_rate, "Out/s") if l2_out_rate else None,
+        ]),
+        # NOTE(review): in each group below the miss ratio and the misses-per-second
+        # metric share one name; confirm whether they need distinct names.
+        MetricGroup("l2_rd", [
+            Metric("l2_rd_hits", "L2 cache data read hits",
+                   d_ratio(DC_HIT, l2_dmnd_rd_all), "100%"),
+            Metric("l2_rd_misses", "L2 cache data read misses",
+                   d_ratio(l2_dmnd_miss, l2_dmnd_rd_all), "100%"),
+            Metric("l2_rd_requests", "L2 cache data read requests per second",
+                   l2_dmnd_rrate, "requests/s"),
+            Metric("l2_rd_misses", "L2 cache data read misses per second",
+                   l2_dmnd_mrate, "misses/s"),
+        ]),
+        MetricGroup("l2_hwpf", [
+            Metric("l2_hwpf_hits", "L2 cache hardware prefetcher hits",
+                   d_ratio(DC_PFH, l2_pf_all), "100%"),
+            Metric("l2_hwpf_misses", "L2 cache hardware prefetcher misses",
+                   d_ratio(DC_PFM, l2_pf_all), "100%"),
+            Metric("l2_hwpf_useless", "L2 cache hardware prefetcher useless prefetches per second",
+                   l2_useless_rate, "useless/s") if l2_useless_rate else None,
+            Metric("l2_hwpf_requests", "L2 cache hardware prefetcher requests per second",
+                   l2_pf_rrate, "requests/s"),
+            Metric("l2_hwpf_misses", "L2 cache hardware prefetcher misses per second",
+                   l2_pf_mrate, "misses/s"),
+        ]) if l2_pf_rrate else None,
+        MetricGroup("l2_rfo", [
+            Metric("l2_rfo_hits", "L2 cache request for ownership (RFO) hits",
+                   d_ratio(DC_RFOH, l2_rfo_all), "100%"),
+            Metric("l2_rfo_misses", "L2 cache request for ownership (RFO) misses",
+                   d_ratio(DC_RFOM, l2_rfo_all), "100%"),
+            Metric("l2_rfo_requests", "L2 cache request for ownership (RFO) requests per second",
+                   l2_rfo_rrate, "requests/s"),
+            Metric("l2_rfo_misses", "L2 cache request for ownership (RFO) misses per second",
+                   l2_rfo_mrate, "misses/s"),
+        ]),
+        MetricGroup("l2_code", [
+            Metric("l2_code_hits", "L2 cache code hits",
+                   d_ratio(DC_CH, l2_code_all), "100%"),
+            Metric("l2_code_misses", "L2 cache code misses",
+                   d_ratio(DC_CM, l2_code_all), "100%"),
+            Metric("l2_code_requests", "L2 cache code requests per second",
+                   l2_code_rate, "requests/s"),
+            Metric("l2_code_misses", "L2 cache code misses per second",
+                   l2_code_miss_rate, "misses/s"),
+        ]),
+        MetricGroup("l2_evict", [
+            MetricGroup("l2_evict_mef_lines", [
+                Metric("l2_evict_mef_lines_l3_hot_lru", "L2 evictions M/E/F lines L3 hot LRU per second",
+                       wbu, "HotLRU/s") if wbu else None,
+                Metric("l2_evict_mef_lines_l3_norm_lru", "L2 evictions M/E/F lines L3 normal LRU per second",
+                       wbn, "NormLRU/s") if wbn else None,
+                Metric("l2_evict_mef_lines_dropped", "L2 evictions M/E/F lines dropped per second",
+                       wbd, "dropped/s") if wbd else None,
+                Metric("l2_evict_is_lines_dropped", "L2 evictions I/S lines dropped per second",
+                       isd, "dropped/s") if isd else None,
+            ]),
+        ]),
+    ], description="L2 data cache analysis")
+
+
def IntelPorts() -> Optional[MetricGroup]:
pipeline_events = json.load(open(f"{os.path.dirname(os.path.realpath(__file__))}"
f"/arch/x86/{args.model}/pipeline.json"))
@@ -363,6 +520,7 @@ all_metrics = MetricGroup("", [
Smi(),
Tsx(),
IntelBr(),
+ IntelL2(),
IntelPorts(),
IntelSwpf(),
])
--
2.44.0.278.ge034bb2e1d-goog

Next message: Ian Rogers: "[PATCH v1 09/20] perf jevents: Add load store breakdown metrics ldst for Intel"
Previous message: Ian Rogers: "[PATCH v1 07/20] perf jevents: Add ports metric group giving utilization on Intel"
In reply to: Ian Rogers: "[PATCH v1 07/20] perf jevents: Add ports metric group giving utilization on Intel"
Next in thread: Ian Rogers: "[PATCH v1 09/20] perf jevents: Add load store breakdown metrics ldst for Intel"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]