Re: [PATCH] perf jevents: Add IOMMU metrics for AMD and Intel

From: Ian Rogers

Date: Thu May 28 2026 - 15:02:08 EST

On Wed, May 27, 2026 at 3:39 PM Chun-Tse Shao <ctshao@xxxxxxxxxx> wrote:
>
> Add IOMMU Translation Lookaside Buffer (TLB) and interrupt cache metrics
> to perf jevents for both AMD and Intel platforms. This enhances I/O
> performance observability, allowing fleet-wide monitoring of IOMMU
> overhead.
>
> For AMD, these metrics are supported on Zen 2 and newer processors and
> are implemented using the standard `amd_iommu` PMU events. The
> implementation uses the existing `_zen_model` helper to ensure these are
> only generated for Zen 2+.
>
> For Intel, these metrics are supported on platforms that expose the
> required uncore IIO IOMMU events (such as Emerald Rapids and Granite
> Rapids). The Intel implementation dynamically detects event availability
> at generation time. It requires at least the TLB events to expose the
> metric group, while the interrupt cache events are optional. This allows
> platforms like Emerald Rapids, which lack IOMMU interrupt cache events,
> to still expose the IOMMU TLB metrics.
>
> The following metrics are added:
> - iotlb_total_hit: Total IOTLB hits (4K, 2M, 1G pages).
> - iotlb_total_miss: Total IOTLB misses.
> - iotlb_miss_rate: IOTLB miss rate.
> - iotlb_interrupt_cache_hit: Interrupt cache hits.
> - iotlb_interrupt_cache_miss: Interrupt cache misses (on Intel, computed
>   as lookups - hits and clamped to zero).
> - iotlb_interrupt_cache_lookup: Interrupt cache lookups.
> - iotlb_interrupt_cache_miss_rate: Interrupt cache miss rate.
>
> Tested:
> # perf stat -M iotlb_total_hit,iotlb_total_miss,iotlb_miss_rate --per-socket --metric-only -a -j -- sleep 10
> {"socket" : "S0", "counters" : 10, "hits iotlb_total_hit" : "3579249.0", "% iotlb_miss_rate" : "0.0", "misses iotlb_total_miss" : "3.0"}
> {"socket" : "S1", "counters" : 10, "hits iotlb_total_hit" : "0.0", "% iotlb_miss_rate" : "0.0", "misses iotlb_total_miss" : "0.0"}
>
> Signed-off-by: Chun-Tse Shao <ctshao@xxxxxxxxxx>
> Assisted-by: Gemini:gemini-3.1-pro-preview

+Taylor, Perry, +Mi, Dapeng1

Reviewed-by: Ian Rogers <irogers@xxxxxxxxxx>

Nit: it may be better to split this into two patches, one for Intel and one for AMD.

Thanks,
Ian

> ---
> tools/perf/pmu-events/amd_metrics.py | 56 +++++++++++++++++++++++
> tools/perf/pmu-events/intel_metrics.py | 62 ++++++++++++++++++++++++++
> 2 files changed, 118 insertions(+)
>
> diff --git a/tools/perf/pmu-events/amd_metrics.py b/tools/perf/pmu-events/amd_metrics.py
> index 971f6e7af1f8..4558e7ce20f2 100755
> --- a/tools/perf/pmu-events/amd_metrics.py
> +++ b/tools/perf/pmu-events/amd_metrics.py
> @@ -265,6 +265,61 @@ def AmdDtlb() -> Optional[MetricGroup]:
> ], description="Data TLB metrics")
>
>
> +def AmdIotlb() -> Optional[MetricGroup]:
> + global _zen_model
> + if _zen_model < 2:
> + return None
> +
> + total_hit = Event("amd_iommu/mem_iommu_tlb_pte_hit/") + Event(
> + "amd_iommu/mem_iommu_tlb_pde_hit/"
> + )
> + total_miss = Event("amd_iommu/mem_iommu_tlb_pte_mis/") + Event(
> + "amd_iommu/mem_iommu_tlb_pde_mis/"
> + )
> + miss_rate = d_ratio(total_miss, total_miss + total_hit)
> +
> + interrupt_cache_hit = Event("amd_iommu/int_dte_hit/")
> + interrupt_cache_miss = Event("amd_iommu/int_dte_mis/")
> + interrupt_cache_lookup = interrupt_cache_hit + interrupt_cache_miss
> + interrupt_cache_miss_rate = d_ratio(
> + interrupt_cache_miss, interrupt_cache_miss + interrupt_cache_hit
> + )
> +
> + return MetricGroup(
> + "iotlb",
> + [
> + Metric("iotlb_total_hit", "IOTLB total hit", total_hit, "hits"),
> + Metric("iotlb_total_miss", "IOTLB total miss", total_miss, "misses"),
> + Metric("iotlb_miss_rate", "IOTLB miss rate", miss_rate, "100%"),
> + Metric(
> + "iotlb_interrupt_cache_hit",
> + "IOTLB interrupt cache hit",
> + interrupt_cache_hit,
> + "hits",
> + ),
> + Metric(
> + "iotlb_interrupt_cache_miss",
> + "IOTLB interrupt cache miss",
> + interrupt_cache_miss,
> + "misses",
> + ),
> + Metric(
> + "iotlb_interrupt_cache_lookup",
> + "IOTLB interrupt cache lookup",
> + interrupt_cache_lookup,
> + "lookups",
> + ),
> + Metric(
> + "iotlb_interrupt_cache_miss_rate",
> + "IOTLB interrupt cache miss rate",
> + interrupt_cache_miss_rate,
> + "100%",
> + ),
> + ],
> + description="IOMMU TLB metrics",
> + )
> +
> +
> def AmdItlb():
> global _zen_model
> l2h = Event("bp_l1_tlb_miss_l2_tlb_hit", "bp_l1_tlb_miss_l2_hit")
> @@ -473,6 +528,7 @@ def main() -> None:
> AmdBr(),
> AmdCtxSw(),
> AmdDtlb(),
> + AmdIotlb(),
> AmdItlb(),
> AmdLdSt(),
> AmdUpc(),
> diff --git a/tools/perf/pmu-events/intel_metrics.py b/tools/perf/pmu-events/intel_metrics.py
> index 52035433b505..c3a5c2965f74 100755
> --- a/tools/perf/pmu-events/intel_metrics.py
> +++ b/tools/perf/pmu-events/intel_metrics.py
> @@ -457,6 +457,67 @@ def IntelIlp() -> MetricGroup:
> ])
>
>
> +def IntelIotlb() -> Optional[MetricGroup]:
> + try:
> + total_hit = (
> + Event("UNC_IIO_IOMMU0.4K_HITS")
> + + Event("UNC_IIO_IOMMU0.2M_HITS")
> + + Event("UNC_IIO_IOMMU0.1G_HITS")
> + )
> + total_miss = Event("UNC_IIO_IOMMU0.MISSES")
> + except:
> + return None
> +
> + miss_rate = d_ratio(total_miss, total_miss + total_hit)
> + metrics = [
> + Metric("iotlb_total_hit", "IOTLB total hit", total_hit, "hits"),
> + Metric("iotlb_total_miss", "IOTLB total miss", total_miss, "misses"),
> + Metric("iotlb_miss_rate", "IOTLB miss rate", miss_rate, "100%"),
> + ]
> +
> + try:
> + interrupt_cache_hit = Event("UNC_IIO_IOMMU3.INT_CACHE_HITS")
> + interrupt_cache_lookup = Event("UNC_IIO_IOMMU3.INT_CACHE_LOOKUPS")
> + interrupt_cache_miss = max(interrupt_cache_lookup - interrupt_cache_hit, 0)
> + interrupt_cache_miss_rate = d_ratio(
> + interrupt_cache_miss, interrupt_cache_miss + interrupt_cache_hit
> + )
> + metrics += [
> + Metric(
> + "iotlb_interrupt_cache_hit",
> + "IOTLB interrupt cache hit",
> + interrupt_cache_hit,
> + "hits",
> + ),
> + Metric(
> + "iotlb_interrupt_cache_miss",
> + "IOTLB interrupt cache miss",
> + interrupt_cache_miss,
> + "misses",
> + ),
> + Metric(
> + "iotlb_interrupt_cache_lookup",
> + "IOTLB interrupt cache lookup",
> + interrupt_cache_lookup,
> + "lookups",
> + ),
> + Metric(
> + "iotlb_interrupt_cache_miss_rate",
> + "IOTLB interrupt cache miss rate",
> + interrupt_cache_miss_rate,
> + "100%",
> + ),
> + ]
> + except:
> + pass
> +
> + return MetricGroup(
> + "iotlb",
> + metrics,
> + description="IOMMU TLB metrics",
> + )
> +
> +
> def IntelL2() -> Optional[MetricGroup]:
> try:
> DC_HIT = Event("L2_RQSTS.DEMAND_DATA_RD_HIT")
> @@ -1105,6 +1166,7 @@ def main() -> None:
> IntelCtxSw(),
> IntelFpu(),
> IntelIlp(),
> + IntelIotlb(),
> IntelL2(),
> IntelLdSt(),
> IntelMissLat(),
> --
> 2.54.0.823.g6e5bcc1fc9-goog
>