Re: [PATCH v2 1/2] perf jevents: Add IOMMU metrics for AMD
From: Sandipan Das
Date: Fri May 29 2026 - 05:30:21 EST
On 29-05-2026 05:14, Chun-Tse Shao wrote:
> Add IOMMU Translation Lookaside Buffer (TLB) and interrupt cache metrics
> to perf jevents for AMD platforms. This enhances I/O performance
> observability, allowing fleet-wide monitoring of IOMMU overhead.
>
> These metrics are supported on Zen 2 and newer processors (Rome, Milan,
> Genoa, Turin) and are implemented using the standard `amd_iommu` PMU
> events. The implementation checks the existing `_zen_model` global to
> ensure these are only generated for Zen 2+. Note that the pde events on
> AMD cover both 2M and 1G pages, so 1G pages are implicitly included in
> the total hits/misses metrics (sum of pte and pde events).
>
> The following metrics are added:
> - iotlb_total_hit: Total IOTLB hits (4K, 2M, 1G pages).
> - iotlb_total_miss: Total IOTLB misses.
> - iotlb_miss_rate: IOTLB miss rate.
> - iotlb_interrupt_cache_hit: Interrupt cache hits.
> - iotlb_interrupt_cache_miss: Interrupt cache misses.
> - iotlb_interrupt_cache_lookup: Interrupt cache lookups.
> - iotlb_interrupt_cache_miss_rate: Interrupt cache miss rate.
>
> Tested:
> # perf stat -M \
> iotlb_total_hit,iotlb_total_miss,iotlb_miss_rate \
> --per-socket --metric-only -a -j -- sleep 10
> {"socket" : "S0", "counters" : 10,
> "hits iotlb_total_hit" : "3579249.0",
> "% iotlb_miss_rate" : "0.0",
> "misses iotlb_total_miss" : "3.0"}
> {"socket" : "S1", "counters" : 10,
> "hits iotlb_total_hit" : "0.0",
> "% iotlb_miss_rate" : "0.0",
> "misses iotlb_total_miss" : "0.0"}
>
> Signed-off-by: Chun-Tse Shao <ctshao@xxxxxxxxxx>
> Assisted-by: Gemini:gemini-3.1-pro-preview
> ---
Reviewed-by: Sandipan Das <sandipan.das@xxxxxxx>
> tools/perf/pmu-events/amd_metrics.py | 57 ++++++++++++++++++++++++++++
> 1 file changed, 57 insertions(+)
>
> diff --git a/tools/perf/pmu-events/amd_metrics.py b/tools/perf/pmu-events/amd_metrics.py
> index 971f6e7af1f8..dccfcacaf148 100755
> --- a/tools/perf/pmu-events/amd_metrics.py
> +++ b/tools/perf/pmu-events/amd_metrics.py
> @@ -265,6 +265,62 @@ def AmdDtlb() -> Optional[MetricGroup]:
> ], description="Data TLB metrics")
>
>
> +def AmdIotlb() -> Optional[MetricGroup]:
> + global _zen_model
> + if _zen_model < 2:
> + return None
> +
> + # On AMD, the pde events cover both 2M and 1G pages.
> + total_hit = Event("amd_iommu/mem_iommu_tlb_pte_hit/") + Event(
> + "amd_iommu/mem_iommu_tlb_pde_hit/"
> + )
> + total_miss = Event("amd_iommu/mem_iommu_tlb_pte_mis/") + Event(
> + "amd_iommu/mem_iommu_tlb_pde_mis/"
> + )
> + miss_rate = d_ratio(total_miss, total_miss + total_hit)
> +
> + interrupt_cache_hit = Event("amd_iommu/int_dte_hit/")
> + interrupt_cache_miss = Event("amd_iommu/int_dte_mis/")
> + interrupt_cache_lookup = interrupt_cache_hit + interrupt_cache_miss
> + interrupt_cache_miss_rate = d_ratio(
> + interrupt_cache_miss, interrupt_cache_miss + interrupt_cache_hit
> + )
> +
> + return MetricGroup(
> + "iotlb",
> + [
> + Metric("iotlb_total_hit", "IOTLB total hit", total_hit, "hits"),
> + Metric("iotlb_total_miss", "IOTLB total miss", total_miss, "misses"),
> + Metric("iotlb_miss_rate", "IOTLB miss rate", miss_rate, "100%"),
> + Metric(
> + "iotlb_interrupt_cache_hit",
> + "IOTLB interrupt cache hit",
> + interrupt_cache_hit,
> + "hits",
> + ),
> + Metric(
> + "iotlb_interrupt_cache_miss",
> + "IOTLB interrupt cache miss",
> + interrupt_cache_miss,
> + "misses",
> + ),
> + Metric(
> + "iotlb_interrupt_cache_lookup",
> + "IOTLB interrupt cache lookup",
> + interrupt_cache_lookup,
> + "lookups",
> + ),
> + Metric(
> + "iotlb_interrupt_cache_miss_rate",
> + "IOTLB interrupt cache miss rate",
> + interrupt_cache_miss_rate,
> + "100%",
> + ),
> + ],
> + description="IOMMU TLB metrics",
> + )
> +
> +
> def AmdItlb():
> global _zen_model
> l2h = Event("bp_l1_tlb_miss_l2_tlb_hit", "bp_l1_tlb_miss_l2_hit")
> @@ -473,6 +529,7 @@ def main() -> None:
> AmdBr(),
> AmdCtxSw(),
> AmdDtlb(),
> + AmdIotlb(),
> AmdItlb(),
> AmdLdSt(),
> AmdUpc(),