[PATCH v1 2/3] perf/dwc_pcie: Support narrowed time-based counter for long time monitoring
From: Yicong Yang
Date: Mon Jun 15 2026 - 02:38:52 EST
From: Yufan Dou <douyufan@xxxxxxxxxxxxx>
The DWC PCIe Time-Based Analysis Data Register (the counter for time-based
events) is architected as 64-bit, but some hardware implementations do not
implement the full width. On these implementations the counter stops once it
reaches its implemented width, which limits it to short monitoring runs. On
our platform, for example, the counter only covers ~15s when monitoring RX
TLP payloads.
Add an optional hrtimer that fires every 2 seconds. It takes the role of
the counter overflow interrupt: it reads the counter, updates the event
count and resets the counter, which lifts the limit of the narrow counters.
It only applies to the time-based counter. The 2 second update period is
half of the maximum counting period (4s) of the time-based counter under
the hardware's period counting mode.
Because fully-implemented 64-bit counters do not need this workaround,
expose it via the module parameter timer_enable (default N) so that
users can opt in only on affected platforms.
Before this patch, when counting an fio run for 10m the count is incorrect:
root@localhost:/tmp# echo 0 > /sys/module/dwc_pcie_pmu/parameters/timer_enable
root@localhost:/tmp# perf stat -e dwc_rootport_20000/rx_pcie_tlp_data_payload/ -- fio --runtime=10m fio_job.config
[...]
Run status group 0 (all jobs):
READ: bw=5594MiB/s (5865MB/s), 5594MiB/s-5594MiB/s (5865MB/s-5865MB/s), io=3278GiB (3519GB), run=600010-600010msec
[...]
Performance counter stats for 'system wide':
137,438,953,456 dwc_rootport_20000/rx_pcie_tlp_data_payload/
After this patch the count is as expected:
root@localhost:/tmp# echo 1 > /sys/module/dwc_pcie_pmu/parameters/timer_enable
root@localhost:/tmp# perf stat -e dwc_rootport_20000/rx_pcie_tlp_data_payload/ -- fio --runtime=10m fio_job.config
[...]
Run status group 0 (all jobs):
READ: bw=5632MiB/s (5905MB/s), 5632MiB/s-5632MiB/s (5905MB/s-5905MB/s), io=3300GiB (3543GB), run=600013-600013msec
[...]
Performance counter stats for 'system wide':
3,543,850,268,576 dwc_rootport_20000/rx_pcie_tlp_data_payload/
Signed-off-by: Yufan Dou <douyufan@xxxxxxxxxxxxx>
Signed-off-by: Yicong Yang <yang.yicong@xxxxxxxxxxxxx>
---
drivers/perf/dwc_pcie_pmu.c | 59 +++++++++++++++++++++++++++++++++++--
1 file changed, 57 insertions(+), 2 deletions(-)
diff --git a/drivers/perf/dwc_pcie_pmu.c b/drivers/perf/dwc_pcie_pmu.c
index 5385401fa9cf..abf50f173202 100644
--- a/drivers/perf/dwc_pcie_pmu.c
+++ b/drivers/perf/dwc_pcie_pmu.c
@@ -11,6 +11,7 @@
#include <linux/cpumask.h>
#include <linux/device.h>
#include <linux/errno.h>
+#include <linux/hrtimer.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/pcie-dwc.h>
@@ -83,6 +84,11 @@ enum dwc_pcie_event_type {
#define DWC_PCIE_LANE_EVENT_MAX_PERIOD GENMASK_ULL(31, 0)
#define DWC_PCIE_MAX_PERIOD GENMASK_ULL(63, 0)
+/*
+ * Period of the optional counter-refresh hrtimer: half of the 4s maximum
+ * counting period of the time-based counter in period counting mode.
+ */
+#define DWC_PCIE_PMU_TIMER_PERIOD_NS (2 * NSEC_PER_SEC)
+
+/* Opt-in switch for the hrtimer; checked when a time-based event is started. */
+static bool timer_enable;
+module_param_named(timer_enable, timer_enable, bool, 0644);
+MODULE_PARM_DESC(timer_enable, "Enable hrtimer for time-based events (default: N)");
struct dwc_pcie_pmu {
struct pmu pmu;
@@ -93,6 +99,7 @@ struct dwc_pcie_pmu {
/* Groups #6 and #7 */
DECLARE_BITMAP(lane_events, 2 * DWC_PCIE_LANE_MAX_EVENTS_PER_GROUP);
struct perf_event *time_based_event;
+ struct hrtimer hrtimer;
struct hlist_node cpuhp_node;
int on_cpu;
@@ -354,6 +361,26 @@ static u64 dwc_pcie_pmu_read_time_based_counter(struct perf_event *event)
return val;
}
+/*
+ * Restart the time-based hardware counter from zero and rebase prev_count
+ * to match. The caller is expected to have run dwc_pcie_pmu_event_update()
+ * first, otherwise the counts accumulated so far are discarded.
+ */
+static void dwc_pcie_pmu_reset_time_based_counter(struct perf_event *event)
+{
+ struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+
+ dwc_pcie_pmu_time_based_event_enable(pcie_pmu, false);
+
+ /*
+ * The hardware counter is reset to zero when disabled. Synchronize
+ * prev_count so that the next event_update() computes the correct
+ * delta against the new counter baseline. The store is unconditional,
+ * so a plain local64_set() suffices.
+ */
+ local64_set(&hwc->prev_count, 0);
+
+ dwc_pcie_pmu_time_based_event_enable(pcie_pmu, true);
+}
+
static void dwc_pcie_pmu_event_update(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
@@ -429,6 +456,25 @@ static int dwc_pcie_pmu_validate_group(struct perf_event *event)
return 0;
}
+/*
+ * Periodic hrtimer handler standing in for a counter overflow interrupt:
+ * fold the current time-based counter value into the event count and
+ * restart the hardware counter before a narrow counter implementation
+ * reaches its implemented width and stops.
+ *
+ * Returns HRTIMER_NORESTART when there is no time-based event or the event
+ * is stopped; otherwise re-arms the timer DWC_PCIE_PMU_TIMER_PERIOD_NS
+ * ahead and returns HRTIMER_RESTART.
+ */
+static enum hrtimer_restart dwc_pcie_pmu_hrtimer_callback(struct hrtimer *hrtimer)
+{
+ struct dwc_pcie_pmu *pcie_pmu = container_of(hrtimer, struct dwc_pcie_pmu, hrtimer);
+ struct perf_event *event = pcie_pmu->time_based_event;
+ struct hw_perf_event *hwc;
+
+ /* No time-based event is currently tracked by this PMU. */
+ if (!event)
+ return HRTIMER_NORESTART;
+
+ hwc = &event->hw;
+ /* A stopped event must not be updated; let the timer lapse. */
+ if (hwc->state & PERF_HES_STOPPED)
+ return HRTIMER_NORESTART;
+
+ /* Accumulate the count first: the reset zeroes the hardware counter. */
+ dwc_pcie_pmu_event_update(event);
+ dwc_pcie_pmu_reset_time_based_counter(event);
+ hrtimer_forward_now(hrtimer, ns_to_ktime(DWC_PCIE_PMU_TIMER_PERIOD_NS));
+ return HRTIMER_RESTART;
+}
+
static int dwc_pcie_pmu_event_init(struct perf_event *event)
{
struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu);
@@ -480,8 +526,13 @@ static void dwc_pcie_pmu_event_start(struct perf_event *event, int flags)
if (type == DWC_PCIE_LANE_EVENT)
dwc_pcie_pmu_lane_event_enable(pcie_pmu, event, true);
- else if (type == DWC_PCIE_TIME_BASE_EVENT)
+ else if (type == DWC_PCIE_TIME_BASE_EVENT) {
dwc_pcie_pmu_time_based_event_enable(pcie_pmu, true);
+ if (timer_enable)
+ hrtimer_start(&pcie_pmu->hrtimer,
+ ns_to_ktime(DWC_PCIE_PMU_TIMER_PERIOD_NS),
+ HRTIMER_MODE_REL_PINNED_HARD);
+ }
}
static void dwc_pcie_pmu_event_stop(struct perf_event *event, int flags)
@@ -497,8 +548,10 @@ static void dwc_pcie_pmu_event_stop(struct perf_event *event, int flags)
if (type == DWC_PCIE_LANE_EVENT)
dwc_pcie_pmu_lane_event_enable(pcie_pmu, event, false);
- else if (type == DWC_PCIE_TIME_BASE_EVENT)
+ else if (type == DWC_PCIE_TIME_BASE_EVENT) {
dwc_pcie_pmu_time_based_event_enable(pcie_pmu, false);
+ hrtimer_cancel(&pcie_pmu->hrtimer);
+ }
hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
}
@@ -726,6 +779,8 @@ static int dwc_pcie_pmu_probe(struct platform_device *plat_dev)
pcie_pmu->ras_des_offset = vsec;
pcie_pmu->nr_lanes = pcie_get_width_cap(pdev);
pcie_pmu->on_cpu = -1;
+ hrtimer_setup(&pcie_pmu->hrtimer, dwc_pcie_pmu_hrtimer_callback,
+ CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED_HARD);
pcie_pmu->pmu = (struct pmu){
.name = name,
.parent = &plat_dev->dev,
--
2.50.1 (Apple Git-155)