[RFC PATCH v9 5/7] perf stat: Add command line option for enabling tpebs recording

From: weilin.wang
Date: Tue May 21 2024 - 13:41:04 EST


From: Weilin Wang <weilin.wang@xxxxxxxxx>

With this command line option, tpebs recording is turned off in perf stat by
default. It is only turned on when this option is given on the perf stat
command line.
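
Example usage (a sketch; the metric name below is illustrative, since the
set of retire_latency-based metrics varies by platform):

  # Default: no TPEBS recording; retire_latency defaults to 0 when the
  # metric is computed.
  perf stat -M tma_split_loads -a -- sleep 1

  # With the new option, perf stat collects TPEBS samples to derive
  # retire_latency values at runtime.
  perf stat -M tma_split_loads --enable-tpebs-recording -a -- sleep 1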

Signed-off-by: Weilin Wang <weilin.wang@xxxxxxxxx>
---
 tools/perf/builtin-stat.c | 19 +++++++++++++------
 tools/perf/util/evsel.c   | 19 ++++++++++++++-----
 2 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index c0e9dfa3b3c2..c27521fb1aee 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -117,6 +117,7 @@ static volatile sig_atomic_t child_pid = -1;
 static int detailed_run = 0;
 static bool transaction_run;
 static bool topdown_run = false;
+static bool tpebs_recording = false;
 static bool smi_cost = false;
 static bool smi_reset = false;
 static int big_num_opt = -1;
@@ -677,9 +678,11 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
 	int err;
 	bool second_pass = false;

-	err = start_tpebs(&stat_config, evsel_list);
-	if (err < 0)
-		return err;
+	if (tpebs_recording) {
+		err = start_tpebs(&stat_config, evsel_list);
+		if (err < 0)
+			return err;
+	}

 	if (forks) {
 		if (evlist__prepare_workload(evsel_list, &target, argv, is_pipe, workload_exec_failed_signal) < 0) {
@@ -886,9 +889,11 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)

 	t1 = rdclock();

-	err = stop_tpebs();
-	if (err < 0)
-		return err;
+	if (tpebs_recording) {
+		err = stop_tpebs();
+		if (err < 0)
+			return err;
+	}

 	if (stat_config.walltime_run_table)
 		stat_config.walltime_run[run_idx] = t1 - t0;
@@ -1246,6 +1251,8 @@ static struct option stat_options[] = {
 		    "disable adding events for the metric threshold calculation"),
 	OPT_BOOLEAN(0, "topdown", &topdown_run,
 		    "measure top-down statistics"),
+	OPT_BOOLEAN(0, "enable-tpebs-recording", &tpebs_recording,
+		    "enable tpebs recording when retire_latency is required"),
 	OPT_UINTEGER(0, "td-level", &stat_config.topdown_level,
 		    "Set the metrics level for the top-down statistics (0: max level)"),
 	OPT_BOOLEAN(0, "smi-cost", &smi_cost,
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 4d700338fc99..e1f3f63dfb54 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1540,21 +1540,30 @@ static int evsel__set_retire_lat(struct evsel *evsel, int cpu_map_idx, int threa
 		}
 	}

-	if (!found)
-		return -1;
+	/* Set ena and run to non-zero */
+	count->ena = count->run = 1;
+	count->lost = 0;
+
+	if (!found) {
+		/*
+		 * Default retire_latency to 0 when no value for this event is
+		 * found in the sampled data (--enable-tpebs-recording not set,
+		 * or no samples were recorded).
+		 */
+		count->val = 0;
+		return 0;
+	}

 	/*
 	 * Only set retire_latency value to the first CPU and thread.
 	 */
 	if (cpu_map_idx == 0 && thread == 0)
+		/* FIXME: casting from double to __u64 loses precision. Any improvement? */
 		val = t->val;
 	else
 		val = 0;

 	count->val = val;
-	/* Set ena and run to non-zero */
-	count->ena = count->run = 1;
-	count->lost = 0;
 	return 0;
 }

--
2.43.0