[PATCH v11 09/10] perf record --off-cpu: Add --off-cpu-thresh option

From: Howard Chu
Date: Wed Dec 11 2024 - 17:27:44 EST


Specify the threshold for dumping offcpu samples with --off-cpu-thresh,
the unit is milliseconds. The default value is 500ms.

Example:

perf record --off-cpu --off-cpu-thresh 824

The example above collects off-cpu samples where the off-cpu time is
longer than 824ms

Suggested-by: Ian Rogers <irogers@xxxxxxxxxx>
Suggested-by: Namhyung Kim <namhyung@xxxxxxxxxx>
Suggested-by: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
Reviewed-by: Ian Rogers <irogers@xxxxxxxxxx>
Signed-off-by: Howard Chu <howardchu95@xxxxxxxxx>
Cc: Adrian Hunter <adrian.hunter@xxxxxxxxx>
Cc: Alexander Shishkin <alexander.shishkin@xxxxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: James Clark <james.clark@xxxxxxxxxx>
Cc: Jiri Olsa <jolsa@xxxxxxxxxx>
Cc: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>
Cc: Mark Rutland <mark.rutland@xxxxxxx>
Cc: Namhyung Kim <namhyung@xxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Link: https://lore.kernel.org/r/20241108204137.2444151-2-howardchu95@xxxxxxxxx
Signed-off-by: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
---
tools/perf/Documentation/perf-record.txt | 9 ++++++++
tools/perf/builtin-record.c | 26 ++++++++++++++++++++++++
tools/perf/util/off_cpu.h | 1 +
tools/perf/util/record.h | 1 +
4 files changed, 37 insertions(+)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 242223240a08..f3ac4c739d5f 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -829,6 +829,15 @@ filtered through the mask provided by -C option.
only, as of now. So the applications built without the frame
pointer might see bogus addresses.

+ off-cpu profiling consists two types of samples: direct samples, which
+ share the same behavior as regular samples, and the accumulated
+ samples, stored in BPF stack trace map, presented after all the regular
+ samples.
+
+--off-cpu-thresh::
+ Once a task's off-cpu time reaches this threshold (in milliseconds), it
+ generates a direct off-cpu sample. The default is 500ms.
+
--setup-filter=<action>::
Prepare BPF filter to be used by regular users. The action should be
either "pin" or "unpin". The filter can be used after it's pinned.
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 0b637cea4850..4d0d9e36a421 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -3147,6 +3147,28 @@ static int record__parse_mmap_pages(const struct option *opt,
return ret;
}

+static int record__parse_off_cpu_thresh(const struct option *opt,
+ const char *str,
+ int unset __maybe_unused)
+{
+ struct record_opts *opts = opt->value;
+ char *endptr;
+ u64 off_cpu_thresh_ms; // converted to us for potential future improvements
+
+ if (!str)
+ return -EINVAL;
+
+ off_cpu_thresh_ms = strtoull(str, &endptr, 10);
+
+ /* the threshold isn't string "0", yet strtoull() returns 0, parsing failed */
+ if (*endptr || (off_cpu_thresh_ms == 0 && strcmp(str, "0")))
+ return -EINVAL;
+ else
+ opts->off_cpu_thresh_us = off_cpu_thresh_ms * 1000;
+
+ return 0;
+}
+
void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
{
}
@@ -3340,6 +3362,7 @@ static struct record record = {
.ctl_fd = -1,
.ctl_fd_ack = -1,
.synth = PERF_SYNTH_ALL,
+ .off_cpu_thresh_us = OFFCPU_THRESH,
},
};

@@ -3562,6 +3585,9 @@ static struct option __record_options[] = {
OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"),
OPT_STRING(0, "setup-filter", &record.filter_action, "pin|unpin",
"BPF filter action"),
+ OPT_CALLBACK(0, "off-cpu-thresh", &record.opts, "ms",
+ "Dump off-cpu samples if off-cpu time exceeds this threshold (in milliseconds). (Default: 500ms)",
+ record__parse_off_cpu_thresh),
OPT_END()
};

diff --git a/tools/perf/util/off_cpu.h b/tools/perf/util/off_cpu.h
index 2a4b7f9b2c4c..f07ab2e36317 100644
--- a/tools/perf/util/off_cpu.h
+++ b/tools/perf/util/off_cpu.h
@@ -16,6 +16,7 @@ struct record_opts;
PERF_SAMPLE_PERIOD | PERF_SAMPLE_RAW | \
PERF_SAMPLE_CGROUP)

+#define OFFCPU_THRESH 500000ull

#ifdef HAVE_BPF_SKEL
int off_cpu_prepare(struct evlist *evlist, struct target *target,
diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h
index a6566134e09e..2ca74add26c0 100644
--- a/tools/perf/util/record.h
+++ b/tools/perf/util/record.h
@@ -79,6 +79,7 @@ struct record_opts {
int synth;
int threads_spec;
const char *threads_user_spec;
+ u64 off_cpu_thresh_us;
};

extern const char * const *record_usage;
--
2.43.0