[PATCH v4 2/9] perf record --off-cpu: Add --off-cpu-thresh

From: Howard Chu
Date: Wed Aug 07 2024 - 11:39:51 EST


Add the --off-cpu-thresh argument to specify the off-cpu time threshold,
in microseconds. If the off-cpu time exceeds this threshold, dump the
off-cpu data directly.

Suggested-by: Ian Rogers <irogers@xxxxxxxxxx>
Signed-off-by: Howard Chu <howardchu95@xxxxxxxxx>
---
tools/perf/builtin-record.c | 26 ++++++++++++++++++++++++++
tools/perf/util/bpf_off_cpu.c | 2 ++
tools/perf/util/bpf_skel/off_cpu.bpf.c | 2 ++
tools/perf/util/record.h | 1 +
4 files changed, 31 insertions(+)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 72345d1e54b0..60c6fe7b4804 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -3133,6 +3133,28 @@ static int record__parse_mmap_pages(const struct option *opt,
return ret;
}

+/* Parse --off-cpu-thresh: a base-10 count of microseconds; -EINVAL if malformed. */
+static int record__parse_off_cpu_thresh(const struct option *opt,
+ const char *str,
+ int unset __maybe_unused)
+{
+ struct record_opts *opts = opt->value;
+ char *endptr;
+ u64 off_cpu_thresh;
+
+ if (!str)
+ return -EINVAL;
+
+ /* strtoull(), not strtoul(): unsigned long may be only 32 bits wide. */
+ off_cpu_thresh = strtoull(str, &endptr, 10);
+
+ /* Reject trailing junk, and a 0 result that did not come from the string "0". */
+ if (*endptr || (off_cpu_thresh == 0 && strcmp(str, "0")))
+ return -EINVAL;
+
+ opts->off_cpu_thresh = off_cpu_thresh;
+ return 0;
+}
+
void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
{
}
@@ -3326,6 +3348,7 @@ static struct record record = {
.ctl_fd = -1,
.ctl_fd_ack = -1,
.synth = PERF_SYNTH_ALL,
+ .off_cpu_thresh = OFF_CPU_THRESH_DEFAULT,
},
.tool = {
.sample = process_sample_event,
@@ -3560,6 +3583,9 @@ static struct option __record_options[] = {
OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"),
OPT_STRING(0, "setup-filter", &record.filter_action, "pin|unpin",
"BPF filter action"),
+ OPT_CALLBACK(0, "off-cpu-thresh", &record.opts, "us",
+ "Dump off-cpu samples if off-cpu time reaches this threshold. The unit is microseconds. (default: 500000)",
+ record__parse_off_cpu_thresh),
OPT_END()
};

diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c
index 6af36142dc5a..1e0e454bfb5e 100644
--- a/tools/perf/util/bpf_off_cpu.c
+++ b/tools/perf/util/bpf_off_cpu.c
@@ -272,6 +272,8 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
}
}

+ skel->bss->offcpu_thresh = opts->off_cpu_thresh * 1000ull;
+
err = off_cpu_bpf__attach(skel);
if (err) {
pr_err("Failed to attach off-cpu BPF skeleton\n");
diff --git a/tools/perf/util/bpf_skel/off_cpu.bpf.c b/tools/perf/util/bpf_skel/off_cpu.bpf.c
index d877a0a9731f..cca1b6990a57 100644
--- a/tools/perf/util/bpf_skel/off_cpu.bpf.c
+++ b/tools/perf/util/bpf_skel/off_cpu.bpf.c
@@ -96,6 +96,8 @@ const volatile bool uses_cgroup_v1 = false;

int perf_subsys_id = -1;

+__u64 sample_id, sample_type, offcpu_thresh;
+
/*
* Old kernel used to call it task_struct->state and now it's '__state'.
* Use BPF CO-RE "ignored suffix rule" to deal with it like below:
diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h
index a6566134e09e..3c11416e6627 100644
--- a/tools/perf/util/record.h
+++ b/tools/perf/util/record.h
@@ -79,6 +79,7 @@ struct record_opts {
int synth;
int threads_spec;
const char *threads_user_spec;
+ u64 off_cpu_thresh;
};

extern const char * const *record_usage;
--
2.45.2