[PATCH RFC 3/3] perf/report: add off-cpu samples

From: Ajay Kaher
Date: Thu Jul 11 2024 - 08:17:57 EST


Off-cpu samples represent the time periods during which the target
process is not running on a CPU.

In the following example, perf has collected 15 off-cpu samples and
the program was running on the CPU for 27% of the time:

Samples: 24 of 'task-clock:ppp', 15 of 'offcpu', Event count: ~9150831908 (73% offcpu)
+73.77% 73.77% a.out libc.so.6 [.] clock_nanosleep <-- off-cpu sample
+24.04% 24.04% a.out [vdso] [.] __vdso_gettimeofday <-- on-cpu sample

Signed-off-by: Ajay Kaher <ajay.kaher@xxxxxxxxxxxx>

---
tools/perf/util/events_stats.h | 2 ++
tools/perf/util/evsel.c | 2 ++
tools/perf/util/hist.c | 31 ++++++++++++++++++++++++++++---
tools/perf/util/hist.h | 1 +
tools/perf/util/sample.h | 1 +
5 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/events_stats.h b/tools/perf/util/events_stats.h
index 8fecc9fbaecc..7bb3cf1ab835 100644
--- a/tools/perf/util/events_stats.h
+++ b/tools/perf/util/events_stats.h
@@ -44,8 +44,10 @@ struct events_stats {

struct hists_stats {
u64 total_period;
+ u64 total_period_off_cpu;
u64 total_non_filtered_period;
u32 nr_samples;
+ u64 nr_samples_off_cpu;
u32 nr_non_filtered_samples;
u32 nr_lost_samples;
};
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 8ba890a5ac6e..ea41586474e3 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1146,6 +1146,7 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
attr->write_backward = opts->overwrite ? 1 : 0;
attr->read_format = PERF_FORMAT_LOST;

+ evsel__set_sample_bit(evsel, CPU);
evsel__set_sample_bit(evsel, IP);
evsel__set_sample_bit(evsel, TID);

@@ -2438,6 +2439,7 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
u.val32[0] = bswap_32(u.val32[0]);
}

+ data->off_cpu = u.val32[1];
data->cpu = u.val32[0];
array++;
}
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 2e9e193179dd..251333e0b021 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -23,6 +23,7 @@
#include "thread.h"
#include "block-info.h"
#include "ui/progress.h"
+#include "ui/util.h"
#include <errno.h>
#include <math.h>
#include <inttypes.h>
@@ -725,6 +726,7 @@ __hists__add_entry(struct hists *hists,
.socket = al->socket,
.cpu = al->cpu,
.cpumode = al->cpumode,
+ .off_cpu = sample->off_cpu,
.ip = al->addr,
.level = al->level,
.code_page_size = sample->code_page_size,
@@ -1076,6 +1078,8 @@ iter_add_single_cumulative_entry(struct hist_entry_iter *iter,
callchain_cursor_commit(get_tls_callchain_cursor());

hists__inc_nr_samples(hists, he->filtered);
+ if (sample->off_cpu)
+ ++hists->stats.nr_samples_off_cpu;

return err;
}
@@ -1740,6 +1744,7 @@ void hists__reset_stats(struct hists *hists)
{
hists->nr_entries = 0;
hists->stats.total_period = 0;
+ hists->stats.total_period_off_cpu = 0;

hists__reset_filter_stats(hists);
}
@@ -1757,6 +1762,9 @@ void hists__inc_stats(struct hists *hists, struct hist_entry *h)

hists->nr_entries++;
hists->stats.total_period += h->stat.period;
+
+ if (h->off_cpu)
+ hists->stats.total_period_off_cpu += h->stat.period;
}

static void hierarchy_recalc_total_periods(struct hists *hists)
@@ -2745,14 +2753,20 @@ int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool sh
struct thread *thread = hists->thread_filter;
int socket_id = hists->socket_filter;
unsigned long nr_samples = hists->stats.nr_samples;
+ unsigned long nr_samples_off_cpu = hists->stats.nr_samples_off_cpu;
u64 nr_events = hists->stats.total_period;
+ int nr_events_off_cpu_percentage = (hists->stats.total_period_off_cpu * 100) / nr_events;
struct evsel *evsel = hists_to_evsel(hists);
const char *ev_name = evsel__name(evsel);
char buf[512], sample_freq_str[64] = "";
+ char oncpu_str[128] = "";
+ char offcpu_str[128] = "";
+ char offcpu_percentage_str[128] = "";
size_t buflen = sizeof(buf);
char ref[30] = " show reference callgraph, ";
bool enable_ref = false;

+
if (symbol_conf.filter_relative) {
nr_samples = hists->stats.nr_non_filtered_samples;
nr_events = hists->stats.total_non_filtered_period;
@@ -2785,10 +2799,21 @@ int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool sh
scnprintf(sample_freq_str, sizeof(sample_freq_str), " %d Hz,", evsel->core.attr.sample_freq);

nr_samples = convert_unit(nr_samples, &unit);
+
+ scnprintf(oncpu_str, sizeof(oncpu_str), "%lu%c of '%s',",
+ nr_samples - nr_samples_off_cpu, unit, ev_name);
+
+ if (evsel->core.attr.off_cpu) {
+ scnprintf(offcpu_str, sizeof(offcpu_str), "%lu%c of '%s',",
+ nr_samples_off_cpu, unit, "offcpu");
+ scnprintf(offcpu_percentage_str, sizeof(offcpu_percentage_str),
+ "(%d%% offcpu)", nr_events_off_cpu_percentage);
+ }
+
printed = scnprintf(bf, size,
- "Samples: %lu%c of event%s '%s',%s%sEvent count (approx.): %" PRIu64,
- nr_samples, unit, evsel->core.nr_members > 1 ? "s" : "",
- ev_name, sample_freq_str, enable_ref ? ref : " ", nr_events);
+ "Samples: %s %s %s%sEvent count: ~%" PRIu64 " %s",
+ oncpu_str, offcpu_str, sample_freq_str, enable_ref ? ref : " ",
+ nr_events, offcpu_percentage_str);


if (hists->uid_filter_str)
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 8fb3bdd29188..c64a07ce92fb 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -236,6 +236,7 @@ struct hist_entry {
/* We are added by hists__add_dummy_entry. */
bool dummy;
bool leaf;
+ bool off_cpu;

char level;
u8 filtered;
diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h
index 70b2c3135555..59b0951f4718 100644
--- a/tools/perf/util/sample.h
+++ b/tools/perf/util/sample.h
@@ -109,6 +109,7 @@ struct perf_sample {
u16 retire_lat;
};
bool no_hw_idx; /* No hw_idx collected in branch_stack */
+ bool off_cpu;
char insn[MAX_INSN];
void *raw_data;
struct ip_callchain *callchain;
--
2.39.0