On Mon, Sep 16, 2024 at 04:47:19PM +0000, Ravi Bangoria wrote:
Sure, I will change.
From: Swapnil Sapkal <swapnil.sapkal@xxxxxxx>
Define new, perf tool only, sample types and their layouts. Add logic
to parse /proc/schedstat, convert it to perf sample format and save
samples to perf.data file with `perf sched stats record` command. Also
add logic to read perf.data file, interpret schedstat samples and
print rawdump of samples with `perf script -D`.
Note that, /proc/schedstat file output is standardized with version
number. The patch supports v15 but older or newer version can be added
easily.
Signed-off-by: Swapnil Sapkal <swapnil.sapkal@xxxxxxx>
Co-developed-by: Ravi Bangoria <ravi.bangoria@xxxxxxx>
Signed-off-by: Ravi Bangoria <ravi.bangoria@xxxxxxx>
---
tools/lib/perf/Documentation/libperf.txt | 2 +
tools/lib/perf/Makefile | 2 +-
tools/lib/perf/include/perf/event.h | 42 +++
.../lib/perf/include/perf/schedstat-cpu-v15.h | 13 +
.../perf/include/perf/schedstat-domain-v15.h | 40 +++
tools/perf/builtin-inject.c | 2 +
tools/perf/builtin-sched.c | 222 +++++++++++++++-
tools/perf/util/event.c | 98 +++++++
tools/perf/util/event.h | 2 +
tools/perf/util/session.c | 20 ++
tools/perf/util/synthetic-events.c | 249 ++++++++++++++++++
tools/perf/util/synthetic-events.h | 3 +
tools/perf/util/tool.c | 20 ++
tools/perf/util/tool.h | 4 +-
14 files changed, 716 insertions(+), 3 deletions(-)
create mode 100644 tools/lib/perf/include/perf/schedstat-cpu-v15.h
create mode 100644 tools/lib/perf/include/perf/schedstat-domain-v15.h
diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt
index fcfb9499ef9c..39c78682ad2e 100644
--- a/tools/lib/perf/Documentation/libperf.txt
+++ b/tools/lib/perf/Documentation/libperf.txt
@@ -211,6 +211,8 @@ SYNOPSIS
struct perf_record_time_conv;
struct perf_record_header_feature;
struct perf_record_compressed;
+ struct perf_record_schedstat_cpu;
+ struct perf_record_schedstat_domain;
--
DESCRIPTION
diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile
index 3a9b2140aa04..ebbfea891a6a 100644
--- a/tools/lib/perf/Makefile
+++ b/tools/lib/perf/Makefile
@@ -187,7 +187,7 @@ install_lib: libs
$(call do_install_mkdir,$(libdir_SQ)); \
cp -fpR $(LIBPERF_ALL) $(DESTDIR)$(libdir_SQ)
-HDRS := bpf_perf.h core.h cpumap.h threadmap.h evlist.h evsel.h event.h mmap.h
+HDRS := bpf_perf.h core.h cpumap.h threadmap.h evlist.h evsel.h event.h mmap.h schedstat-cpu-v15.h schedstat-domain-v15.h
INTERNAL_HDRS := cpumap.h evlist.h evsel.h lib.h mmap.h rc_check.h threadmap.h xyarray.h
INSTALL_HDRS_PFX := $(DESTDIR)$(prefix)/include/perf
diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h
index 37bb7771d914..35be296d68d5 100644
--- a/tools/lib/perf/include/perf/event.h
+++ b/tools/lib/perf/include/perf/event.h
@@ -457,6 +457,44 @@ struct perf_record_compressed {
char data[];
};
+struct perf_record_schedstat_cpu_v15 {
+#define CPU_FIELD(_type, _name, _ver) _type _name;
+#include "schedstat-cpu-v15.h"
+#undef CPU_FIELD
+};
+
+struct perf_record_schedstat_cpu {
+ struct perf_event_header header;
+ __u16 version;
+ __u64 timestamp;
+ __u32 cpu;
Can you change the layout to minimize the paddings? Probably better to
add an explicit field for unused bits.
+ union {
+ struct perf_record_schedstat_cpu_v15 v15;
+ };
+};
+
+struct perf_record_schedstat_domain_v15 {
+#define DOMAIN_FIELD(_type, _name, _ver) _type _name;
+#include "schedstat-domain-v15.h"
+#undef DOMAIN_FIELD
+};
+
+#define DOMAIN_NAME_LEN 16
+
+struct perf_record_schedstat_domain {
+ struct perf_event_header header;
+ __u16 version;
+ __u64 timestamp;
+ __u32 cpu;
+ __u16 domain;
Ditto.
+ char name[DOMAIN_NAME_LEN];
+ union {
+ struct perf_record_schedstat_domain_v15 v15;
+ };
+ __u16 nr_cpus;
+ __u8 cpu_mask[];
+};
+
enum perf_user_event_type { /* above any possible kernel type */
PERF_RECORD_USER_TYPE_START = 64,
PERF_RECORD_HEADER_ATTR = 64,
@@ -478,6 +516,8 @@ enum perf_user_event_type { /* above any possible kernel type */
PERF_RECORD_HEADER_FEATURE = 80,
PERF_RECORD_COMPRESSED = 81,
PERF_RECORD_FINISHED_INIT = 82,
+ PERF_RECORD_SCHEDSTAT_CPU = 83,
+ PERF_RECORD_SCHEDSTAT_DOMAIN = 84,
PERF_RECORD_HEADER_MAX
};
@@ -518,6 +558,8 @@ union perf_event {
struct perf_record_time_conv time_conv;
struct perf_record_header_feature feat;
struct perf_record_compressed pack;
+ struct perf_record_schedstat_cpu schedstat_cpu;
+ struct perf_record_schedstat_domain schedstat_domain;
};
#endif /* __LIBPERF_EVENT_H */
diff --git a/tools/lib/perf/include/perf/schedstat-cpu-v15.h b/tools/lib/perf/include/perf/schedstat-cpu-v15.h
new file mode 100644
index 000000000000..8e4355ee3705
--- /dev/null
+++ b/tools/lib/perf/include/perf/schedstat-cpu-v15.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifdef CPU_FIELD
+CPU_FIELD(__u32, yld_count, v15)
+CPU_FIELD(__u32, array_exp, v15)
+CPU_FIELD(__u32, sched_count, v15)
+CPU_FIELD(__u32, sched_goidle, v15)
+CPU_FIELD(__u32, ttwu_count, v15)
+CPU_FIELD(__u32, ttwu_local, v15)
+CPU_FIELD(__u64, rq_cpu_time, v15)
+CPU_FIELD(__u64, run_delay, v15)
+CPU_FIELD(__u64, pcount, v15)
+#endif
Can we have a single schedstat.h containing both CPU fields and domain
fields?
You might require users to define the macro always and get rid
of the ifdef condition here.
The later patches needed these ifdefs, so I kept it. If we combine both
cpu and domain fields, we will need this.
Also is there any macro magic to handle the version number? I think you
can have the number only (15; without 'v') and compare with input if
needed.
I will think more on this; if it works out cleaner, I will update in the next version.
Thanks,
Namhyung
--
diff --git a/tools/lib/perf/include/perf/schedstat-domain-v15.h b/tools/lib/perf/include/perf/schedstat-domain-v15.h
new file mode 100644
index 000000000000..422e713d617a
--- /dev/null
+++ b/tools/lib/perf/include/perf/schedstat-domain-v15.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifdef DOMAIN_FIELD
+DOMAIN_FIELD(__u32, idle_lb_count, v15)
+DOMAIN_FIELD(__u32, idle_lb_balanced, v15)
+DOMAIN_FIELD(__u32, idle_lb_failed, v15)
+DOMAIN_FIELD(__u32, idle_lb_imbalance, v15)
+DOMAIN_FIELD(__u32, idle_lb_gained, v15)
+DOMAIN_FIELD(__u32, idle_lb_hot_gained, v15)
+DOMAIN_FIELD(__u32, idle_lb_nobusyq, v15)
+DOMAIN_FIELD(__u32, idle_lb_nobusyg, v15)
+DOMAIN_FIELD(__u32, busy_lb_count, v15)
+DOMAIN_FIELD(__u32, busy_lb_balanced, v15)
+DOMAIN_FIELD(__u32, busy_lb_failed, v15)
+DOMAIN_FIELD(__u32, busy_lb_imbalance, v15)
+DOMAIN_FIELD(__u32, busy_lb_gained, v15)
+DOMAIN_FIELD(__u32, busy_lb_hot_gained, v15)
+DOMAIN_FIELD(__u32, busy_lb_nobusyq, v15)
+DOMAIN_FIELD(__u32, busy_lb_nobusyg, v15)
+DOMAIN_FIELD(__u32, newidle_lb_count, v15)
+DOMAIN_FIELD(__u32, newidle_lb_balanced, v15)
+DOMAIN_FIELD(__u32, newidle_lb_failed, v15)
+DOMAIN_FIELD(__u32, newidle_lb_imbalance, v15)
+DOMAIN_FIELD(__u32, newidle_lb_gained, v15)
+DOMAIN_FIELD(__u32, newidle_lb_hot_gained, v15)
+DOMAIN_FIELD(__u32, newidle_lb_nobusyq, v15)
+DOMAIN_FIELD(__u32, newidle_lb_nobusyg, v15)
+DOMAIN_FIELD(__u32, alb_count, v15)
+DOMAIN_FIELD(__u32, alb_failed, v15)
+DOMAIN_FIELD(__u32, alb_pushed, v15)
+DOMAIN_FIELD(__u32, sbe_count, v15)
+DOMAIN_FIELD(__u32, sbe_balanced, v15)
+DOMAIN_FIELD(__u32, sbe_pushed, v15)
+DOMAIN_FIELD(__u32, sbf_count, v15)
+DOMAIN_FIELD(__u32, sbf_balanced, v15)
+DOMAIN_FIELD(__u32, sbf_pushed, v15)
+DOMAIN_FIELD(__u32, ttwu_wake_remote, v15)
+DOMAIN_FIELD(__u32, ttwu_move_affine, v15)
+DOMAIN_FIELD(__u32, ttwu_move_balance, v15)
+#endif /* DOMAIN_FIELD */