[PATCH v10 09/12] perf record: implement -z,--compression_level[=<n>] option
From: Alexey Budankov
Date: Mon Mar 18 2019 - 13:44:50 EST
Implemented -z,--compression_level[=<n>] option that enables compression
of mmaped kernel data buffers content in runtime during perf record
mode collection. Default option value is 1 (fastest compression).
Compression overhead has been measured for serial and AIO streaming
when profiling matrix multiplication workload:
-------------------------------------------------------------
| SERIAL | AIO-1 |
----------------------------------------------------------------|
|-z | OVH(x) | ratio(x) size(MiB) | OVH(x) | ratio(x) size(MiB) |
|---------------------------------------------------------------|
| 0 | 1,00 | 1,000 179,424 | 1,00 | 1,000 187,527 |
| 1 | 1,04 | 8,427 181,148 | 1,01 | 8,474 188,562 |
| 2 | 1,07 | 8,055 186,953 | 1,03 | 7,912 191,773 |
| 3 | 1,04 | 8,283 181,908 | 1,03 | 8,220 191,078 |
| 5 | 1,09 | 8,101 187,705 | 1,05 | 7,780 190,065 |
| 8 | 1,05 | 9,217 179,191 | 1,12 | 6,111 193,024 |
-----------------------------------------------------------------
OVH = (Execution time with -z N) / (Execution time with -z 0)
ratio - compression ratio
size - number of bytes that was compressed
size ~= trace size x ratio
Signed-off-by: Alexey Budankov <alexey.budankov@xxxxxxxxxxxxxxx>
---
tools/perf/Documentation/perf-record.txt | 5 +++++
tools/perf/builtin-record.c | 25 ++++++++++++++++++++++++
2 files changed, 30 insertions(+)
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 18fceb49434e..0567bacc2ae6 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -471,6 +471,11 @@ Also at some cases executing less trace write syscalls with bigger data size can
shorter than executing more trace write syscalls with smaller data size thus lowering
runtime profiling overhead.
+-z::
+--compression-level[=n]::
+Produce compressed trace using specified level n (default: 1 - fastest compression,
+22 - smallest trace)
+
--all-kernel::
Configure all used events to run in kernel space.
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 2e083891affa..7258f2964a3b 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -440,6 +440,26 @@ static int record__mmap_flush_parse(const struct option *opt,
return 0;
}
+#ifdef HAVE_ZSTD_SUPPORT
+static unsigned int comp_level_default = 1;
+static int record__parse_comp_level(const struct option *opt,
+ const char *str,
+ int unset)
+{
+ struct record_opts *opts = (struct record_opts *)opt->value;
+
+ if (unset) {
+ opts->comp_level = 0;
+ } else {
+ if (str)
+ opts->comp_level = strtol(str, NULL, 0);
+ if (!opts->comp_level)
+ opts->comp_level = comp_level_default;
+ }
+
+ return 0;
+}
+#endif
static unsigned int comp_level_max = 22;
static int record__comp_enabled(struct record *rec)
@@ -2169,6 +2189,11 @@ static struct option __record_options[] = {
OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
"Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
record__parse_affinity),
+#ifdef HAVE_ZSTD_SUPPORT
+ OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default,
+ "n", "Produce compressed trace using specified level (default: 1 - fastest compression, 22 - smallest trace)",
+ record__parse_comp_level),
+#endif
OPT_END()
};
--
2.20.1