[PATCH v2] perf/record: add num-synthesize-threads option

From: Ian Rogers
Date: Mon Apr 20 2020 - 20:30:29 EST


From: Stephane Eranian <eranian@xxxxxxxxxx>

To control degree of parallelism of the synthesize_mmap() code which
is scanning /proc/PID/task/PID/maps and can be time consuming.
Mimic perf top way of handling the option.
If not specified will default to 1 thread, i.e. default behavior before
this option.

Signed-off-by: Stephane Eranian <eranian@xxxxxxxxxx>
Reviewed-by: Ian Rogers <irogers@xxxxxxxxxx>
---
tools/perf/Documentation/perf-record.txt | 4 +++
tools/perf/builtin-record.c | 34 ++++++++++++++++++++++--
tools/perf/util/record.h | 1 +
3 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index b3f3b3f1c161..6e8b4649307c 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -596,6 +596,10 @@ Make a copy of /proc/kcore and place it into a directory with the perf data file
Limit the sample data max size, <size> is expected to be a number with
appended unit character - B/K/M/G

+--num-thread-synthesize::
+ The number of threads to run when synthesizing events for existing processes.
+ By default, the number of threads equals 1.
+
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1]
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 1ab349abe904..2e8011f179f2 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -43,6 +43,7 @@
#include "util/time-utils.h"
#include "util/units.h"
#include "util/bpf-event.h"
+#include "util/util.h"
#include "asm/bug.h"
#include "perf.h"

@@ -50,6 +51,7 @@
#include <inttypes.h>
#include <locale.h>
#include <poll.h>
+#include <pthread.h>
#include <unistd.h>
#include <sched.h>
#include <signal.h>
@@ -503,6 +505,20 @@ static int process_synthesized_event(struct perf_tool *tool,
return record__write(rec, NULL, event, event->header.size);
}

+static int process_locked_synthesized_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ static pthread_mutex_t synth_lock = PTHREAD_MUTEX_INITIALIZER;
+ int ret;
+
+ pthread_mutex_lock(&synth_lock);
+ ret = process_synthesized_event(tool, event, sample, machine);
+ pthread_mutex_unlock(&synth_lock);
+ return ret;
+}
+
static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
{
struct record *rec = to;
@@ -1288,6 +1304,7 @@ static int record__synthesize(struct record *rec, bool tail)
struct perf_tool *tool = &rec->tool;
int fd = perf_data__fd(data);
int err = 0;
+ event_op f = process_synthesized_event;

if (rec->opts.tail_synthesize != tail)
return 0;
@@ -1402,9 +1419,18 @@ static int record__synthesize(struct record *rec, bool tail)
if (err < 0)
pr_warning("Couldn't synthesize cgroup events.\n");

+ if (rec->opts.nr_threads_synthesize > 1) {
+ perf_set_multithreaded();
+ f = process_locked_synthesized_event;
+ }
+
err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->core.threads,
- process_synthesized_event, opts->sample_address,
- 1);
+ f, opts->sample_address,
+ rec->opts.nr_threads_synthesize);
+
+ if (rec->opts.nr_threads_synthesize > 1)
+ perf_set_singlethreaded();
+
out:
return err;
}
@@ -2232,6 +2258,7 @@ static struct record record = {
.default_per_cpu = true,
},
.mmap_flush = MMAP_FLUSH_DEFAULT,
+ .nr_threads_synthesize = 1,
},
.tool = {
.sample = process_sample_event,
@@ -2421,6 +2448,9 @@ static struct option __record_options[] = {
#endif
OPT_CALLBACK(0, "max-size", &record.output_max_size,
"size", "Limit the maximum size of the output file", parse_output_max_size),
+ OPT_UINTEGER(0, "num-thread-synthesize",
+ &record.opts.nr_threads_synthesize,
+ "number of threads to run for event synthesis"),
OPT_END()
};

diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h
index 24316458be20..923565c3b155 100644
--- a/tools/perf/util/record.h
+++ b/tools/perf/util/record.h
@@ -68,6 +68,7 @@ struct record_opts {
int affinity;
int mmap_flush;
unsigned int comp_level;
+ unsigned int nr_threads_synthesize;
};

extern const char * const *record_usage;
--
2.26.1.301.g55bc3eb7cb9-goog