Re: [PATCH 3/8] perf tools sched: Add compact display option
From: Arnaldo Carvalho de Melo
Date: Tue Apr 12 2016 - 10:09:58 EST
Em Tue, Apr 12, 2016 at 03:29:26PM +0200, Jiri Olsa escreveu:
> Add compact map display that does not output the whole
> cpu matrix, only cpus that got event.
>
> $ perf sched map -c
I was going to point out that you better use --compact, as one-letter
options are for some... but then I realized you already did that, will
update your changelog comment :-)
Testing it I noticed that the first few lines get unaligned, is that by
design? Haven't looked at the code.
*. 31203.236293 secs . => swapper:0 (CPU 3)
. *A0 31203.236964 secs A0 => rcu_sched:7 (CPU 0)
. *. 31203.236968 secs
. . *B0 31203.236968 secs B0 => rcuos/2:28 (CPU 1)
. . B0 *C0 31203.236973 secs C0 => rcuos/3:36 (CPU 2)
. *A0 B0 C0 31203.236973 secs
. A0 *. C0 31203.236974 secs
. *. . C0 31203.236975 secs
. . . *. 31203.236986 secs
. *A0 . . 31203.239952 secs
- Arnaldo
> *A0 1082427.094098 secs A0 => perf:19404 (CPU 2)
> A0 *. 1082427.094127 secs . => swapper:0 (CPU 1)
> A0 . *B0 1082427.094174 secs B0 => rcuos/2:25 (CPU 3)
> A0 . *. 1082427.094177 secs
> *C0 . . 1082427.094187 secs C0 => migration/2:21
> C0 *A0 . 1082427.094193 secs
> *. A0 . 1082427.094195 secs
> *D0 A0 . 1082427.094402 secs D0 => rngd:968
> *. A0 . 1082427.094406 secs
> . *E0 . 1082427.095221 secs E0 => kworker/1:1:5333
> . E0 *F0 1082427.095227 secs F0 => xterm:3342
>
> It helps to display sane output for small thread loads
> on big cpu servers.
>
> Link: http://lkml.kernel.org/n/tip-f38ysxz8v6nxoklp7gc4odti@xxxxxxxxxxxxxx
> Signed-off-by: Jiri Olsa <jolsa@xxxxxxxxxx>
> ---
> tools/perf/builtin-sched.c | 62 +++++++++++++++++++++++++++++++++++++++++-----
> 1 file changed, 56 insertions(+), 6 deletions(-)
>
> diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
> index 871b55ae22a4..64dd94667055 100644
> --- a/tools/perf/builtin-sched.c
> +++ b/tools/perf/builtin-sched.c
> @@ -122,6 +122,12 @@ struct trace_sched_handler {
> struct machine *machine);
> };
>
> +struct perf_sched_map {
> + DECLARE_BITMAP(comp_cpus_mask, MAX_CPUS);
> + int *comp_cpus;
> + bool comp;
> +};
> +
> struct perf_sched {
> struct perf_tool tool;
> const char *sort_order;
> @@ -173,6 +179,7 @@ struct perf_sched {
> struct list_head sort_list, cmp_pid;
> bool force;
> bool skip_merge;
> + struct perf_sched_map map;
> };
>
> static u64 get_nsecs(void)
> @@ -1347,13 +1354,24 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
> int new_shortname;
> u64 timestamp0, timestamp = sample->time;
> s64 delta;
> - int cpu, this_cpu = sample->cpu;
> + int i, this_cpu = sample->cpu;
> + int cpus_nr;
> + bool new_cpu = false;
>
> BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0);
>
> if (this_cpu > sched->max_cpu)
> sched->max_cpu = this_cpu;
>
> + if (sched->map.comp) {
> + cpus_nr = bitmap_weight(sched->map.comp_cpus_mask, MAX_CPUS);
> + if (!test_and_set_bit(this_cpu, sched->map.comp_cpus_mask)) {
> + sched->map.comp_cpus[cpus_nr++] = this_cpu;
> + new_cpu = true;
> + }
> + } else
> + cpus_nr = sched->max_cpu;
> +
> timestamp0 = sched->cpu_last_switched[this_cpu];
> sched->cpu_last_switched[this_cpu] = timestamp;
> if (timestamp0)
> @@ -1400,7 +1418,9 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
> new_shortname = 1;
> }
>
> - for (cpu = 0; cpu <= sched->max_cpu; cpu++) {
> + for (i = 0; i < cpus_nr; i++) {
> + int cpu = sched->map.comp ? sched->map.comp_cpus[i] : i;
> +
> if (cpu != this_cpu)
> printf(" ");
> else
> @@ -1414,12 +1434,15 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
>
> printf(" %12.6f secs ", (double)timestamp/1e9);
> if (new_shortname) {
> - printf("%s => %s:%d\n",
> + printf("%s => %s:%d",
> sched_in->shortname, thread__comm_str(sched_in), sched_in->tid);
> - } else {
> - printf("\n");
> }
>
> + if (sched->map.comp && new_cpu)
> + printf(" (CPU %d)", this_cpu);
> +
> + printf("\n");
> +
> thread__put(sched_in);
>
> return 0;
> @@ -1675,9 +1698,22 @@ static int perf_sched__lat(struct perf_sched *sched)
> return 0;
> }
>
> +static int setup_map_cpus(struct perf_sched *sched)
> +{
> + sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF);
> +
> + if (sched->map.comp) {
> + sched->map.comp_cpus = zalloc(sched->max_cpu * sizeof(int));
> + return sched->map.comp_cpus ? 0 : -1;
> + }
> +
> + return 0;
> +}
> +
> static int perf_sched__map(struct perf_sched *sched)
> {
> - sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF);
> + if (setup_map_cpus(sched))
> + return -1;
>
> setup_pager();
> if (perf_sched__read_events(sched))
> @@ -1831,6 +1867,11 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
> "dump raw trace in ASCII"),
> OPT_END()
> };
> + const struct option map_options[] = {
> + OPT_BOOLEAN(0, "compact", &sched.map.comp,
> + "map output in compact mode"),
> + OPT_END()
> + };
> const char * const latency_usage[] = {
> "perf sched latency [<options>]",
> NULL
> @@ -1839,6 +1880,10 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
> "perf sched replay [<options>]",
> NULL
> };
> + const char * const map_usage[] = {
> + "perf sched map [<options>]",
> + NULL
> + };
> const char *const sched_subcommands[] = { "record", "latency", "map",
> "replay", "script", NULL };
> const char *sched_usage[] = {
> @@ -1887,6 +1932,11 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
> setup_sorting(&sched, latency_options, latency_usage);
> return perf_sched__lat(&sched);
> } else if (!strcmp(argv[0], "map")) {
> + if (argc) {
> + argc = parse_options(argc, argv, map_options, replay_usage, 0);
> + if (argc)
> + usage_with_options(map_usage, map_options);
> + }
> sched.tp_handler = &map_ops;
> setup_sorting(&sched, latency_options, latency_usage);
> return perf_sched__map(&sched);
> --
> 2.4.11