Re: [PATCH 3/8] perf tools sched: Add compact display option

From: Arnaldo Carvalho de Melo
Date: Tue Apr 12 2016 - 10:18:52 EST


Em Tue, Apr 12, 2016 at 11:09:47AM -0300, Arnaldo Carvalho de Melo escreveu:
> Em Tue, Apr 12, 2016 at 03:29:26PM +0200, Jiri Olsa escreveu:
> > Add compact map display that does not output the whole
> > cpu matrix, only cpus that got event.
> >
> > $ perf sched map -c
>
> I was going to point out that you better use --compact, as one-letter
> options are for some... but then I realized you already did that, will
> update your changelog comment :-)
>
> Testing it I noticed that the first few lines get unaligned, is that by
> design? Haven't looked at the code.
>
> *. 31203.236293 secs . => swapper:0 (CPU 3)
> . *A0 31203.236964 secs A0 => rcu_sched:7 (CPU 0)
> . *. 31203.236968 secs
> . . *B0 31203.236968 secs B0 => rcuos/2:28 (CPU 1)
> . . B0 *C0 31203.236973 secs C0 => rcuos/3:36 (CPU 2)
> . *A0 B0 C0 31203.236973 secs
> . A0 *. C0 31203.236974 secs
> . *. . C0 31203.236975 secs
> . . . *. 31203.236986 secs
> . *A0 . . 31203.239952 secs
>

Got it, by design, applied.

> - Arnaldo
>
> > *A0 1082427.094098 secs A0 => perf:19404 (CPU 2)
> > A0 *. 1082427.094127 secs . => swapper:0 (CPU 1)
> > A0 . *B0 1082427.094174 secs B0 => rcuos/2:25 (CPU 3)
> > A0 . *. 1082427.094177 secs
> > *C0 . . 1082427.094187 secs C0 => migration/2:21
> > C0 *A0 . 1082427.094193 secs
> > *. A0 . 1082427.094195 secs
> > *D0 A0 . 1082427.094402 secs D0 => rngd:968
> > *. A0 . 1082427.094406 secs
> > . *E0 . 1082427.095221 secs E0 => kworker/1:1:5333
> > . E0 *F0 1082427.095227 secs F0 => xterm:3342
> >
> > It helps to display sane output for small thread loads
> > on big cpu servers.
> >
> > Link: http://lkml.kernel.org/n/tip-f38ysxz8v6nxoklp7gc4odti@xxxxxxxxxxxxxx
> > Signed-off-by: Jiri Olsa <jolsa@xxxxxxxxxx>
> > ---
> > tools/perf/builtin-sched.c | 62 +++++++++++++++++++++++++++++++++++++++++-----
> > 1 file changed, 56 insertions(+), 6 deletions(-)
> >
> > diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
> > index 871b55ae22a4..64dd94667055 100644
> > --- a/tools/perf/builtin-sched.c
> > +++ b/tools/perf/builtin-sched.c
> > @@ -122,6 +122,12 @@ struct trace_sched_handler {
> > struct machine *machine);
> > };
> >
> > +struct perf_sched_map {
> > + DECLARE_BITMAP(comp_cpus_mask, MAX_CPUS);
> > + int *comp_cpus;
> > + bool comp;
> > +};
> > +
> > struct perf_sched {
> > struct perf_tool tool;
> > const char *sort_order;
> > @@ -173,6 +179,7 @@ struct perf_sched {
> > struct list_head sort_list, cmp_pid;
> > bool force;
> > bool skip_merge;
> > + struct perf_sched_map map;
> > };
> >
> > static u64 get_nsecs(void)
> > @@ -1347,13 +1354,24 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
> > int new_shortname;
> > u64 timestamp0, timestamp = sample->time;
> > s64 delta;
> > - int cpu, this_cpu = sample->cpu;
> > + int i, this_cpu = sample->cpu;
> > + int cpus_nr;
> > + bool new_cpu = false;
> >
> > BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0);
> >
> > if (this_cpu > sched->max_cpu)
> > sched->max_cpu = this_cpu;
> >
> > + if (sched->map.comp) {
> > + cpus_nr = bitmap_weight(sched->map.comp_cpus_mask, MAX_CPUS);
> > + if (!test_and_set_bit(this_cpu, sched->map.comp_cpus_mask)) {
> > + sched->map.comp_cpus[cpus_nr++] = this_cpu;
> > + new_cpu = true;
> > + }
> > + } else
> > + cpus_nr = sched->max_cpu;
> > +
> > timestamp0 = sched->cpu_last_switched[this_cpu];
> > sched->cpu_last_switched[this_cpu] = timestamp;
> > if (timestamp0)
> > @@ -1400,7 +1418,9 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
> > new_shortname = 1;
> > }
> >
> > - for (cpu = 0; cpu <= sched->max_cpu; cpu++) {
> > + for (i = 0; i < cpus_nr; i++) {
> > + int cpu = sched->map.comp ? sched->map.comp_cpus[i] : i;
> > +
> > if (cpu != this_cpu)
> > printf(" ");
> > else
> > @@ -1414,12 +1434,15 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
> >
> > printf(" %12.6f secs ", (double)timestamp/1e9);
> > if (new_shortname) {
> > - printf("%s => %s:%d\n",
> > + printf("%s => %s:%d",
> > sched_in->shortname, thread__comm_str(sched_in), sched_in->tid);
> > - } else {
> > - printf("\n");
> > }
> >
> > + if (sched->map.comp && new_cpu)
> > + printf(" (CPU %d)", this_cpu);
> > +
> > + printf("\n");
> > +
> > thread__put(sched_in);
> >
> > return 0;
> > @@ -1675,9 +1698,22 @@ static int perf_sched__lat(struct perf_sched *sched)
> > return 0;
> > }
> >
> > +static int setup_map_cpus(struct perf_sched *sched)
> > +{
> > + sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF);
> > +
> > + if (sched->map.comp) {
> > + sched->map.comp_cpus = zalloc(sched->max_cpu * sizeof(int));
> > + return sched->map.comp_cpus ? 0 : -1;
> > + }
> > +
> > + return 0;
> > +}
> > +
> > static int perf_sched__map(struct perf_sched *sched)
> > {
> > - sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF);
> > + if (setup_map_cpus(sched))
> > + return -1;
> >
> > setup_pager();
> > if (perf_sched__read_events(sched))
> > @@ -1831,6 +1867,11 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
> > "dump raw trace in ASCII"),
> > OPT_END()
> > };
> > + const struct option map_options[] = {
> > + OPT_BOOLEAN(0, "compact", &sched.map.comp,
> > + "map output in compact mode"),
> > + OPT_END()
> > + };
> > const char * const latency_usage[] = {
> > "perf sched latency [<options>]",
> > NULL
> > @@ -1839,6 +1880,10 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
> > "perf sched replay [<options>]",
> > NULL
> > };
> > + const char * const map_usage[] = {
> > + "perf sched map [<options>]",
> > + NULL
> > + };
> > const char *const sched_subcommands[] = { "record", "latency", "map",
> > "replay", "script", NULL };
> > const char *sched_usage[] = {
> > @@ -1887,6 +1932,11 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
> > setup_sorting(&sched, latency_options, latency_usage);
> > return perf_sched__lat(&sched);
> > } else if (!strcmp(argv[0], "map")) {
> > + if (argc) {
> > + argc = parse_options(argc, argv, map_options, replay_usage, 0);
> > + if (argc)
> > + usage_with_options(map_usage, map_options);
> > + }
> > sched.tp_handler = &map_ops;
> > setup_sorting(&sched, latency_options, latency_usage);
> > return perf_sched__map(&sched);
> > --
> > 2.4.11