Re: [PATCH] perf report/annotate: Add option to specify a CPU range

From: David Ahern
Date: Wed Jun 29 2011 - 23:56:14 EST


On 06/29/2011 09:16 PM, Anton Blanchard wrote:
>
> Add an option to perf report/annotate/script to specify which CPUs
> to operate on. This enables us to take a single system wide profile
> and analyse each CPU (or group of CPUs) in isolation.
>
> This was useful when profiling a multiprocess workload where the
> bottleneck was on one CPU but this was hidden in the overall profile.
> Per process and per thread breakdowns didn't help because multiple
> processes were running on each CPU and no single process consumed
> an entire CPU.
>
> The patch converts the list of CPUs returned by cpu_map__new into a
> bitmap for fast lookup. I wanted to use -C to be consistent with perf
> top/record/stat, but unfortunately perf report already uses -C <comms>.
>
> Signed-off-by: Anton Blanchard <anton@xxxxxxxxx>
> ---
>
> v2: Incorporate suggestions from David Ahern:
> - Added -c to perf script
> - Check that SAMPLE_CPU is set when -c is used
> - Update documentation
>
> Index: linux-2.6-tip/tools/perf/builtin-report.c
> ===================================================================
> --- linux-2.6-tip.orig/tools/perf/builtin-report.c 2011-06-30 11:35:08.488417534 +1000
> +++ linux-2.6-tip/tools/perf/builtin-report.c 2011-06-30 12:56:28.894807631 +1000
> @@ -33,6 +33,9 @@
> #include "util/sort.h"
> #include "util/hist.h"
>
> +#include <linux/bitmap.h>
> +#include "util/cpumap.h"
> +
> static char const *input_name = "perf.data";
>
> static bool force, use_tui, use_stdio;
> @@ -48,6 +51,9 @@ static const char *pretty_printing_style
> static char callchain_default_opt[] = "fractal,0.5";
> static symbol_filter_t annotate_init;
>
> +static const char *cpu_list;
> +static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
> +
> static int perf_session__add_hist_entry(struct perf_session *session,
> struct addr_location *al,
> struct perf_sample *sample,
> @@ -116,6 +122,9 @@ static int process_sample_event(union pe
> if (al.filtered || (hide_unresolved && al.sym == NULL))
> return 0;
>
> + if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
> + return 0;
> +
> if (al.map != NULL)
> al.map->dso->hit = 1;
>
> @@ -262,6 +271,41 @@ static int __cmd_report(void)
> if (session == NULL)
> return -ENOMEM;
>
> + if (cpu_list) {
> + int i;
> + struct cpu_map *map;
> +
> + for (i = 0; i < PERF_TYPE_MAX; ++i) {
> + struct perf_evsel *evsel;
> +
> + evsel = perf_session__find_first_evtype(session, i);
> + if (!evsel)
> + continue;
> +
> + if (!(evsel->attr.sample_type & PERF_SAMPLE_CPU)) {
> + pr_err("File does not contain CPU events. "
> + "Remove -c option to proceed.\n");
> + ret = -1;
> + goto out_delete;
> + }
> + }
> +
> + map = cpu_map__new(cpu_list);
> +
> + for (i = 0; i < map->nr; i++) {
> + int cpu = map->map[i];
> +
> + if (cpu >= MAX_NR_CPUS) {
> + pr_err("Requested CPU %d too large. "
> + "Consider raising MAX_NR_CPUS\n", cpu);
> + ret = -1;
> + goto out_delete;
> + }
> +
> + set_bit(cpu, cpu_bitmap);
> + }
> + }
> +

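This "if (cpu_list)" block (the PERF_SAMPLE_CPU check plus the cpu_map to
bitmap conversion) is repeated in essentially the same form in
builtin-report.c, builtin-annotate.c and builtin-script.c. It would be
better to move it into a single helper function that all 3 commands
call -- something like perf_session__cpu_bitmap(session, cpu_list,
cpu_bitmap) in util/session.c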

David

> if (show_threads)
> perf_read_values_init(&show_threads_values);
>
> @@ -455,6 +499,7 @@ static const struct option options[] = {
> "Only display entries resolved to a symbol"),
> OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
> "Look for files with symbols relative to this directory"),
> + OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
> OPT_END()
> };
>
> Index: linux-2.6-tip/tools/perf/builtin-annotate.c
> ===================================================================
> --- linux-2.6-tip.orig/tools/perf/builtin-annotate.c 2011-06-30 11:35:08.468417177 +1000
> +++ linux-2.6-tip/tools/perf/builtin-annotate.c 2011-06-30 12:56:35.514926037 +1000
> @@ -28,6 +28,9 @@
> #include "util/hist.h"
> #include "util/session.h"
>
> +#include <linux/bitmap.h>
> +#include "util/cpumap.h"
> +
> static char const *input_name = "perf.data";
>
> static bool force, use_tui, use_stdio;
> @@ -38,6 +41,9 @@ static bool print_line;
>
> static const char *sym_hist_filter;
>
> +static const char *cpu_list;
> +static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
> +
> static int perf_evlist__add_sample(struct perf_evlist *evlist,
> struct perf_sample *sample,
> struct perf_evsel *evsel,
> @@ -90,6 +96,9 @@ static int process_sample_event(union pe
> return -1;
> }
>
> + if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
> + return 0;
> +
> if (!al.filtered &&
> perf_evlist__add_sample(session->evlist, sample, evsel, &al)) {
> pr_warning("problem incrementing symbol count, "
> @@ -177,6 +186,41 @@ static int __cmd_annotate(void)
> if (session == NULL)
> return -ENOMEM;
>
> + if (cpu_list) {
> + int i;
> + struct cpu_map *map;
> +
> + for (i = 0; i < PERF_TYPE_MAX; ++i) {
> + struct perf_evsel *evsel;
> +
> + evsel = perf_session__find_first_evtype(session, i);
> + if (!evsel)
> + continue;
> +
> + if (!(evsel->attr.sample_type & PERF_SAMPLE_CPU)) {
> + pr_err("File does not contain CPU events. "
> + "Remove -c option to proceed.\n");
> + ret = -1;
> + goto out_delete;
> + }
> + }
> +
> + map = cpu_map__new(cpu_list);
> +
> + for (i = 0; i < map->nr; i++) {
> + int cpu = map->map[i];
> +
> + if (cpu >= MAX_NR_CPUS) {
> + pr_err("Requested CPU %d too large. "
> + "Consider raising MAX_NR_CPUS\n", cpu);
> + ret = -1;
> + goto out_delete;
> + }
> +
> + set_bit(cpu, cpu_bitmap);
> + }
> + }
> +
> ret = perf_session__process_events(session, &event_ops);
> if (ret)
> goto out_delete;
> @@ -252,6 +296,7 @@ static const struct option options[] = {
> "print matching source lines (may be slow)"),
> OPT_BOOLEAN('P', "full-paths", &full_paths,
> "Don't shorten the displayed pathnames"),
> + OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
> OPT_END()
> };
>
> Index: linux-2.6-tip/tools/perf/builtin-script.c
> ===================================================================
> --- linux-2.6-tip.orig/tools/perf/builtin-script.c 2011-06-30 11:35:08.478417356 +1000
> +++ linux-2.6-tip/tools/perf/builtin-script.c 2011-06-30 12:56:44.185081104 +1000
> @@ -13,6 +13,8 @@
> #include "util/util.h"
> #include "util/evlist.h"
> #include "util/evsel.h"
> +#include <linux/bitmap.h>
> +#include "util/cpumap.h"
>
> static char const *script_name;
> static char const *generate_script_lang;
> @@ -21,6 +23,8 @@ static u64 last_timestamp;
> static u64 nr_unordered;
> extern const struct option record_options[];
> static bool no_callchain;
> +static const char *cpu_list;
> +static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
>
> enum perf_output_field {
> PERF_OUTPUT_COMM = 1U << 0,
> @@ -453,6 +457,10 @@ static int process_sample_event(union pe
> last_timestamp = sample->time;
> return 0;
> }
> +
> + if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
> + return 0;
> +
> scripting_ops->process_event(event, sample, evsel, session, thread);
>
> session->hists.stats.total_period += sample->period;
> @@ -1075,6 +1083,7 @@ static const struct option options[] = {
> OPT_CALLBACK('f', "fields", NULL, "str",
> "comma separated output fields prepend with 'type:'. Valid types: hw,sw,trace,raw. Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr",
> parse_output_fields),
> + OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
>
> OPT_END()
> };
> @@ -1255,6 +1264,38 @@ int cmd_script(int argc, const char **ar
> if (session == NULL)
> return -ENOMEM;
>
> + if (cpu_list) {
> + struct cpu_map *map;
> +
> + for (i = 0; i < PERF_TYPE_MAX; ++i) {
> + struct perf_evsel *evsel;
> +
> + evsel = perf_session__find_first_evtype(session, i);
> + if (!evsel)
> + continue;
> +
> + if (!(evsel->attr.sample_type & PERF_SAMPLE_CPU)) {
> + pr_err("File does not contain CPU events. "
> + "Remove -c option to proceed.\n");
> + return -1;
> + }
> + }
> +
> + map = cpu_map__new(cpu_list);
> +
> + for (i = 0; i < map->nr; i++) {
> + int cpu = map->map[i];
> +
> + if (cpu >= MAX_NR_CPUS) {
> + pr_err("Requested CPU %d too large. "
> + "Consider raising MAX_NR_CPUS\n", cpu);
> + return -1;
> + }
> +
> + set_bit(cpu, cpu_bitmap);
> + }
> + }
> +
> if (!no_callchain)
> symbol_conf.use_callchain = true;
> else
> Index: linux-2.6-tip/tools/perf/Documentation/perf-annotate.txt
> ===================================================================
> --- linux-2.6-tip.orig/tools/perf/Documentation/perf-annotate.txt 2011-06-30 11:35:17.768583314 +1000
> +++ linux-2.6-tip/tools/perf/Documentation/perf-annotate.txt 2011-06-30 11:35:19.618616362 +1000
> @@ -66,6 +66,12 @@ OPTIONS
> used. This interfaces starts by centering on the line with more
> samples, TAB/UNTAB cycles through the lines with more samples.
>
> +-c::
> +--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
> + be provided as a comma-separated list with no space: 0,1. Ranges of
> + CPUs are specified with -: 0-2. Default is to report samples on all
> + CPUs.
> +
> SEE ALSO
> --------
> linkperf:perf-record[1], linkperf:perf-report[1]
> Index: linux-2.6-tip/tools/perf/Documentation/perf-report.txt
> ===================================================================
> --- linux-2.6-tip.orig/tools/perf/Documentation/perf-report.txt 2011-06-30 11:35:17.768583314 +1000
> +++ linux-2.6-tip/tools/perf/Documentation/perf-report.txt 2011-06-30 11:35:19.618616362 +1000
> @@ -119,6 +119,12 @@ OPTIONS
> --symfs=<directory>::
> Look for files with symbols relative to this directory.
>
> +-c::
> +--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
> + be provided as a comma-separated list with no space: 0,1. Ranges of
> + CPUs are specified with -: 0-2. Default is to report samples on all
> + CPUs.
> +
> SEE ALSO
> --------
> linkperf:perf-stat[1]
> Index: linux-2.6-tip/tools/perf/Documentation/perf-script.txt
> ===================================================================
> --- linux-2.6-tip.orig/tools/perf/Documentation/perf-script.txt 2011-06-30 11:35:17.768583314 +1000
> +++ linux-2.6-tip/tools/perf/Documentation/perf-script.txt 2011-06-30 11:35:19.618616362 +1000
> @@ -182,6 +182,12 @@ OPTIONS
> --hide-call-graph::
> When printing symbols do not display call chain.
>
> +-c::
> +--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
> + be provided as a comma-separated list with no space: 0,1. Ranges of
> + CPUs are specified with -: 0-2. Default is to report samples on all
> + CPUs.
> +
> SEE ALSO
> --------
> linkperf:perf-record[1], linkperf:perf-script-perl[1],
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/