Re: [PATCH v2 01/17] perf tools: Configure contextID tracing in CPU-wide mode
From: Mathieu Poirier
Date: Fri Jun 07 2019 - 13:45:04 EST
Hey Suzuki,
On Fri, 7 Jun 2019 at 03:21, Suzuki K Poulose <suzuki.poulose@xxxxxxx> wrote:
>
> Hi Mathieu,
>
> On 24/05/2019 18:34, Mathieu Poirier wrote:
> > When operating in CPU-wide mode being notified of contextID changes is
> > required so that the decoding mechanic is aware of the process context
> > switch.
> >
> > Signed-off-by: Mathieu Poirier <mathieu.poirier@xxxxxxxxxx>
>
>
> > Reviewed-by: Suzuki K Poulose <suzuki.poulose@xxxxxxx>
>
> I am sorry but, I don't remember reviewing this patch in the previous
> postings. But here we go.
We definitely misunderstood each other - apologies for that.
>
> > ---
> > tools/perf/arch/arm/util/cs-etm.c | 126 +++++++++++++++++++++++++-----
> > tools/perf/util/cs-etm.h | 12 +++
> > 2 files changed, 119 insertions(+), 19 deletions(-)
> >
> > diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
> > index 911426721170..3912f0bf04ed 100644
> > --- a/tools/perf/arch/arm/util/cs-etm.c
> > +++ b/tools/perf/arch/arm/util/cs-etm.c
> > @@ -35,8 +35,100 @@ struct cs_etm_recording {
> > size_t snapshot_size;
> > };
> >
>
>
> > static bool cs_etm_is_etmv4(struct auxtrace_record *itr, int cpu);
> >
> > +static int cs_etm_set_context_id(struct auxtrace_record *itr,
> > + struct perf_evsel *evsel, int cpu)
> > +{
> > + struct cs_etm_recording *ptr;
> > + struct perf_pmu *cs_etm_pmu;
> > + char path[PATH_MAX];
> > + int err = -EINVAL;
> > + u32 val;
> > +
> > + ptr = container_of(itr, struct cs_etm_recording, itr);
> > + cs_etm_pmu = ptr->cs_etm_pmu;
> > +
> > + if (!cs_etm_is_etmv4(itr, cpu))
> > + goto out;
> > +
> > + /* Get a handle on TRCIRD2 */
> > + snprintf(path, PATH_MAX, "cpu%d/%s",
> > + cpu, metadata_etmv4_ro[CS_ETMV4_TRCIDR2]);
> > + err = perf_pmu__scan_file(cs_etm_pmu, path, "%x", &val);
> > +
> > + /* There was a problem reading the file, bailing out */
> > + if (err != 1) {
> > + pr_err("%s: can't read file %s\n",
> > + CORESIGHT_ETM_PMU_NAME, path);
> > + goto out;
> > + }
> > +
> > + /*
> > + * TRCIDR2.CIDSIZE, bit [9-5], indicates whether contextID tracing
> > + * is supported:
> > + * 0b00000 Context ID tracing is not supported.
> > + * 0b00100 Maximum of 32-bit Context ID size.
> > + * All other values are reserved.
> > + */
> > + val = BMVAL(val, 5, 9);
> > + if (!val || val != 0x4) {
> > + err = -EINVAL;
> > + goto out;
> > + }
> > +
> > + /* All good, let the kernel know */
> > + evsel->attr.config |= (1 << ETM_OPT_CTXTID);
> > + err = 0;
> > +
> > +out:
> > +
> > + return err;
> > +}
> > +
> > +static int cs_etm_set_option(struct auxtrace_record *itr,
> > + struct perf_evsel *evsel, u32 option)
> > +{
> > + int i, err = -EINVAL;
> > + struct cpu_map *event_cpus = evsel->evlist->cpus;
> > + struct cpu_map *online_cpus = cpu_map__new(NULL);
> > +
> > + /* Set option of each CPU we have */
> > + for (i = 0; i < cpu__max_cpu(); i++) {
> > + if (!cpu_map__has(event_cpus, i) ||
> > + !cpu_map__has(online_cpus, i))
> > + continue;
> > +
> > + switch (option) {
> > + case ETM_OPT_CTXTID:
> > + err = cs_etm_set_context_id(itr, evsel, i);
> > + if (err)
> > + goto out;
> > + break;
> > + default:
> > + goto out;
> > + }
> > + }
>
> I am not too familiar with the perf tool code. But, isn't there a way
> to force the config bit, right from the beginning when the events are
> created, when we know that we are doing a CPU wide tracing, along with
> the other config bits ?
>
>
This code is ran just after the event list is created. In order to
avoid this step and have the config bits set right from the beginning
one would have to explicitly specify the options within the '/' '/' of
the cs_etm event on the command line, which would be cumbersome and
error prone. Instead this code guarantees that all options needed for
a CPU-wide session are set properly.
>
> > + err = 0;
> > +out:
> > + cpu_map__put(online_cpus);
> > + return err;
> > +}
> > +
> > static int cs_etm_parse_snapshot_options(struct auxtrace_record *itr,
> > struct record_opts *opts,
> > const char *str)
> > @@ -105,8 +197,9 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
> > container_of(itr, struct cs_etm_recording, itr);
> > struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
> > struct perf_evsel *evsel, *cs_etm_evsel = NULL;
> > - const struct cpu_map *cpus = evlist->cpus;
> > + struct cpu_map *cpus = evlist->cpus;
> > bool privileged = (geteuid() == 0 || perf_event_paranoid() < 0);
> > + int err = 0;
> >
> > ptr->evlist = evlist;
> > ptr->snapshot_mode = opts->auxtrace_snapshot_mode;
> > @@ -241,19 +334,24 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
> >
> > /*
> > * In the case of per-cpu mmaps, we need the CPU on the
> > - * AUX event.
> > + * AUX event. We also need the contextID in order to be notified
> > + * when a context switch happened.
> > */
> > - if (!cpu_map__empty(cpus))
> > + if (!cpu_map__empty(cpus)) {
> > perf_evsel__set_sample_bit(cs_etm_evsel, CPU);
> >
> > + err = cs_etm_set_option(itr, cs_etm_evsel, ETM_OPT_CTXTID);
> > + if (err)
> > + goto out;
> > + }
> > +
> > /* Add dummy event to keep tracking */
> > if (opts->full_auxtrace) {
> > struct perf_evsel *tracking_evsel;
> > - int err;
> >
> > err = parse_events(evlist, "dummy:u", NULL);
> > if (err)
> > - return err;
> > + goto out;
> >
> > tracking_evsel = perf_evlist__last(evlist);
> > perf_evlist__set_tracking_event(evlist, tracking_evsel);
> > @@ -266,7 +364,8 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
> > perf_evsel__set_sample_bit(tracking_evsel, TIME);
> > }
> >
> > - return 0;
> > +out:
> > + return err;
> > }
>
>
> > diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h
> > index 0e97c196147a..826c9eedaf5c 100644
> > --- a/tools/perf/util/cs-etm.h
> > +++ b/tools/perf/util/cs-etm.h
> > @@ -103,6 +103,18 @@ struct intlist *traceid_list;
> > #define KiB(x) ((x) * 1024)
> > #define MiB(x) ((x) * 1024 * 1024)
> >
> > +/*
> > + * Create a contiguous bitmask starting at bit position @l and ending at
> > + * position @h. For example
> > + * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000.
> > + *
> > + * Carbon copy of implementation found in $KERNEL/include/linux/bitops.h
> > + */
> > +#define GENMASK(h, l) \
> > + (((~0UL) - (1UL << (l)) + 1) & (~0UL >> (BITS_PER_LONG - 1 - (h))))
> > +
>
> minor nit: Could this be placed in a more generic header file for the other
> parts of the perf tool to consume ?
Back when I wrote this code my thinking was to keep it private since
nobody else in the perf tools had a need for it. But I now that
Arnaldo added the header back in August [1] there is no need for a
private version.
Arnaldo, do you want a patch on top of the current patchset or a new set?
[1]. ba4aa02b417f0 (Arnaldo Carvalho de Melo 2018-09-25 10:55:59 -0300
17) * GENMASK_ULL(39, 21)
>
> > +#define BMVAL(val, lsb, msb) ((val & GENMASK(msb, lsb)) >> lsb)
> > +
>
>
> Cheers
> Suzuki