Re: [PATCH v4 4/4] perf tools: Support "branch-misses:pp" on arm64

From: James Clark
Date: Mon Feb 24 2020 - 12:08:47 EST


Hi Adrian,

On 2/17/20 11:42 AM, Adrian Hunter wrote:
> On 11/02/20 4:04 pm, James Clark wrote:
>> From: Tan Xiaojun <tanxiaojun@xxxxxxxxxx>
>>
>> At the suggestion of James Clark, use spe to support the precise
>> ip of some events. Currently its support event is:
>> branch-misses.
>>
>> Example usage:
>>
>> $ ./perf record -e branch-misses:pp dd if=/dev/zero of=/dev/null count=10000
>> (:p/pp/ppp is same for this case.)
>>
>> $ ./perf report --stdio
>> ("--stdio is not necessary")
>>
>> --------------------------------------------------------------------
>> ...
>> # Samples: 14 of event 'branch-misses:pp'
>> # Event count (approx.): 14
>> #
>> # Children Self Command Shared Object Symbol
>> # ........ ........ ....... ................. ..........................
>> #
>> 14.29% 14.29% dd [kernel.kallsyms] [k] __arch_copy_from_user
>> 14.29% 14.29% dd libc-2.28.so [.] _dl_addr
>> 7.14% 7.14% dd [kernel.kallsyms] [k] __free_pages
>> 7.14% 7.14% dd [kernel.kallsyms] [k] __pi_memcpy
>> 7.14% 7.14% dd [kernel.kallsyms] [k] pagecache_get_page
>> 7.14% 7.14% dd [kernel.kallsyms] [k] unmap_single_vma
>> 7.14% 7.14% dd dd [.] 0x00000000000025ec
>> 7.14% 7.14% dd ld-2.28.so [.] _dl_lookup_symbol_x
>> 7.14% 7.14% dd ld-2.28.so [.] check_match
>> 7.14% 7.14% dd libc-2.28.so [.] __mpn_rshift
>> 7.14% 7.14% dd libc-2.28.so [.] _nl_intern_locale_data
>> 7.14% 7.14% dd libc-2.28.so [.] read_alias_file
>> ...
>> --------------------------------------------------------------------
>>
>> Signed-off-by: Tan Xiaojun <tanxiaojun@xxxxxxxxxx>
>> Suggested-by: James Clark <James.Clark@xxxxxxx>
>> Tested-by: Qi Liu <liuqi115@xxxxxxxxxxxxx>
>> Signed-off-by: James Clark <james.clark@xxxxxxx>
>> Cc: Will Deacon <will@xxxxxxxxxx>
>> Cc: Mark Rutland <mark.rutland@xxxxxxx>
>> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
>> Cc: Ingo Molnar <mingo@xxxxxxxxxx>
>> Cc: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
>> Cc: Alexander Shishkin <alexander.shishkin@xxxxxxxxxxxxxxx>
>> Cc: Jiri Olsa <jolsa@xxxxxxxxxx>
>> Cc: Tan Xiaojun <tanxiaojun@xxxxxxxxxx>
>> Cc: Al Grant <al.grant@xxxxxxx>
>> Cc: Namhyung Kim <namhyung@xxxxxxxxxx>
>> ---
>> tools/perf/arch/arm/util/auxtrace.c | 38 +++++++++++++++++++++++++++++
>> tools/perf/builtin-record.c | 5 ++++
>> tools/perf/util/arm-spe.c | 9 +++++++
>> tools/perf/util/arm-spe.h | 3 +++
>> tools/perf/util/auxtrace.h | 6 +++++
>> 5 files changed, 61 insertions(+)
>>
>> diff --git a/tools/perf/arch/arm/util/auxtrace.c b/tools/perf/arch/arm/util/auxtrace.c
>> index 0a6e75b8777a..18f0ea7556e7 100644
>> --- a/tools/perf/arch/arm/util/auxtrace.c
>> +++ b/tools/perf/arch/arm/util/auxtrace.c
>> @@ -10,11 +10,25 @@
>>
>> #include "../../util/auxtrace.h"
>> #include "../../util/debug.h"
>> +#include "../../util/env.h"
>> #include "../../util/evlist.h"
>> #include "../../util/pmu.h"
>> #include "cs-etm.h"
>> #include "arm-spe.h"
>>
>> +#define SPE_ATTR_TS_ENABLE BIT(0)
>> +#define SPE_ATTR_PA_ENABLE BIT(1)
>> +#define SPE_ATTR_PCT_ENABLE BIT(2)
>> +#define SPE_ATTR_JITTER BIT(16)
>> +#define SPE_ATTR_BRANCH_FILTER BIT(32)
>> +#define SPE_ATTR_LOAD_FILTER BIT(33)
>> +#define SPE_ATTR_STORE_FILTER BIT(34)
>> +
>> +#define SPE_ATTR_EV_RETIRED BIT(1)
>> +#define SPE_ATTR_EV_CACHE BIT(3)
>> +#define SPE_ATTR_EV_TLB BIT(5)
>> +#define SPE_ATTR_EV_BRANCH BIT(7)
>> +
>> static struct perf_pmu **find_all_arm_spe_pmus(int *nr_spes, int *err)
>> {
>> struct perf_pmu **arm_spe_pmus = NULL;
>> @@ -108,3 +122,27 @@ struct auxtrace_record
>> *err = 0;
>> return NULL;
>> }
>> +
>> +void auxtrace__preprocess_evlist(struct evlist *evlist)
>> +{
>> + struct evsel *evsel;
>> + struct perf_pmu *pmu;
>> +
>> + evlist__for_each_entry(evlist, evsel) {
>> + /* Currently only supports precise_ip for branch-misses on arm64 */
>> + if (!strcmp(perf_env__arch(evlist->env), "arm64")
>
> Isn't config ambiguous unless you also check type i.e.
>
> && evsel->core.attr.type == PERF_TYPE_HARDWARE
>

Yes you're right I will add this.

>> + && evsel->core.attr.config == PERF_COUNT_HW_BRANCH_MISSES
>> + && evsel->core.attr.precise_ip)
>> + {
>> + pmu = perf_pmu__find("arm_spe_0");
>> + if (pmu) {
>
> Changing the event seems a bit weird.
>

This is because there is no support in the kernel for the precise_ip attribute on Arm.
SPE can give you precise ip data for the same event, but changing the event is required.

>> + evsel->pmu_name = pmu->name;
>> + evsel->core.attr.type = pmu->type;
>> + evsel->core.attr.config = SPE_ATTR_TS_ENABLE
>> + | SPE_ATTR_BRANCH_FILTER;
>> + evsel->core.attr.config1 = SPE_ATTR_EV_BRANCH;
>> + evsel->core.attr.precise_ip = 0;
>> + }
>> + }
>> + }
>> +}
>> \ No newline at end of file
>> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
>> index 4c301466101b..3bc61f03d572 100644
>> --- a/tools/perf/builtin-record.c
>> +++ b/tools/perf/builtin-record.c
>> @@ -2451,6 +2451,11 @@ int cmd_record(int argc, const char **argv)
>>
>> argc = parse_options(argc, argv, record_options, record_usage,
>> PARSE_OPT_STOP_AT_NON_OPTION);
>> +
>> + if (auxtrace__preprocess_evlist) {
>> + auxtrace__preprocess_evlist(rec->evlist);
>> + }
>> +
>> if (quiet)
>> perf_quiet_option();
>>
>> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
>> index 4ef22a0775a9..b21806c97dd8 100644
>> --- a/tools/perf/util/arm-spe.c
>> +++ b/tools/perf/util/arm-spe.c
>> @@ -778,6 +778,15 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
>> attr.sample_id_all = evsel->core.attr.sample_id_all;
>> attr.read_format = evsel->core.attr.read_format;
>>
>> + /* If it is in the precise ip mode, there is no need to
>> + * synthesize new events. */
>> + if (!strncmp(evsel->name, "branch-misses", 13)) {
>> + spe->sample_branch_miss = true;
>> + spe->branch_miss_id = evsel->core.id[0];
>> +
>> + return 0;
>> + }
>> +
>> /* create new id val to be a fixed offset from evsel id */
>> id = evsel->core.id[0] + 1000000000;
>>
>> diff --git a/tools/perf/util/arm-spe.h b/tools/perf/util/arm-spe.h
>> index 98d3235781c3..8b1fb191d03a 100644
>> --- a/tools/perf/util/arm-spe.h
>> +++ b/tools/perf/util/arm-spe.h
>> @@ -20,6 +20,8 @@ enum {
>> union perf_event;
>> struct perf_session;
>> struct perf_pmu;
>> +struct evlist;
>> +struct evsel;
>>
>> struct auxtrace_record *arm_spe_recording_init(int *err,
>> struct perf_pmu *arm_spe_pmu);
>> @@ -28,4 +30,5 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
>> struct perf_session *session);
>>
>> struct perf_event_attr *arm_spe_pmu_default_config(struct perf_pmu *arm_spe_pmu);
>> +void arm_spe_precise_ip_support(struct evlist *evlist, struct evsel *evsel);
>> #endif
>> diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
>> index 80617b0d044d..4f89a3a31ab2 100644
>> --- a/tools/perf/util/auxtrace.h
>> +++ b/tools/perf/util/auxtrace.h
>> @@ -584,6 +584,7 @@ void auxtrace__dump_auxtrace_sample(struct perf_session *session,
>> int auxtrace__flush_events(struct perf_session *session, struct perf_tool *tool);
>> void auxtrace__free_events(struct perf_session *session);
>> void auxtrace__free(struct perf_session *session);
>> +void auxtrace__preprocess_evlist(struct evlist *evlist) __attribute__((weak));
>>
>> #define ITRACE_HELP \
>> " i: synthesize instructions events\n" \
>> @@ -728,6 +729,11 @@ void auxtrace__free(struct perf_session *session __maybe_unused)
>> {
>> }
>>
>> +static inline
>> +void auxtrace__preprocess_evlist(struct evlist *evlist __maybe_unused)
>> +{
>> +}
>> +
>> static inline
>> int auxtrace_index__write(int fd __maybe_unused,
>> struct list_head *head __maybe_unused)
>>
>