Re: [PATCH v2 2/2] perf-stat: enable counting events for BPF programs

From: Song Liu
Date: Mon Dec 07 2020 - 20:38:10 EST




> On Dec 7, 2020, at 2:07 PM, Jiri Olsa <jolsa@xxxxxxxxxx> wrote:
>
> On Thu, Dec 03, 2020 at 10:13:10PM -0800, Song Liu wrote:
>
> SNIP
>
>> +#include "bpf_skel/bpf_prog_profiler.skel.h"
>> +
>> +static inline void *u64_to_ptr(__u64 ptr)
>> +{
>> + return (void *)(unsigned long)ptr;
>> +}
>> +
>> +static void set_max_rlimit(void)
>> +{
>> + struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
>> +
>> + setrlimit(RLIMIT_MEMLOCK, &rinf);
>> +}
>> +
>> +static inline struct bpf_counter *bpf_counter_alloc(void)
>
> why is this inlined?

We don't need the inline here. I will remove it in the next version.

>
> SNIP
>
>> +static int bpf_program_profiler_load_one(struct evsel *evsel, u32 prog_id)
>> +{
>> + struct bpf_prog_profiler_bpf *skel;
>> + struct bpf_counter *counter;
>> + struct bpf_program *prog;
>> + char *prog_name;
>> + int prog_fd;
>> + int err;
>> +
>> + prog_fd = bpf_prog_get_fd_by_id(prog_id);
>> + if (prog_fd < 0) {
>> + pr_debug("Failed to open fd for bpf prog %u\n", prog_id);
>> + return -1;
>> + }
>> + counter = bpf_counter_alloc();
>> + if (!counter)
>> + return -1;
>> +
>> + skel = bpf_prog_profiler_bpf__open();
>> + if (!skel) {
>> + pr_debug("Failed to load bpf skeleton\n");
>
> I'm still getting
>
> [root@dell-r440-01 perf]# ./perf stat -b 38
> libbpf: elf: skipping unrecognized data section(9) .eh_frame
> libbpf: elf: skipping relo section(15) .rel.eh_frame for section(9) .eh_frame
> libbpf: XXX is not found in vmlinux BTF
> libbpf: failed to load object 'bpf_prog_profiler_bpf'
> libbpf: failed to load BPF skeleton 'bpf_prog_profiler_bpf': -2
> ...
>
> with id 38 being:
>
> 38: tracepoint name sys_enter tag 03418b72a610af75 gpl
> loaded_at 2020-12-07T22:54:05+0100 uid 0
> xlated 272B jited 153B memlock 4096B map_ids 1
>
> how is this supposed to work when there's XXX in the
> program's section? libbpf is trying to find XXX in
> kernel BTF and fails of course

I think this is because this program doesn't have BTF. The function that
actually failed was bpf_program__set_attach_target(), so the error message
above should be "Failed to _open_ bpf skeleton". I will fix the error messages.

>
>
>> + free(counter);
>> + return -1;
>> + }
>> + skel->rodata->num_cpu = evsel__nr_cpus(evsel);
>> +
>> + bpf_map__resize(skel->maps.events, evsel__nr_cpus(evsel));
>> + bpf_map__resize(skel->maps.fentry_readings, 1);
>> + bpf_map__resize(skel->maps.accum_readings, 1);
>> +
>
> SNIP
>
>> +static int bpf_program_profiler__read(struct evsel *evsel)
>> +{
>> + int num_cpu = evsel__nr_cpus(evsel);
>> + struct bpf_perf_event_value values[num_cpu];
>> + struct bpf_counter *counter;
>> + int reading_map_fd;
>> + __u32 key = 0;
>> + int err, cpu;
>> +
>> + if (list_empty(&evsel->bpf_counter_list))
>> + return -EAGAIN;
>> +
>> + for (cpu = 0; cpu < num_cpu; cpu++) {
>> + perf_counts(evsel->counts, cpu, 0)->val = 0;
>> + perf_counts(evsel->counts, cpu, 0)->ena = 0;
>> + perf_counts(evsel->counts, cpu, 0)->run = 0;
>> + }
>> + list_for_each_entry(counter, &evsel->bpf_counter_list, list) {
>> + struct bpf_prog_profiler_bpf *skel = counter->skel;
>> +
>> + reading_map_fd = bpf_map__fd(skel->maps.accum_readings);
>> +
>> + err = bpf_map_lookup_elem(reading_map_fd, &key, values);
>> + if (err) {
>> + fprintf(stderr, "failed to read value\n");
>> + return err;
>> + }
>> +
>> + for (cpu = 0; cpu < num_cpu; cpu++) {
>> + perf_counts(evsel->counts, cpu, 0)->val += values[cpu].counter;
>> + perf_counts(evsel->counts, cpu, 0)->ena += values[cpu].enabled;
>> + perf_counts(evsel->counts, cpu, 0)->run += values[cpu].running;
>> + }
>
> so we sum everything up for all provided bpf IDs,
> should we count/display them separately?

I think summing them is also the default behavior with --pid x,y,z or
--cpu a,b,c. Do we need to report them separately?

>
> SNIP
>
>> +SEC("fentry/XXX")
>> +int BPF_PROG(fentry_XXX)
>> +{
>> + __u32 key = bpf_get_smp_processor_id();
>> + struct bpf_perf_event_value reading;
>> + struct bpf_perf_event_value *ptr;
>> + __u32 zero = 0;
>> + long err;
>> +
>> + /* look up before reading, to reduce error */
>> + ptr = bpf_map_lookup_elem(&fentry_readings, &zero);
>> + if (!ptr)
>> + return 0;
>> +
>> + err = bpf_perf_event_read_value(&events, key, &reading,
>> + sizeof(reading));
>
> can't we read directly to ptr in here?

Yes, we can! Thanks for catching this.

>
> SNIP
>
>> /* THREAD and SYSTEM/CPU are mutually exclusive */
>> if (target->per_thread && (target->system_wide || target->cpu_list)) {
>> target->per_thread = false;
>> @@ -109,6 +137,10 @@ static const char *target__error_str[] = {
>> "PID/TID switch overriding SYSTEM",
>> "UID switch overriding SYSTEM",
>> "SYSTEM/CPU switch overriding PER-THREAD",
>> + "BPF switch overriding CPU",
>> + "BPF switch overriding PID/TID",
>> + "BPF switch overriding UID",
>> + "BPF switch overriding THREAD",
>> "Invalid User: %s",
>> "Problems obtaining information for user %s",
>> };
>> @@ -134,7 +166,7 @@ int target__strerror(struct target *target, int errnum,
>>
>> switch (errnum) {
>> case TARGET_ERRNO__PID_OVERRIDE_CPU ...
>> - TARGET_ERRNO__SYSTEM_OVERRIDE_THREAD:
>
> hum, this should stay, no?

We need this change to the case range in order to show warnings like:

~/perf stat -e cycles,instructions -b 245561 -C 0
BPF switch overriding CPU
...

Thanks,
Song