Re: [QUESTION] support perf record --call-graph dwarf for mips

From: Tiezhu Yang
Date: Mon Dec 21 2020 - 03:13:42 EST


On 12/17/2020 08:48 PM, Tiezhu Yang wrote:
On 12/16/2020 11:16 PM, Arnaldo Carvalho de Melo wrote:
Em Wed, Dec 16, 2020 at 11:30:47AM -0300, Arnaldo Carvalho de Melo escreveu:
Em Wed, Dec 16, 2020 at 07:14:02PM +0800, Jiaxun Yang escreveu:

在 2020/12/16 下午6:05, Tiezhu Yang 写道:
Hi,

In the current upstream mainline kernel, perf record --call-graph dwarf
is not supported on the mips64 architecture. I found the following related
patches for this feature by David Daney <david.daney@xxxxxxxxxx> and
Archer Yan <ayan@xxxxxxxxxxxx> from Sep 2019.

...
(3)[loongson@linux perf]$ ./perf record --call-graph dwarf cd
Error:
The sys_perf_event_open() syscall returned with 89 (Function not implemented) for event (cycles:u).
/bin/dmesg | grep -i perf may provide additional information.

Call Trace:
record__open
evsel__open()
evsel__open_cpu()
perf_event_open()
evsel__open_strerror

Maybe we need tools/perf/arch/mips/entry/syscalls/syscall.tbl?

The MIPS kernel and perf tool code has now been debugged successfully
on the Loongson 3A4000 CPU platform, and we can see the following result.
I will prepare and submit some patches based on 5.11-rc1 next week.

[root@linux perf]# uname -r
5.10.0-rc7
[root@linux perf]# ./perf record --call-graph dwarf -F 1000 lscpu
Architecture: mips64
Byte Order: Little Endian
CPU(s): 4
On-line CPU(s) list: 0-3
Thread(s) per core: 1
Core(s) per socket: 4
Socket(s): 1
NUMA node(s): 1
L1d cache: 64K
L1i cache: 64K
L2 cache: 2048K
NUMA node0 CPU(s): 0-3
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.078 MB perf.data (8 samples) ]
[root@linux perf]# ./perf report
# To display the perf.data header info, please use --header/--header-only options.
#
#
# Total Lost Samples: 0
#
# Samples: 8 of event 'cycles'
# Event count (approx.): 5682386
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................ ...........................
#
94.86% 94.86% lscpu [kernel.vmlinux] [k] get_page_from_freelist
|
---__GI_access (inlined)
syscall_common
do_faccessat
filename_lookup
path_lookupat
walk_component
__lookup_slow
d_alloc_parallel
d_alloc
__d_alloc
kmem_cache_alloc
__slab_alloc.isra.96
___slab_alloc
allocate_slab
__alloc_pages_nodemask
get_page_from_freelist

94.86% 0.00% lscpu libc-2.20.so [.] __GI_access (inlined)
|
---__GI_access (inlined)
syscall_common
do_faccessat
filename_lookup
path_lookupat
walk_component
__lookup_slow
d_alloc_parallel
d_alloc
__d_alloc
kmem_cache_alloc
__slab_alloc.isra.96
___slab_alloc
allocate_slab
__alloc_pages_nodemask
get_page_from_freelist

94.86% 0.00% lscpu [kernel.vmlinux] [k] syscall_common
|
---syscall_common
do_faccessat
filename_lookup
path_lookupat
walk_component
__lookup_slow
d_alloc_parallel
d_alloc
__d_alloc
kmem_cache_alloc
__slab_alloc.isra.96
___slab_alloc
allocate_slab
__alloc_pages_nodemask
get_page_from_freelist

94.86% 0.00% lscpu [kernel.vmlinux] [k] do_faccessat
|
---do_faccessat
filename_lookup
path_lookupat
walk_component
__lookup_slow
d_alloc_parallel
d_alloc
__d_alloc
kmem_cache_alloc
__slab_alloc.isra.96
___slab_alloc
allocate_slab
__alloc_pages_nodemask
get_page_from_freelist

94.86% 0.00% lscpu [kernel.vmlinux] [k] filename_lookup
|
---filename_lookup
path_lookupat
walk_component
__lookup_slow
d_alloc_parallel
d_alloc
__d_alloc
kmem_cache_alloc
__slab_alloc.isra.96
___slab_alloc
allocate_slab
__alloc_pages_nodemask
get_page_from_freelist

94.86% 0.00% lscpu [kernel.vmlinux] [k] path_lookupat
|
---path_lookupat
walk_component
__lookup_slow
d_alloc_parallel
d_alloc
__d_alloc
kmem_cache_alloc
__slab_alloc.isra.96
___slab_alloc
allocate_slab
__alloc_pages_nodemask
get_page_from_freelist

94.86% 0.00% lscpu [kernel.vmlinux] [k] walk_component
|
---walk_component
__lookup_slow
d_alloc_parallel
d_alloc
__d_alloc
kmem_cache_alloc
__slab_alloc.isra.96
___slab_alloc
allocate_slab
__alloc_pages_nodemask
get_page_from_freelist

94.86% 0.00% lscpu [kernel.vmlinux] [k] __lookup_slow
|
---__lookup_slow
d_alloc_parallel
d_alloc
__d_alloc
kmem_cache_alloc
__slab_alloc.isra.96
___slab_alloc
allocate_slab
__alloc_pages_nodemask
get_page_from_freelist

94.86% 0.00% lscpu [kernel.vmlinux] [k] d_alloc_parallel
|
---d_alloc_parallel
d_alloc
__d_alloc
kmem_cache_alloc
__slab_alloc.isra.96
___slab_alloc
allocate_slab
__alloc_pages_nodemask
get_page_from_freelist

94.86% 0.00% lscpu [kernel.vmlinux] [k] d_alloc
|
---d_alloc
__d_alloc
kmem_cache_alloc
__slab_alloc.isra.96
___slab_alloc
allocate_slab
__alloc_pages_nodemask
get_page_from_freelist

94.86% 0.00% lscpu [kernel.vmlinux] [k] __d_alloc
|
---__d_alloc
kmem_cache_alloc
__slab_alloc.isra.96
___slab_alloc
allocate_slab
__alloc_pages_nodemask
get_page_from_freelist

94.86% 0.00% lscpu [kernel.vmlinux] [k] kmem_cache_alloc
|
---kmem_cache_alloc
__slab_alloc.isra.96
___slab_alloc
allocate_slab
__alloc_pages_nodemask
get_page_from_freelist

94.86% 0.00% lscpu [kernel.vmlinux] [k] __slab_alloc.isra.96
|
---__slab_alloc.isra.96
___slab_alloc
allocate_slab
__alloc_pages_nodemask
get_page_from_freelist

94.86% 0.00% lscpu [kernel.vmlinux] [k] ___slab_alloc
|
---___slab_alloc
allocate_slab
__alloc_pages_nodemask
get_page_from_freelist

94.86% 0.00% lscpu [kernel.vmlinux] [k] allocate_slab
|
---allocate_slab
__alloc_pages_nodemask
get_page_from_freelist

94.86% 0.00% lscpu [kernel.vmlinux] [k] __alloc_pages_nodemask
|
---__alloc_pages_nodemask
get_page_from_freelist

5.00% 5.00% lscpu ld-2.20.so [.] dl_main
|
---dl_main

0.13% 0.13% lscpu [kernel.vmlinux] [k] perf_event_comm_output
0.13% 0.00% lscpu [kernel.vmlinux] [k] merge_sched_in
0.13% 0.00% lscpu [kernel.vmlinux] [k] event_sched_in.isra.132
0.00% 0.00% perf [kernel.vmlinux] [k] arch_local_irq_restore

#
# (Tip: List events using substring match: perf list <keyword>)
#