Re: [PATCH] perf script: add script to profile and resolve physical mem type

From: Stephane Eranian
Date: Tue Oct 17 2017 - 15:23:03 EST


On Mon, Oct 16, 2017 at 3:26 PM, <kan.liang@xxxxxxxxx> wrote:
> From: Kan Liang <Kan.liang@xxxxxxxxx>
>
> There could be different types of memory in the system, e.g. normal
> System Memory and Persistent Memory. To understand how the workload maps
> to those memories, it's important to know the I/O statistics on the
> different types of memory. Perf can collect address maps with physical addresses,
> but those are raw data. It still needs extra work to resolve the
> physical addresses.
> Provide a script to facilitate resolving the physical addresses and
> collecting the I/O statistics.
>
> Profiling with mem-loads and mem-stores if they are available.
> Looking up the physical address samples in /proc/iomem
> Providing memory type summary
>
> Here is an example
> #perf script record mem-phys-addr -- pmem_test_kernel
> [ perf record: Woken up 32 times to write data ]
> [ perf record: Captured and wrote 7.797 MB perf.data (101995 samples) ]
> #perf script report mem-phys-addr
> Memory type summary
>
> Event: mem-loads
> Memory type count percentage
> ---------------------------------------- ----------- -----------
> Persistent Memory 43740 60.6%
> System RAM 27179 37.7%
> N/A 1268 1.8%
>
> Event: mem-stores
> Memory type count percentage
> ---------------------------------------- ----------- -----------
> System RAM 24508 82.2%
> N/A 5140 17.2%
> Persistent Memory 160 0.5%
>
> Signed-off-by: Kan Liang <Kan.liang@xxxxxxxxx>
> ---
> tools/perf/scripts/python/bin/mem-phys-addr-record | 30 ++++++
> tools/perf/scripts/python/bin/mem-phys-addr-report | 3 +
> tools/perf/scripts/python/mem-phys-addr.py | 109 +++++++++++++++++++++
> .../util/scripting-engines/trace-event-python.c | 2 +
> 4 files changed, 144 insertions(+)
> create mode 100644 tools/perf/scripts/python/bin/mem-phys-addr-record
> create mode 100644 tools/perf/scripts/python/bin/mem-phys-addr-report
> create mode 100644 tools/perf/scripts/python/mem-phys-addr.py
>
> diff --git a/tools/perf/scripts/python/bin/mem-phys-addr-record b/tools/perf/scripts/python/bin/mem-phys-addr-record
> new file mode 100644
> index 0000000..395b256
> --- /dev/null
> +++ b/tools/perf/scripts/python/bin/mem-phys-addr-record
> @@ -0,0 +1,30 @@
> +#!/bin/bash
> +
> +#
> +# Profiling physical memory accesses
> +#
> +
> +load=`perf list pmu | grep mem-loads`
> +store=`perf list pmu | grep mem-stores`
> +if [ -z "$load" ] && [ -z "$store" ] ; then
> + echo "There is no mem-loads or mem-stores support"
> + exit 1
> +fi
> +
> +arg="-e"
> +if [ ! -z "$store" ] ; then
> + arg="$arg mem-stores:P"
> +fi
> +
> +if [ ! -z "$load" ] ; then
> + if [ ! -z "$store" ] ; then
> + arg="$arg,mem-loads:P"
> + else
> + arg="$arg mem-loads:P"
> + fi
> + arg="$arg -W"
> +fi
> +
> +arg="$arg -d --phys-data"
> +
> +perf record $arg $@
> diff --git a/tools/perf/scripts/python/bin/mem-phys-addr-report b/tools/perf/scripts/python/bin/mem-phys-addr-report
> new file mode 100644
> index 0000000..3f2b847
> --- /dev/null
> +++ b/tools/perf/scripts/python/bin/mem-phys-addr-report
> @@ -0,0 +1,3 @@
> +#!/bin/bash
> +# description: resolve physical address samples
> +perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/mem-phys-addr.py
> diff --git a/tools/perf/scripts/python/mem-phys-addr.py b/tools/perf/scripts/python/mem-phys-addr.py
> new file mode 100644
> index 0000000..73b3a63
> --- /dev/null
> +++ b/tools/perf/scripts/python/mem-phys-addr.py
> @@ -0,0 +1,109 @@
> +# mem-phys-addr.py: Resolve physical address samples
> +# Copyright (c) 2017, Intel Corporation.
> +#
> +# This program is free software; you can redistribute it and/or modify it
> +# under the terms and conditions of the GNU General Public License,
> +# version 2, as published by the Free Software Foundation.
> +#
> +# This program is distributed in the hope it will be useful, but WITHOUT
> +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
> +# more details.
> +
> +from __future__ import division
> +import os
> +import sys
> +import struct
> +import re
> +import bisect
> +import collections
> +
> +sys.path.append(os.environ['PERF_EXEC_PATH'] + \
> + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
> +
> +system_ram = []
> +pmem = []
> +f = None
> +load_event = ('mem-loads', '0x1cd')
> +store_event = ('mem-stores', '0x82d0');

I don't like the fact that you are mixing DLA and Load Latency.
The two mechanisms do not operate the same way, and any attempt
at comparing the loads and stores leads to misinterpretations.

> +load_mem_type_cnt = collections.Counter()
> +store_mem_type_cnt = collections.Counter()
> +
> +def parse_iomem():
> + global f
> + f = open('/proc/iomem', 'r')
> + for i, j in enumerate(f):
> + m = re.split('-|:',j,2)
> + if m[2].strip() == 'System RAM':
> + system_ram.append(long(m[0], 16))
> + system_ram.append(long(m[1], 16))
> + if m[2].strip() == 'Persistent Memory':
> + pmem.append(long(m[0], 16))
> + pmem.append(long(m[1], 16))
> +
> +def print_memory_type():
> + print "Memory type summary\n"
> + print "Event: mem-loads"
> + print "%-40s %10s %10s\n" % ("Memory type", "count", "percentage"),
> + print "%-40s %10s %10s\n" % ("----------------------------------------", \
> + "-----------", "-----------"),
> + total = sum(load_mem_type_cnt.values())
> + for mem_type, count in sorted(load_mem_type_cnt.most_common(), \
> + key = lambda(k, v): (v, k), reverse = True):
> + print "%-40s %10d %10.1f%%\n" % (mem_type, count, 100 * count / total),
> + print "\n\n"
> + print "Event: mem-stores"
> + print "%-40s %10s %10s\n" % ("Memory type", "count", "percentage"),
> + print "%-40s %10s %10s\n" % ("----------------------------------------", \
> + "-----------", "-----------"),
> + total = sum(store_mem_type_cnt.values())
> + for mem_type, count in sorted(store_mem_type_cnt.most_common(), \
> + key = lambda(k, v): (v, k), reverse = True):
> + print "%-40s %10d %10.1f%%\n" % (mem_type, count, 100 * count / total),
> +
> +def trace_begin():
> + parse_iomem()
> +
> +def trace_end():
> + print_memory_type()
> + f.close()
> +
> +def is_system_ram(phys_addr):
> + #/proc/iomem is sorted
> + position = bisect.bisect(system_ram, phys_addr)
> + if position % 2 == 0:
> + return False
> + return True
> +
> +def is_persistent_mem(phys_addr):
> + position = bisect.bisect(pmem, phys_addr)
> + if position % 2 == 0:
> + return False
> + return True
> +
> +def find_memory_type(phys_addr):
> + if phys_addr == 0:
> + return "N/A"
> + if is_system_ram(phys_addr):
> + return "System RAM"
> +
> + if is_persistent_mem(phys_addr):
> + return "Persistent Memory"
> +
> + #slow path, search all
> + f.seek(0, 0)
> + for j in f:
> + m = re.split('-|:',j,2)
> + if long(m[0], 16) <= phys_addr <= long(m[1], 16):
> + return m[2]
> + return "N/A"
> +
> +def process_event(param_dict):
> + name = param_dict["ev_name"]
> + sample = param_dict["sample"]
> + phys_addr = sample["phys_addr"]
> +
> + if any(x in name for x in load_event):
> + load_mem_type_cnt[find_memory_type(phys_addr)] += 1
> + if any(x in name for x in store_event):
> + store_mem_type_cnt[find_memory_type(phys_addr)] += 1
> diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
> index c7187f0..8cd6317 100644
> --- a/tools/perf/util/scripting-engines/trace-event-python.c
> +++ b/tools/perf/util/scripting-engines/trace-event-python.c
> @@ -500,6 +500,8 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample,
> PyLong_FromUnsignedLongLong(sample->time));
> pydict_set_item_string_decref(dict_sample, "period",
> PyLong_FromUnsignedLongLong(sample->period));
> + pydict_set_item_string_decref(dict_sample, "phys_addr",
> + PyLong_FromUnsignedLongLong(sample->phys_addr));
> set_sample_read_in_dict(dict_sample, sample, evsel);
> pydict_set_item_string_decref(dict, "sample", dict_sample);
>
> --
> 2.7.4
>