[PATCH 1/2] perf script python: integrate page reclaim analyze script

From: Yafang Shao
Date: Wed Sep 18 2019 - 10:39:15 EST


A new perf script page-reclaim is introduced in this patch. This new script
is used to report the page reclaim details. The possible usage of this
script is as bellow,
- identify latency spike caused by direct reclaim
- whehter the latency spike is relevant with pageout
- why is page reclaim requested, i.e. whether it is because of memory
fragmentation
- page reclaim efficiency
etc
In the future we may also enhance it to analyze the memcg reclaim.

Bellow is how to use this script,
# Record, one of the following
$ perf record -e 'vmscan:mm_vmscan_*' ./workload
$ perf script record page-reclaim

# Report
$ perf script report page-reclaim

# Report per process latency
$ perf script report page-reclaim -- -p

# Report per process latency details. At what time and how long it
# stalls at each time.
$ perf script report page-reclaim -- -v

An example of doing mmtests,
$ perf script report page-reclaim
Direct reclaims: 4924
Direct latency (ms) total max avg min
177823.211 6378.977 36.114 0.051
Direct file reclaimed 22920
Direct file scanned 28306
Direct file sync write I/O 0
Direct file async write I/O 0
Direct anon reclaimed 212567
Direct anon scanned 1446854
Direct anon sync write I/O 0
Direct anon async write I/O 278325
Direct order 0 1 3
4870 23 31
Wake kswapd requests 716
Wake order 0 1
715 1

Kswapd reclaims: 9
Kswapd latency (ms) total max avg min
86353.046 42128.816 9594.783 120.736
Kswapd file reclaimed 366461
Kswapd file scanned 369554
Kswapd file sync write I/O 0
Kswapd file async write I/O 0
Kswapd anon reclaimed 362594
Kswapd anon scanned 693938
Kswapd anon sync write I/O 0
Kswapd anon async write I/O 330663
Kswapd order 0 1 3
3 1 5
Kswapd re-wakes 705

$ perf script report page-reclaim -- -p
# besides the above basic output, it will also summary per task
# latency
Per process latency (ms):
pid[comm] total max avg min
1[systemd] 276.764 248.933 21.29 0.293
163[kswapd0] 86353.046 42128.816 9594.783 120.736
7241[bash] 12787.749 859.091 94.028 0.163
1592[master] 81.604 70.811 27.201 2.906
1595[pickup] 496.162 374.168 165.387 14.478
1098[auditd] 19.32 19.32 19.32 19.32
1120[irqbalance] 5232.331 1386.352 158.555 0.169
7236[usemem] 79649.04 1763.281 24.921 0.051
1605[sshd] 1344.41 645.125 34.472 0.16
7238[bash] 1158.92 1023.307 231.784 0.067
7239[bash] 15100.776 993.447 82.069 0.145
...

$ per script report page-reclaim -- -v
# Besides the basic output, it will asl show per task latency details
Per process latency (ms):
pid[comm] total max avg min
timestamp latency(ns)
1[systemd] 276.764 248.933 21.29 0.293
3406860552338: 16819800
3406877381650: 5532855
3407458799399: 929517
3407459796042: 916682
3407460763220: 418989
3407461250236: 332355
3407461637534: 401731
3407462092234: 449219
3407462605855: 292857
3407462952343: 372700
3407463364947: 414880
3407463829547: 949162
3407464813883: 248933444
163[kswapd0] 86353.046 42128.816 9594.783 120.736
3357637025977: 1026962745
3358915619888: 41268642175
3400239664127: 42128816204
3443784780373: 679641989
3444847948969: 120735792
3445001978784: 342713657
3445835850664: 316851589
3446865035476: 247457873
3449355401352: 221223878
...

This script must be in sync with bellow vmscan tracepoints,
mm_vmscan_direct_reclaim_begin
mm_vmscan_direct_reclaim_end
mm_vmscan_kswapd_wake
mm_vmscan_kswapd_sleep
mm_vmscan_wakeup_kswapd
mm_vmscan_lru_shrink_inactive
mm_vmscan_writepage

Signed-off-by: Yafang Shao <laoar.shao@xxxxxxxxx>
---
tools/perf/scripts/python/bin/page-reclaim-record | 2 +
tools/perf/scripts/python/bin/page-reclaim-report | 4 +
tools/perf/scripts/python/page-reclaim.py | 378 ++++++++++++++++++++++
3 files changed, 384 insertions(+)
create mode 100644 tools/perf/scripts/python/bin/page-reclaim-record
create mode 100644 tools/perf/scripts/python/bin/page-reclaim-report
create mode 100644 tools/perf/scripts/python/page-reclaim.py

diff --git a/tools/perf/scripts/python/bin/page-reclaim-record b/tools/perf/scripts/python/bin/page-reclaim-record
new file mode 100644
index 0000000..5a16a23
--- /dev/null
+++ b/tools/perf/scripts/python/bin/page-reclaim-record
@@ -0,0 +1,2 @@
+#!/bin/bash
+perf record -e vmscan:mm_vmscan_direct_reclaim_begin -e vmscan:mm_vmscan_direct_reclaim_end -e vmscan:mm_vmscan_kswapd_wake -e vmscan:mm_vmscan_kswapd_sleep -e vmscan:mm_vmscan_wakeup_kswapd -e vmscan:mm_vmscan_lru_shrink_inactive -e vmscan:mm_vmscan_writepage $@
diff --git a/tools/perf/scripts/python/bin/page-reclaim-report b/tools/perf/scripts/python/bin/page-reclaim-report
new file mode 100644
index 0000000..b74e197
--- /dev/null
+++ b/tools/perf/scripts/python/bin/page-reclaim-report
@@ -0,0 +1,4 @@
+#!/bin/bash
+#description: display page reclaim details
+#args: [-h] [-p] [-v]
+perf script -s "$PERF_EXEC_PATH"/scripts/python/page-reclaim.py $@
diff --git a/tools/perf/scripts/python/page-reclaim.py b/tools/perf/scripts/python/page-reclaim.py
new file mode 100644
index 0000000..5c0bd64
--- /dev/null
+++ b/tools/perf/scripts/python/page-reclaim.py
@@ -0,0 +1,378 @@
+# SPDX-License-Identifier: GPL-2.0
+# Perf script to help analyze page reclaim with vmscan tracepoints
+# e.g. to capture the latency spike caused by direct reclaim.
+#
+# This script is motivated by Mel's trace-vmscan-postprocess.pl.
+#
+# Author: Yafang Shao <laoar.shao@xxxxxxxxx>
+
+import os
+import sys
+import getopt
+import signal
+
+signal.signal(signal.SIGPIPE, signal.SIG_DFL)
+
+usage = "usage: perf script report page-reclaim -- [-h] [-p] [-v]\n"
+
+latency_metric = ['total', 'max', 'avg', 'min']
+reclaim_path = ['Kswapd', 'Direct']
+sync_io = ['async', 'sync']
+lru = ['anon', 'file']
+
+class Show:
+ DEFAULT = 0
+ PROCCESS = 1
+ VERBOSE = 2
+
+show_opt = Show.DEFAULT
+
+def ns(sec, nsec):
+ return (sec * 1000000000) + nsec
+
+def ns_to_ms(ns):
+ return round(ns / 1000000.0, 3)
+
+def print_proc_latency(_list, pid, comm):
+ line = pid.rjust(8)
+ line += comm.ljust(12)
+ line += "".join(map(lambda x: str(x).rjust(12), _list))
+
+ print(line)
+
+def print_stat_list(__list, title, padding):
+ width = len(title) + 1
+ header = title.ljust(width)
+ line = ''.ljust(width)
+
+ for v in __list:
+ header += str(v[0]).rjust(padding)
+ line += str(v[1]).rjust(padding)
+
+ print(header)
+ print(line)
+
+class Trace:
+ def __init__(self, secs, nsecs):
+ self.begin = ns(secs, nsecs)
+
+ def complete(self, secs, nsecs):
+ self.ns = ns(secs, nsecs) - self.begin
+
+class Stat:
+ def __init__(self):
+ self.stats = {}
+ self.stats['file'] = {}
+ self.stats['file']['reclaimed'] = 0
+ self.stats['file']['scanned'] = 0
+ self.stats['file']['sync'] = 0
+ self.stats['file']['async'] = 0
+ self.stats['anon'] = {}
+ self.stats['anon']['reclaimed'] = 0
+ self.stats['anon']['scanned'] = 0
+ self.stats['anon']['sync'] = 0
+ self.stats['anon']['async'] = 0
+
+ # including reclaimed slab caches
+ self.stats['reclaimed'] = 0
+
+ # The MAX_ORDER in kernel is configurable
+ self.stats['order'] = {}
+
+ self.stats['latency'] = {}
+ self.stats['latency']['total'] = 0.0
+ self.stats['latency']['max'] = 0.0
+ self.stats['latency']['avg'] = 0.0
+ self.stats['latency']['min'] = float("inf")
+ self.stats['count'] = 0
+
+ def add_latency(self, val, order):
+ self.stats['latency']['total'] += val
+ _max = self.stats['latency']['max']
+ _min = self.stats['latency']['min']
+ if val > _max:
+ self.stats['latency']['max'] = val
+ if val < _min:
+ self.stats['latency']['min'] = val
+
+ self.stats['count'] += 1
+ self.stats['order'][order] = self.stats['order'].get(order, 0) + 1
+
+ def add_page(self, _lru, scanned, reclaimed):
+ self.stats[_lru]['scanned'] += scanned
+ self.stats[_lru]['reclaimed'] += reclaimed
+
+ def inc_write_io(self, _lru, _io):
+ self.stats[_lru][_io] += 1
+
+ def convert_latency(self):
+ count = self.stats['count']
+ if count:
+ self.stats['latency']['avg'] = \
+ self.stats['latency']['total'] / count
+ for i, v in self.stats['latency'].items():
+ self.stats['latency'][i] = ns_to_ms(v)
+
+ latency_list = sorted(self.stats['latency'].items(),
+ key=lambda i:latency_metric.index(i[0]))
+
+ return latency_list
+
+ def show_stats(self, key):
+ count = self.stats['count']
+ print("%s reclaims: %d" % (key, count))
+
+ # Format latency output
+ # Print latencys in milliseconds:
+ # title total max avg min
+ # v v v v
+ latency_list = self.convert_latency()
+ print_stat_list(latency_list, key + " latency (ms)", 12)
+
+ for _lru in ['file', 'anon']:
+ for action in ['reclaimed', 'scanned']:
+ print("%s %s %s %d" % (key, _lru, action, self.stats[_lru][action]))
+ for _io in ['sync', 'async']:
+ print("%s %s %s write I/O %d" % (key, _lru, _io, self.stats[_lru][_io]))
+
+ # Format order output
+ # Similar with /proc/buddyinfo:
+ # title order-N ...
+ # v ...
+ # N.B. v is a non-zero value
+ order_list = sorted(self.stats['order'].items())
+ print_stat_list(order_list, key + ' order', 6)
+
+class Vmscan:
+ events = {}
+ stat = {}
+ stat['Direct'] = Stat()
+ stat['Kswapd'] = Stat()
+ # for re-wake the kswapd
+ rewake = 0
+
+ @classmethod
+ def direct_begin(cls, pid, comm, start_secs, start_nsecs, order):
+ event = cls.events.get(pid)
+ if event is None:
+ #new vmscan instance
+ event = cls.events[pid] = Vmscan(comm, pid)
+ event.vmscan_trace_begin(start_secs, start_nsecs, order, 1)
+
+ @classmethod
+ def direct_end(cls, pid, secs, nsecs, reclaimed):
+ event = cls.events.get(pid)
+ if event and event.tracing():
+ event.vmscan_trace_end(secs, nsecs)
+
+ @classmethod
+ def kswapd_wake(cls, pid, comm, start_secs, start_nsecs, order):
+ event = cls.events.get(pid)
+ if event is None:
+ # new vmscan instance
+ event = cls.events[pid] = Vmscan(comm, pid)
+
+ if event.tracing() is False:
+ event.vmscan_trace_begin(start_secs, start_nsecs, order, 0)
+ # kswapd is already running
+ else:
+ cls.rewake_kswapd(order)
+
+ @classmethod
+ def rewake_kswapd(cls, order):
+ cls.rewake += 1
+
+ @classmethod
+ def show_rewakes(cls):
+ print("Kswapd re-wakes %d" % (cls.rewake))
+
+ @classmethod
+ def kswapd_sleep(cls, pid, secs, nsecs):
+ event = cls.events.get(pid)
+ if event and event.tracing():
+ event.vmscan_trace_end(secs, nsecs)
+
+ @classmethod
+ def shrink_inactive(cls, pid, scanned, reclaimed, flags):
+ event = cls.events.get(pid)
+ if event and event.tracing():
+ # RECLAIM_WB_ANON 0x1
+ # RECLAIM_WB_FILE 0x2
+ _type = (flags & 0x2) >> 1
+ event.process_lru(lru[_type], scanned, reclaimed)
+
+ @classmethod
+ def writepage(cls, pid, flags):
+ event = cls.events.get(pid)
+ if event and event.tracing():
+ # RECLAIM_WB_ANON 0x1
+ # RECLAIM_WB_FILE 0x2
+ # RECLAIM_WB_SYNC 0x4
+ # RECLAIM_WB_ASYNC 0x8
+ _type = (flags & 0x2) >> 1
+ _io = (flags & 0x4) >> 2
+
+ event.process_writepage(lru[_type], sync_io[_io])
+
+ @classmethod
+ def iterate_proc(cls):
+ if show_opt != Show.DEFAULT:
+ print("\nPer process latency (ms):")
+ print_proc_latency(latency_metric, 'pid', '[comm]')
+
+ if show_opt == Show.VERBOSE:
+ print("%20s %s" % ('timestamp','latency(ns)'))
+
+ for i in cls.events:
+ yield cls.events[i]
+
+ def __init__(self, comm, pid):
+ self.comm = comm
+ self.pid = pid
+ self.trace = None
+ self._list = []
+ self.stat = Stat()
+ self.direct = 0
+ self.order = 0
+
+ def vmscan_trace_begin(self, secs, nsecs, order, direct):
+ self.trace = Trace(secs, nsecs)
+ self.direct = direct
+ self.order = order
+
+ def vmscan_trace_end(self, secs, nsecs):
+ path = reclaim_path[self.direct]
+ self.trace.complete(secs, nsecs)
+
+ Vmscan.stat[path].add_latency(self.trace.ns, self.order)
+ if show_opt != Show.DEFAULT:
+ self.stat.add_latency(self.trace.ns, self.order)
+ if show_opt == Show.VERBOSE:
+ self._list.append(self.trace)
+
+ self.trace = None
+
+ def process_lru(self, lru, scanned, reclaimed):
+ path = reclaim_path[self.direct]
+ Vmscan.stat[path].add_page(lru, scanned, reclaimed)
+
+ def process_writepage(self, lru, io):
+ path = reclaim_path[self.direct]
+ Vmscan.stat[path].inc_write_io(lru, io)
+
+ def tracing(self):
+ return self.trace != None
+
+ def display_proc(self):
+ self.stat.convert_latency()
+ print_proc_latency(sorted(self.stat.stats['latency'].itervalues(),
+ reverse=True),
+ str(self.pid),
+ '[' +self.comm[0:10] + ']')
+
+ def display_proc_detail(self):
+ if show_opt == Show.VERBOSE:
+ for i, v in enumerate(self._list):
+ print("%20d: %d" % (v.begin, v.ns))
+
+# Wake kswpad request
+class Wakeup:
+ wakes = 0
+ orders = {}
+
+ @classmethod
+ def wakeup_kswapd(cls, order):
+ cls.wakes += 1
+ cls.orders[order] = cls.orders.get(order, 0) + 1
+
+ @classmethod
+ def show_wakes(cls):
+ print("Wake kswapd requests %d" % (cls.wakes))
+
+ order_list = sorted(cls.orders.items())
+ print_stat_list(order_list, 'Wake order', 6)
+
+def trace_end():
+ Vmscan.stat['Direct'].show_stats('Direct')
+ Wakeup.show_wakes()
+ print('')
+
+ Vmscan.stat['Kswapd'].show_stats('Kswapd')
+ Vmscan.show_rewakes()
+
+ # show process details if requested
+ for i in Vmscan.iterate_proc():
+ i.display_proc(),
+ i.display_proc_detail()
+
+# These definations must be in sync with the vmscan tracepoints.
+def vmscan__mm_vmscan_direct_reclaim_begin(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ common_callchain, order, gfp_flags):
+
+ Vmscan.direct_begin(common_pid, common_comm, common_secs,
+ common_nsecs, order)
+
+def vmscan__mm_vmscan_direct_reclaim_end(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ common_callchain, nr_reclaimed):
+
+ Vmscan.direct_end(common_pid, common_secs, common_nsecs, nr_reclaimed)
+
+def vmscan__mm_vmscan_kswapd_wake(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ common_callchain, nid, zid, order):
+
+ Vmscan.kswapd_wake(common_pid, common_comm, common_secs, common_nsecs, order)
+
+def vmscan__mm_vmscan_kswapd_sleep(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ common_callchain, nid):
+
+ Vmscan.kswapd_sleep(common_pid, common_secs, common_nsecs)
+
+def vmscan__mm_vmscan_wakeup_kswapd(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ common_callchain, nid, zid, order, gfp_flags):
+
+ Wakeup.wakeup_kswapd(order)
+
+def vmscan__mm_vmscan_lru_shrink_inactive(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ common_callchain, nid, nr_scanned, nr_reclaimed, nr_dirty,
+ nr_writeback, nr_congested, nr_immediate, nr_activate_anon,
+ nr_activate_file, nr_ref_keep, nr_unmap_fail, priority, flags):
+
+ Vmscan.shrink_inactive(common_pid, nr_scanned, nr_reclaimed, flags)
+
+def vmscan__mm_vmscan_writepage(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ common_callchain, pfn, reclaim_flags):
+
+ Vmscan.writepage(common_pid, reclaim_flags)
+
+def print_help():
+ global usage
+ print(usage)
+ print(" -p show process latency (ms)")
+ print(" -v show process latency (ns) with timestamp")
+
+def option_parse():
+ try:
+ opts, args = getopt.getopt(sys.argv[1:], "pvh")
+ except getopt.GetoptError:
+ print('Bad option!')
+ exit(1)
+
+ global show_opt
+ for opt, arg in opts:
+ if opt == "-h":
+ print_help()
+ exit(0)
+ elif opt == "-p":
+ show_opt = Show.PROCCESS
+ elif opt == '-v':
+ show_opt = Show.VERBOSE
+
+option_parse()
+
--
1.8.3.1