[RFC PATCH -tip 0/1] tracepoints: pagecache tracepoints proposal
From: Keiichi KII
Date: Fri Dec 11 2009 - 19:42:59 EST
Hello,
I would like to propose several tracepoints for tracing pagecache behavior.
By using these tracepoints, we can monitor pagecache usage at high resolution.
-----------------------------------------------------------------------------
# tracer: nop
#
# TASK-PID CPU# TIMESTAMP FUNCTION
# | | | | |
postmaster-7293 [002] 104039.093744: find_get_page: s_dev=8:2 i_ino=1967242 offset=22302 page_found
postmaster-7293 [000] 104047.138110: add_to_page_cache: s_dev=8:2 i_ino=1967242 offset=5672
postmaster-7293 [000] 104072.590885: remove_from_page_cache: s_dev=8:2 i_ino=5016146 offset=1
-----------------------------------------------------------------------------
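Each record carries the device, inode number, and page offset, and find_get_page
additionally carries a flag such as "page_found", so one regular expression is
enough to split a record's payload into fields. A minimal sketch, assuming the
payload layout is exactly as printed above (parse_data() is only an illustrative
helper, not part of the kernel patch or the attached script):
-----------------------------------------------------------------------------
import re
# e.g. "s_dev=8:2 i_ino=1967242 offset=22302 page_found"
DATA_RE = re.compile(r"s_dev=(\S+) i_ino=(\d+) offset=(\d+)(?: (\w+))?")
def parse_data(data):
    # returns (dev, inode, offset, flag); flag is None unless the record
    # carries a trailing word such as "page_found"
    m = DATA_RE.match(data)
    if not m:
        return None
    return m.groups()
-----------------------------------------------------------------------------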
System-wide pagecache usage is already available from /proc/meminfo, but there
is no way to get finer-grained information, such as per-file or per-process
usage.
A process may share pagecache pages with other processes, add pages to the
pagecache, or remove pages from it.
If the pagecache miss ratio rises, it can lead to extra I/O and hurt system
performance.
So, by using these tracepoints, we can obtain the following information
(a sketch for item 6 follows the list):
1. how many pagecache pages each process has, per file
2. how many pages of each file are cached
3. how many pagecache pages each process shares
4. how often each process adds/removes pagecache pages
5. how long a pagecache page stays in memory
6. pagecache hit rate per file
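The attached script only covers items 1-3. As an illustration of item 6, a
per-file hit rate can be derived from the find_get_page events alone. The sketch
below assumes that a find_get_page record whose flag is not "page_found" means
the page was not cached; account_find_get_page() and hit_rate() are illustrative
helpers, not part of the attached script:
-----------------------------------------------------------------------------
from collections import defaultdict
hits = defaultdict(int)     # (dev, inode) -> lookups served from the cache
misses = defaultdict(int)   # (dev, inode) -> lookups assumed to have missed
def account_find_get_page(dev, inode, flag):
    # "flag" is the last field of a find_get_page record; anything other
    # than "page_found" is assumed here to mean a pagecache miss
    if flag == "page_found":
        hits[(dev, inode)] += 1
    else:
        misses[(dev, inode)] += 1
def hit_rate(dev, inode):
    total = hits[(dev, inode)] + misses[(dev, inode)]
    if total == 0:
        return 0.0
    return float(hits[(dev, inode)]) / total
-----------------------------------------------------------------------------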
In particular, monitoring pagecache usage per file would help us tune
applications such as databases.
I have attached a sample script that counts file-by-file pagecache usage per
process.
The script turns the raw data in <debugfs>/tracing/trace into human-readable
output.
You can run it as:
# echo 1 > <debugfs>/tracing/events/filemap/enable
# cat <debugfs>/tracing/trace | python trace-pagecache-postprocess.py
The script currently implements items 1, 2 and 3 from the list above.
o script output format
  [file list]
    < pagecache usage on a file basis >
    ...
  [process list]
    process: < pagecache usage of this process >
      dev: < pagecache usage of the above process for this file >
      ...
    ...
For example, the output below shows pagecache usage while pgbench (a benchmark
for PostgreSQL) is running.
Inode 1967121 is part of a PostgreSQL database file (75MB).
Inode 5019039 is part of the PostgreSQL executable (2.9MB), "/usr/bin/postgres".
- if "added" (L8) > "cached" (L2):
  pagecache pages are added and removed over and over again
  => bad case for pagecache usage
- if "cached" (L3) >= "added" (L9) and "cached" (L6) > 0:
  no unnecessary I/O operations are performed
  => good case for pagecache usage
("L2" refers to the second line of the output, "2: dev:8:2, ...". A sketch that
applies this rule of thumb follows the example output.)
-----------------------------------------------------------------------------
1: [file list]
2: dev:8:2, inode:1967121, cached: 13M
3: dev:8:2, inode:5019039, cached: 1M
4: [process list]
5: process: kswapd0-369 (cached:0K, added:0K, removed:0K, indirect removed:10M)
6: dev:8:2, inode:1967121, cached:0K, added:0K, removed:0K, indirect removed:10M
7: process: postmaster-5025 (cached:23M, added:26M, removed:616K, indirect removed:176K)
8: dev:8:2, inode:1967121, cached:22M, added:26M, removed:616K, indirect removed:0K
9: dev:8:2, inode:5019039, cached:1M, added:64K, removed:0K, indirect removed:176K
10: process: dd-5028 (cached:0K, added:0K, removed:0K, indirect removed:1M)
11: dev:8:2, inode:1967121, cached:0K, added:0K, removed:0K, indirect removed:848K
12: dev:8:2, inode:5019039, cached:0K, added:0K, removed:0K, indirect removed:396K
-----------------------------------------------------------------------------
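The rule of thumb above can be applied mechanically to the counters for one
file; a minimal sketch (classify_usage() is an illustrative helper that takes
the page counts for a single file, it is not part of the attached script):
-----------------------------------------------------------------------------
def classify_usage(cached_pages, added_pages):
    # more pages added than are still cached => pages were added and then
    # evicted again, i.e. the add/remove cycle repeated (the bad case)
    if added_pages > cached_pages:
        return "bad: pagecache is repeatedly added and removed"
    if cached_pages > 0:
        return "good: reads are served from the pagecache"
    return "no cached pages observed"
-----------------------------------------------------------------------------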
Any comments are welcome.
--
Keiichi Kii <k-keiichi@xxxxxxxxxxxxx>
#!/usr/bin/env python
import sys, re, getopt
class Recorder:
    # Parses the event payloads and keeps one Record per (process, file) pair.
    regex_data = re.compile("s_dev=(.*) i_ino=(.*) offset=(.*)")
    regex_data_for_find = re.compile("s_dev=(.*) i_ino=(.*) offset=(.*) (.*)")
    def __init__(self):
        self.__processes = {}
        self.__files = {}
        self.__records = {}
        (self.__tprocess, self.__tdev, self.__tinode) = (None, None, None)
    def get_files(self):
        return self.__files.values()
    def get_processes(self):
        return self.__processes.values()
    def get_process(self, name):
        if not name in self.__processes:
            self.__processes[name] = Process(name)
        return self.__processes[name]
    def set_target(self, process, dev, inode):
        (self.__tprocess, self.__tdev, self.__tinode) = (process, dev, inode)
    def get_file(self, dev, inode):
        key = (dev, inode)
        if not key in self.__files:
            self.__files[key] = File(dev, inode)
        return self.__files[key]
    def get_records(self):
        return self.__records
    def get_record(self, process, dev, inode):
        # Returns None if the record is filtered out by set_target().
        if self.__tprocess and self.__tprocess != process or \
           self.__tdev and self.__tdev != dev or \
           self.__tinode and self.__tinode != inode:
            return None
        p = self.get_process(process)
        f = self.get_file(dev, inode)
        key = (p, f)
        if not key in self.__records:
            self.__records[key] = Record(p, f)
        return self.__records[key]
    def record_find_get_page(self, process, data):
        m = Recorder.regex_data_for_find.match(data)
        if not m:
            print "find_get_page() illegal format:%s" % data
            return
        (dev, inode, index, flag) = m.groups()
        if flag == "page_found":
            r = self.get_record(process, dev, inode)
            if r != None:
                r.access_to_cached_page(index)
                r.get_file().add_page(index)
    def record_add_to_page_cache(self, process, data):
        m = Recorder.regex_data.match(data)
        (dev, inode, index) = m.groups()
        r = self.get_record(process, dev, inode)
        if r != None:
            r.add_to_page_cache(index)
            r.get_file().add_page(index)
    def record_remove_from_page_cache(self, process, data):
        m = Recorder.regex_data.match(data)
        (dev, inode, index) = m.groups()
        r = self.get_record(process, dev, inode)
        if r != None:
            r.remove_from_page_cache(index)
            r.get_file().remove_page(index)
class Record:
    # One Record tracks the pagecache activity of a single process on a
    # single file.  A page removal counts as "removed" only if this record
    # both added the page and still has it on its cached list; all other
    # removals (e.g. kswapd evicting pages brought in by another process)
    # count as "indirect removed".
    def __init__(self, process, file):
        self.__process = process
        self.__process.add_record(self)
        self.__file = file
        self.__file.add_record(self)
        self.__added_page_list = []
        self.__removed_page_list = []
        self.__indirect_removed_page_list = []
        self.__cached_page_list = []
    def is_added(self, index):
        if index in self.__added_page_list and index in self.__cached_page_list:
            return True
        else:
            return False
    def get_file(self):
        return self.__file
    def count_cached_page(self):
        return len(self.__cached_page_list)
    def count_added_page(self):
        return len(self.__added_page_list)
    def count_removed_page(self):
        return len(self.__removed_page_list)
    def count_indirect_removed_page(self):
        return len(self.__indirect_removed_page_list)
    def access_to_cached_page(self, index):
        self.__cached_page_list.append(index)
    def add_to_page_cache(self, index):
        self.__added_page_list.append(index)
    def remove_from_page_cache(self, index):
        if self.is_added(index):
            self.__removed_page_list.append(index)
            self.__cached_page_list.remove(index)
        else:
            self.__indirect_removed_page_list.append(index)
    def __str__(self):
        return "p=%s,f=%s,cached:%d,added:%d,removed:%d,indirect_removed:%d" % \
            (self.__process.get_name(), \
             self.__file.get_dev() + "," + self.__file.get_inode(), \
             self.count_cached_page(), self.count_added_page(), \
             self.count_removed_page(), self.count_indirect_removed_page())
class File:
    def __init__(self, dev, inode):
        self.__dev = dev
        self.__inode = inode
        self.__cached_page_list = []
        self.__records = []
        self.__reliable = True
    def get_dev(self):
        return self.__dev
    def get_inode(self):
        return self.__inode
    def add_record(self, record):
        self.__records.append(record)
    def remove_page(self, index):
        if index in self.__cached_page_list:
            self.__cached_page_list.remove(index)
        else:
            self.__reliable = False
    def add_page(self, index):
        if not index in self.__cached_page_list:
            self.__cached_page_list.append(index)
    def count_cached_page(self):
        return len(self.__cached_page_list)
class Process:
    def __init__(self, name):
        self.__name = name
        self.__records = []
    def get_name(self):
        return self.__name
    def add_record(self, record):
        self.__records.append(record)
    def get_records(self):
        return self.__records
    def sum_cached_page(self):
        return reduce(lambda s, r: s + r.count_cached_page(), self.__records, 0)
    def sum_added_page(self):
        return reduce(lambda s, r: s + r.count_added_page(), self.__records, 0)
    def sum_removed_page(self):
        return reduce(lambda s, r: s + r.count_removed_page(), self.__records, 0)
    def sum_indirect_removed_page(self):
        return reduce(lambda s, r: s + r.count_indirect_removed_page(), \
                      self.__records, 0)
def convert_to_size(num):
    # Convert a page count into a human-readable size, assuming 4KB pages
    # (e.g. 3328 pages -> "13M").
    PAGE_SIZE = 4
    units = ['K', 'M', 'G', 'T']
    s = num * PAGE_SIZE
    for u in units:
        s, m = divmod(s, 1000)
        if s == 0:
            return str(m) + u
    return str(num * PAGE_SIZE) + "K"
def process_events(recorder, source):
    regex_event = \
        re.compile(r"\s*([\w\d-]*)\s*\[\d*\]\s*([0-9.]*):\s*(\w*):\s*(.*)")
    for line in source:
        m = regex_event.match(line)
        if not m:
            continue
        (process, time, event, data) = m.groups()
        if event == "find_get_page":
            recorder.record_find_get_page(process, data)
        elif event == "add_to_page_cache":
            recorder.record_add_to_page_cache(process, data)
        elif event == "remove_from_page_cache":
            recorder.record_remove_from_page_cache(process, data)
        else:
            print "Invalid event: %s" % line
            sys.exit(-1)
def show_statistics(recorder):
    print "[file list]"
    for f in recorder.get_files():
        print " dev:%s, inode:%s, cached: %s" % (f.get_dev(), f.get_inode(), \
            convert_to_size(f.count_cached_page()))
    print "[process list]"
    for p in recorder.get_processes():
        print " process: %s" % p.get_name(),
        print "(cached: %s, added: %s, removed: %s, indirect removed: %s)" % \
            tuple(map(convert_to_size, [p.sum_cached_page(),
                                        p.sum_added_page(),
                                        p.sum_removed_page(),
                                        p.sum_indirect_removed_page()]))
        for r in p.get_records():
            f = r.get_file()
            print " dev:%s, inode:%s," % (f.get_dev(), f.get_inode()),
            print "cached:%s, added:%s, removed:%s, indirect removed:%s" % \
                tuple(map(convert_to_size, [r.count_cached_page(),
                                            r.count_added_page(),
                                            r.count_removed_page(),
                                            r.count_indirect_removed_page()]))
def usage(cmd):
    print "%s [options]... [file]" % cmd
    print " -p, --process=name filter by process name"
    print " -i, --inode=inode filter by inode"
    print " -d, --dev=device filter by device"
    print " -h, --help print this help"
def main(argv):
    try:
        opts, args = getopt.getopt(argv[1:], "hp:i:d:", \
                                   ["help", "process=", "inode=", "dev="])
    except getopt.GetoptError:
        usage(sys.argv[0])
        sys.exit(-1)
    (process, dev, inode) = (None, None, None)
    for o, a in opts:
        if o in ("-h", "--help"):
            usage(argv[0])
            sys.exit()
        if o in ("-p", "--process"):
            process = a
        if o in ("-d", "--dev"):
            dev = a
        if o in ("-i", "--inode"):
            inode = a
    # Read the trace from the file given on the command line, if any,
    # otherwise from stdin (as in the "cat ... | python ..." example above).
    source = open(args[0]) if args else sys.stdin
    recorder = Recorder()
    recorder.set_target(process, dev, inode)
    process_events(recorder, source)
    show_statistics(recorder)
if __name__ == "__main__":
    main(sys.argv)