Re: [PATCH 6/6] perf script: update export-to-postgresql to support callchain export

From: Adrian Hunter
Date: Fri May 06 2016 - 07:32:22 EST


On 28/04/16 11:19, Chris Phlipot wrote:
> Update the export-to-postgresql.py to support the newly introduced
> callchain export.
>
> callchains are added into the existing call_paths table and can now
> be associated with samples when the "callchains" commandline option
> is used with the script.
>
> ex. $perf script -s export-to-postgresql.py example_db all callchains
>
> Includes the following changes to enable callchain export via the
> python export APIs:
>
> -Add the "callchains" commandline option, which is used to enable
> callchain export by setting the perf_db_export_callchains global
> -Add perf_db_export_callchains checks for call_path table creation
> and population.
> -Add call_path_id to samples_table to conform with the new API
>
> example usage and output using a small test app:
>
> test_app.c:
>
> volatile int x = 0;
> void inc_x_loop()
> {
> int i;
> for(i=0; i<100000000; i++)
> x++;
> }
>
> void a()
> {
> inc_x_loop();
> }
>
> void b()
> {
> inc_x_loop();
> }
>
> int main()
> {
> a();
> b();
> return 0;
> }
>
> example usage:
> $ gcc -g -O0 test_app.c
> $ ./perf record --call-graph=dwarf ./a.out
> [ perf record: Woken up 77 times to write data ]
> [ perf record: Captured and wrote 19.373 MB perf.data (2404 samples) ]
>
> $ ./perf script -s scripts/python/export-to-postgresql.py
> example_db all callchains
>
> $ psql example_db
>
> example_db=#
> SELECT
> (SELECT name FROM symbols WHERE id = cps.symbol_id) as symbol,
> (SELECT name FROM symbols WHERE id =
> (SELECT symbol_id from call_paths where id = cps.parent_id))
> as parent_symbol,
> sum(period) as event_count
> FROM samples join call_paths as cps on call_path_id = cps.id
> GROUP BY cps.id,evsel_id
> ORDER BY event_count DESC
> LIMIT 5;
>
> symbol | parent_symbol | event_count
> ------------------+--------------------------+-------------
> inc_x_loop | a | 734250982
> inc_x_loop | b | 731028057
> unknown | unknown | 1335858
> task_tick_fair | scheduler_tick | 1238842
> update_wall_time | tick_do_update_jiffies64 | 650373
> (5 rows)
>
> The above data shows total "self time" in cycles for each call path that
> was sampled. It is intended to demonstrate how it accounts separately
> for the two ways to reach the "inc_x_loop" function (via "a" and "b").
> Recursive common table expressions can also be used to get cumulative
> time spent in a function, but that is beyond the scope of this
> basic example.
>
> Signed-off-by: Chris Phlipot <cphlipot0@xxxxxxxxx>

Acked-by: Adrian Hunter <adrian.hunter@xxxxxxxxx>

> ---
> tools/perf/scripts/python/export-to-postgresql.py | 47 +++++++++++++++--------
> 1 file changed, 30 insertions(+), 17 deletions(-)
>
> diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py
> index 6f0ca68..7656ff8 100644
> --- a/tools/perf/scripts/python/export-to-postgresql.py
> +++ b/tools/perf/scripts/python/export-to-postgresql.py
> @@ -223,11 +223,14 @@ sys.path.append(os.environ['PERF_EXEC_PATH'] + \
>
> perf_db_export_mode = True
> perf_db_export_calls = False
> +perf_db_export_callchains = False
> +
>
> def usage():
> - print >> sys.stderr, "Usage is: export-to-postgresql.py <database name> [<columns>] [<calls>]"
> + print >> sys.stderr, "Usage is: export-to-postgresql.py <database name> [<columns>] [<calls>] [<callchains>]"
> print >> sys.stderr, "where: columns 'all' or 'branches'"
> - print >> sys.stderr, " calls 'calls' => create calls table"
> + print >> sys.stderr, " calls 'calls' => create calls and call_paths table"
> + print >> sys.stderr, " callchains 'callchains' => create call_paths table"
> raise Exception("Too few arguments")
>
> if (len(sys.argv) < 2):
> @@ -245,9 +248,11 @@ if columns not in ("all", "branches"):
>
> branches = (columns == "branches")
>
> -if (len(sys.argv) >= 4):
> - if (sys.argv[3] == "calls"):
> +for i in range(3,len(sys.argv)):
> + if (sys.argv[i] == "calls"):
> perf_db_export_calls = True
> + elif (sys.argv[i] == "callchains"):
> + perf_db_export_callchains = True
> else:
> usage()
>
> @@ -358,14 +363,16 @@ else:
> 'transaction bigint,'
> 'data_src bigint,'
> 'branch_type integer,'
> - 'in_tx boolean)')
> + 'in_tx boolean,'
> + 'call_path_id bigint)')
>
> -if perf_db_export_calls:
> +if perf_db_export_calls or perf_db_export_callchains:
> do_query(query, 'CREATE TABLE call_paths ('
> 'id bigint NOT NULL,'
> 'parent_id bigint,'
> 'symbol_id bigint,'
> 'ip bigint)')
> +if perf_db_export_calls:
> do_query(query, 'CREATE TABLE calls ('
> 'id bigint NOT NULL,'
> 'thread_id bigint,'
> @@ -427,7 +434,7 @@ do_query(query, 'CREATE VIEW comm_threads_view AS '
> '(SELECT tid FROM threads WHERE id = thread_id) AS tid'
> ' FROM comm_threads')
>
> -if perf_db_export_calls:
> +if perf_db_export_calls or perf_db_export_callchains:
> do_query(query, 'CREATE VIEW call_paths_view AS '
> 'SELECT '
> 'c.id,'
> @@ -443,6 +450,7 @@ if perf_db_export_calls:
> '(SELECT dso_id FROM symbols WHERE id = p.symbol_id) AS parent_dso_id,'
> '(SELECT dso FROM symbols_view WHERE id = p.symbol_id) AS parent_dso_short_name'
> ' FROM call_paths c INNER JOIN call_paths p ON p.id = c.parent_id')
> +if perf_db_export_calls:
> do_query(query, 'CREATE VIEW calls_view AS '
> 'SELECT '
> 'calls.id,'
> @@ -540,8 +548,9 @@ dso_file = open_output_file("dso_table.bin")
> symbol_file = open_output_file("symbol_table.bin")
> branch_type_file = open_output_file("branch_type_table.bin")
> sample_file = open_output_file("sample_table.bin")
> -if perf_db_export_calls:
> +if perf_db_export_calls or perf_db_export_callchains:
> call_path_file = open_output_file("call_path_table.bin")
> +if perf_db_export_calls:
> call_file = open_output_file("call_table.bin")
>
> def trace_begin():
> @@ -553,8 +562,8 @@ def trace_begin():
> comm_table(0, "unknown")
> dso_table(0, 0, "unknown", "unknown", "")
> symbol_table(0, 0, 0, 0, 0, "unknown")
> - sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
> - if perf_db_export_calls:
> + sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
> + if perf_db_export_calls or perf_db_export_callchains:
> call_path_table(0, 0, 0, 0)
>
> unhandled_count = 0
> @@ -570,8 +579,9 @@ def trace_end():
> copy_output_file(symbol_file, "symbols")
> copy_output_file(branch_type_file, "branch_types")
> copy_output_file(sample_file, "samples")
> - if perf_db_export_calls:
> + if perf_db_export_calls or perf_db_export_callchains:
> copy_output_file(call_path_file, "call_paths")
> + if perf_db_export_calls:
> copy_output_file(call_file, "calls")
>
> print datetime.datetime.today(), "Removing intermediate files..."
> @@ -584,8 +594,9 @@ def trace_end():
> remove_output_file(symbol_file)
> remove_output_file(branch_type_file)
> remove_output_file(sample_file)
> - if perf_db_export_calls:
> + if perf_db_export_calls or perf_db_export_callchains:
> remove_output_file(call_path_file)
> + if perf_db_export_calls:
> remove_output_file(call_file)
> os.rmdir(output_dir_name)
> print datetime.datetime.today(), "Adding primary keys"
> @@ -598,8 +609,9 @@ def trace_end():
> do_query(query, 'ALTER TABLE symbols ADD PRIMARY KEY (id)')
> do_query(query, 'ALTER TABLE branch_types ADD PRIMARY KEY (id)')
> do_query(query, 'ALTER TABLE samples ADD PRIMARY KEY (id)')
> - if perf_db_export_calls:
> + if perf_db_export_calls or perf_db_export_callchains:
> do_query(query, 'ALTER TABLE call_paths ADD PRIMARY KEY (id)')
> + if perf_db_export_calls:
> do_query(query, 'ALTER TABLE calls ADD PRIMARY KEY (id)')
>
> print datetime.datetime.today(), "Adding foreign keys"
> @@ -622,10 +634,11 @@ def trace_end():
> 'ADD CONSTRAINT symbolfk FOREIGN KEY (symbol_id) REFERENCES symbols (id),'
> 'ADD CONSTRAINT todsofk FOREIGN KEY (to_dso_id) REFERENCES dsos (id),'
> 'ADD CONSTRAINT tosymbolfk FOREIGN KEY (to_symbol_id) REFERENCES symbols (id)')
> - if perf_db_export_calls:
> + if perf_db_export_calls or perf_db_export_callchains:
> do_query(query, 'ALTER TABLE call_paths '
> 'ADD CONSTRAINT parentfk FOREIGN KEY (parent_id) REFERENCES call_paths (id),'
> 'ADD CONSTRAINT symbolfk FOREIGN KEY (symbol_id) REFERENCES symbols (id)')
> + if perf_db_export_calls:
> do_query(query, 'ALTER TABLE calls '
> 'ADD CONSTRAINT threadfk FOREIGN KEY (thread_id) REFERENCES threads (id),'
> 'ADD CONSTRAINT commfk FOREIGN KEY (comm_id) REFERENCES comms (id),'
> @@ -693,11 +706,11 @@ def branch_type_table(branch_type, name, *x):
> value = struct.pack(fmt, 2, 4, branch_type, n, name)
> branch_type_file.write(value)
>
> -def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, branch_type, in_tx, *x):
> +def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, branch_type, in_tx, call_path_id, *x):
> if branches:
> - value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiiiB", 17, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 4, branch_type, 1, in_tx)
> + value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiiiBiq", 18, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 4, branch_type, 1, in_tx, 8, call_path_id)
> else:
> - value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiqiqiqiqiiiB", 21, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 8, period, 8, weight, 8, transaction, 8, data_src, 4, branch_type, 1, in_tx)
> + value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiqiqiqiqiiiBiq", 22, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 8, period, 8, weight, 8, transaction, 8, data_src, 4, branch_type, 1, in_tx, 8, call_path_id)
> sample_file.write(value)
>
> def call_path_table(cp_id, parent_id, symbol_id, ip, *x):
>