[RFC/PATCH] perf sched: Implement 'perf sched runtime' command

From: Namhyung Kim
Date: Tue Aug 25 2015 - 02:14:30 EST


Currently 'perf sched record' commandsrecords various task stat info but
other perf sched commands don't use them. This is a very simple
implementation of showing those info to users.

$ sudo perf sched record -a sleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.346 MB perf.data (774 samples) ]

$ sudo perf sched runtime | head
TID:COMM Run-time Sleep-time Wait-time IOWait-time Switch Migrate
---------------------- ------------ ------------ ------------ ------------ ------ -------
1347:Socket Thread 20901793 4895322293 0 0 2 2
14855:sshd 7404420 660792968 0 0 4 0
1342:firefox 3657944 998405339 82358 0 11 0
1360:Timer 1429014 1002239227 0 0 11 0
170:systemd-journal 1209607 644878917 0 0 1 0
7:rcu_preempt 1081471 958866004 416514 0 26 6
14856:sshd 973632 506809918 0 0 6 0
14859:sleep 887558 1030418783 4055 0 3 1

Cc: David Ahern <dsahern@xxxxxxxxx>
Signed-off-by: Namhyung Kim <namhyung@xxxxxxxxxx>
---
tools/perf/builtin-sched.c | 361 ++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 359 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 33962612a5e9..112596121242 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -98,6 +98,18 @@ struct work_atoms {
int num_merged;
};

+struct task_runtime {
+ struct list_head list;
+ struct rb_node node;
+ struct thread *thread;
+ u64 run_time;
+ u64 sleep_time;
+ u64 wait_time;
+ u64 iowait_time;
+ int num_switch;
+ int num_migrate;
+};
+
typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *);

struct perf_sched;
@@ -109,6 +121,15 @@ struct trace_sched_handler {
int (*runtime_event)(struct perf_sched *sched, struct perf_evsel *evsel,
struct perf_sample *sample, struct machine *machine);

+ int (*sleep_event)(struct perf_sched *sched, struct perf_evsel *evsel,
+ struct perf_sample *sample, struct machine *machine);
+
+ int (*wait_event)(struct perf_sched *sched, struct perf_evsel *evsel,
+ struct perf_sample *sample, struct machine *machine);
+
+ int (*iowait_event)(struct perf_sched *sched, struct perf_evsel *evsel,
+ struct perf_sample *sample, struct machine *machine);
+
int (*wakeup_event)(struct perf_sched *sched, struct perf_evsel *evsel,
struct perf_sample *sample, struct machine *machine);

@@ -170,7 +191,7 @@ struct perf_sched {
u64 all_count;
u64 cpu_last_switched[MAX_CPUS];
struct rb_root atom_root, sorted_atom_root, merged_atom_root;
- struct list_head sort_list, cmp_pid;
+ struct list_head sort_list, cmp_pid, runtime_list;
bool force;
bool skip_merge;
};
@@ -1424,6 +1445,120 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
return 0;
}

+static struct task_runtime *get_task_runtime(struct perf_sched *sched,
+ struct machine *machine, int pid)
+{
+ struct thread *th;
+ struct task_runtime *tr;
+
+ th = machine__findnew_thread(machine, -1, pid);
+ if (th == NULL)
+ return NULL;
+
+ tr = thread__priv(th);
+ if (tr == NULL) {
+ tr = zalloc(sizeof(*tr));
+ if (tr == NULL)
+ return NULL;
+
+ thread__set_priv(th, tr);
+
+ /* hold a refcnt not to free the thread */
+ tr->thread = thread__get(th);
+
+ list_add(&tr->list, &sched->runtime_list);
+ }
+ return tr;
+}
+
+static int runtime_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
+ struct perf_sample *sample, struct machine *machine)
+{
+ const u32 pid = perf_evsel__intval(evsel, sample, "prev_pid");
+ struct task_runtime *tr;
+
+ tr = get_task_runtime(sched, machine, pid);
+ if (tr == NULL)
+ return -1;
+
+ tr->num_switch++;
+ return 0;
+}
+
+static int runtime_runtime_event(struct perf_sched *sched, struct perf_evsel *evsel,
+ struct perf_sample *sample, struct machine *machine)
+{
+ const u32 pid = perf_evsel__intval(evsel, sample, "pid");
+ const u64 time = perf_evsel__intval(evsel, sample, "runtime");
+ struct task_runtime *tr;
+
+ tr = get_task_runtime(sched, machine, pid);
+ if (tr == NULL)
+ return -1;
+
+ tr->run_time += time;
+ return 0;
+}
+
+static int runtime_sleep_event(struct perf_sched *sched, struct perf_evsel *evsel,
+ struct perf_sample *sample, struct machine *machine)
+{
+ const u32 pid = perf_evsel__intval(evsel, sample, "pid");
+ const u64 time = perf_evsel__intval(evsel, sample, "delay");
+ struct task_runtime *tr;
+
+ tr = get_task_runtime(sched, machine, pid);
+ if (tr == NULL)
+ return -1;
+
+ tr->sleep_time += time;
+ return 0;
+}
+
+static int runtime_wait_event(struct perf_sched *sched, struct perf_evsel *evsel,
+ struct perf_sample *sample, struct machine *machine)
+{
+ const u32 pid = perf_evsel__intval(evsel, sample, "pid");
+ const u64 time = perf_evsel__intval(evsel, sample, "delay");
+ struct task_runtime *tr;
+
+ tr = get_task_runtime(sched, machine, pid);
+ if (tr == NULL)
+ return -1;
+
+ tr->wait_time += time;
+ return 0;
+}
+
+static int runtime_iowait_event(struct perf_sched *sched, struct perf_evsel *evsel,
+ struct perf_sample *sample, struct machine *machine)
+{
+ const u32 pid = perf_evsel__intval(evsel, sample, "pid");
+ const u64 time = perf_evsel__intval(evsel, sample, "delay");
+ struct task_runtime *tr;
+
+ tr = get_task_runtime(sched, machine, pid);
+ if (tr == NULL)
+ return -1;
+
+ tr->iowait_time += time;
+ return 0;
+}
+
+static int runtime_migrate_event(struct perf_sched *sched, struct perf_evsel *evsel,
+ struct perf_sample *sample, struct machine *machine)
+{
+ const u32 pid = perf_evsel__intval(evsel, sample, "pid");
+ struct task_runtime *tr;
+
+ tr = get_task_runtime(sched, machine, pid);
+ if (tr == NULL)
+ return -1;
+
+ tr->num_migrate++;
+ return 0;
+}
+
static int process_sched_switch_event(struct perf_tool *tool,
struct perf_evsel *evsel,
struct perf_sample *sample,
@@ -1463,6 +1598,45 @@ static int process_sched_runtime_event(struct perf_tool *tool,
return 0;
}

+static int process_sched_sleep_event(struct perf_tool *tool,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
+
+ if (sched->tp_handler->sleep_event)
+ return sched->tp_handler->sleep_event(sched, evsel, sample, machine);
+
+ return 0;
+}
+
+static int process_sched_wait_event(struct perf_tool *tool,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
+
+ if (sched->tp_handler->wait_event)
+ return sched->tp_handler->wait_event(sched, evsel, sample, machine);
+
+ return 0;
+}
+
+static int process_sched_iowait_event(struct perf_tool *tool,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
+
+ if (sched->tp_handler->iowait_event)
+ return sched->tp_handler->iowait_event(sched, evsel, sample, machine);
+
+ return 0;
+}
+
static int perf_sched__process_fork_event(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
@@ -1560,6 +1734,143 @@ out_delete:
return rc;
}

+static int perf_sched__read_runtime_events(struct perf_sched *sched)
+{
+ const struct perf_evsel_str_handler handlers[] = {
+ { "sched:sched_switch", process_sched_switch_event, },
+ { "sched:sched_stat_runtime", process_sched_runtime_event, },
+ { "sched:sched_stat_sleep", process_sched_sleep_event, },
+ { "sched:sched_stat_wait", process_sched_wait_event, },
+ { "sched:sched_stat_iowait", process_sched_iowait_event, },
+ { "sched:sched_migrate_task", process_sched_migrate_task_event, },
+ };
+ struct perf_session *session;
+ struct perf_data_file file = {
+ .path = input_name,
+ .mode = PERF_DATA_MODE_READ,
+ .force = sched->force,
+ };
+ int rc = -1;
+
+ session = perf_session__new(&file, false, &sched->tool);
+ if (session == NULL) {
+ pr_debug("No Memory for session\n");
+ return -1;
+ }
+
+ symbol__init(&session->header.env);
+
+ if (perf_session__set_tracepoints_handlers(session, handlers))
+ goto out_delete;
+
+ if (perf_session__has_traces(session, "record -R")) {
+ int err = perf_session__process_events(session);
+ if (err) {
+ pr_err("Failed to process events, error %d", err);
+ goto out_delete;
+ }
+
+ sched->nr_events = session->evlist->stats.nr_events[0];
+ sched->nr_lost_events = session->evlist->stats.total_lost;
+ sched->nr_lost_chunks = session->evlist->stats.nr_events[PERF_RECORD_LOST];
+ }
+
+ rc = 0;
+out_delete:
+ perf_session__delete(session);
+ return rc;
+}
+
+static int task_runtime_cmp(struct perf_sched *sched __maybe_unused,
+ struct task_runtime *a, struct task_runtime *b)
+{
+ return b->run_time - a->run_time;
+}
+
+static void sort_task_runtime(struct perf_sched *sched, struct task_runtime *tr)
+{
+ struct rb_root *root = &sched->sorted_atom_root;
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+
+ while (*p) {
+ struct task_runtime *iter;
+ int cmp;
+
+ parent = *p;
+ iter = rb_entry(parent, struct task_runtime, node);
+
+ cmp = task_runtime_cmp(sched, iter, tr);
+
+ if (cmp > 0)
+ p = &parent->rb_left;
+ else
+ p = &parent->rb_right;
+ }
+
+ rb_link_node(&tr->node, parent, p);
+ rb_insert_color(&tr->node, root);
+}
+
+static void perf_sched__sort_runtime(struct perf_sched *sched)
+{
+ while (!list_empty(&sched->runtime_list)) {
+ struct task_runtime *tr;
+
+ tr = list_first_entry(&sched->runtime_list,
+ struct task_runtime, list);
+ list_del(&tr->list);
+
+ sort_task_runtime(sched, tr);
+ }
+}
+
+static void print_task_runtime(struct task_runtime *tr)
+{
+ struct thread *th = tr->thread;
+
+ /* tid:comm run_time sleep_time wait_time iowait_time switch migrate*/
+ printf("%5d:%-16s %12"PRIu64" %12"PRIu64" %12"PRIu64" %12"PRIu64" %6d %7d\n",
+ th->tid, thread__comm_str(th), tr->run_time, tr->sleep_time,
+ tr->wait_time, tr->iowait_time, tr->num_switch, tr->num_migrate);
+}
+
+static void perf_sched__print_runtime(struct perf_sched *sched)
+{
+ struct task_runtime *tr;
+ struct rb_root *root = &sched->sorted_atom_root;
+ struct rb_node *node = rb_first(root);
+
+ printf("%5s:%-16s %12s %12s %12s %12s %6s %7s\n",
+ "TID", "COMM", "Run-time", "Sleep-time", "Wait-time",
+ "IOWait-time", "Switch", "Migrate");
+ printf("%.22s %.12s %.12s %.12s %.12s %.6s %.7s\n",
+ graph_dotted_line, graph_dotted_line, graph_dotted_line,
+ graph_dotted_line, graph_dotted_line, graph_dotted_line,
+ graph_dotted_line);
+
+ while (node) {
+ tr = rb_entry(node, struct task_runtime, node);
+ print_task_runtime(tr);
+
+ node = rb_next(node);
+ }
+
+ /* release thread refcnt */
+ node = rb_first(root);
+ while (node) {
+ struct rb_node *next = rb_next(node);
+ rb_erase(node, root);
+
+ tr = rb_entry(node, struct task_runtime, node);
+ thread__set_priv(tr->thread, NULL);
+ thread__put(tr->thread);
+ free(tr);
+
+ node = next;
+ }
+}
+
static void print_bad_events(struct perf_sched *sched)
{
if (sched->nr_unordered_timestamps && sched->nr_timestamps) {
@@ -1720,6 +2031,23 @@ static int perf_sched__replay(struct perf_sched *sched)
return 0;
}

+static int perf_sched__runtime(struct perf_sched *sched)
+{
+ setup_pager();
+
+ INIT_LIST_HEAD(&sched->runtime_list);
+ if (perf_sched__read_runtime_events(sched))
+ return -1;
+
+ perf_sched__sort_runtime(sched);
+ perf_sched__print_runtime(sched);
+
+ print_bad_events(sched);
+ printf("\n");
+
+ return 0;
+}
+
static void setup_sorting(struct perf_sched *sched, const struct option *options,
const char * const usage_msg[])
{
@@ -1821,6 +2149,14 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_BOOLEAN('f', "force", &sched.force, "don't complain, do it"),
OPT_END()
};
+ const struct option runtime_options[] = {
+ OPT_INCR('v', "verbose", &verbose,
+ "be more verbose (show symbol address, etc)"),
+ OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
+ "dump raw trace in ASCII"),
+ OPT_BOOLEAN('f', "force", &sched.force, "don't complain, do it"),
+ OPT_END()
+ };
const struct option sched_options[] = {
OPT_STRING('i', "input", &input_name, "file",
"input file name"),
@@ -1838,8 +2174,13 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
"perf sched replay [<options>]",
NULL
};
+ const char * const runtime_usage[] = {
+ "perf sched runtime [<options>]",
+ NULL
+ };
const char *const sched_subcommands[] = { "record", "latency", "map",
- "replay", "script", NULL };
+ "replay", "script", "runtime",
+ NULL };
const char *sched_usage[] = {
NULL,
NULL
@@ -1858,6 +2199,14 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
.switch_event = replay_switch_event,
.fork_event = replay_fork_event,
};
+ struct trace_sched_handler runtime_ops = {
+ .switch_event = runtime_switch_event,
+ .runtime_event = runtime_runtime_event,
+ .sleep_event = runtime_sleep_event,
+ .wait_event = runtime_wait_event,
+ .iowait_event = runtime_iowait_event,
+ .migrate_task_event = runtime_migrate_event,
+ };
unsigned int i;

for (i = 0; i < ARRAY_SIZE(sched.curr_pid); i++)
@@ -1897,6 +2246,14 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
usage_with_options(replay_usage, replay_options);
}
return perf_sched__replay(&sched);
+ } else if (!strncmp(argv[0], "run", 3)) {
+ sched.tp_handler = &runtime_ops;
+ if (argc) {
+ argc = parse_options(argc, argv, runtime_options, runtime_usage, 0);
+ if (argc)
+ usage_with_options(runtime_usage, runtime_options);
+ }
+ return perf_sched__runtime(&sched);
} else {
usage_with_options(sched_usage, sched_options);
}
--
2.5.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/