[RFC v1 13/16] perf kwork top: Add -C/--cpu -i/--input -n/--name -s/--sort --time options

From: Yang Jihong
Date: Sat Aug 12 2023 - 04:52:38 EST


Provide the following options for perf kwork top:

1. -C, --cpu <cpu> list of cpus to profile
2. -i, --input <file> input file name
3. -n, --name <name> event name to profile
4. -s, --sort <key[,key2...]> sort by key(s): rate, runtime, tid
5. --time <str> Time span for analysis (start,stop)

Example usage:

# perf kwork top -h

Usage: perf kwork top [<options>]

-C, --cpu <cpu> list of cpus to profile
-i, --input <file> input file name
-n, --name <name> event name to profile
-s, --sort <key[,key2...]>
sort by key(s): rate, runtime, tid
--time <str> Time span for analysis (start,stop)

# perf kwork top -C 2,4,5

Total : 51226.940 ms, 3 cpus
%Cpu(s): 92.59% id, 0.00% hi, 0.09% si
%Cpu2 [| 4.61%]
%Cpu4 [ 0.01%]
%Cpu5 [||||| 17.31%]

PID %CPU RUNTIME COMMMAND
----------------------------------------------------
0 99.98 17073.515 ms swapper/4
0 95.17 16250.874 ms swapper/2
0 82.62 14108.577 ms swapper/5
4342 21.70 3708.358 ms perf
16 0.13 22.296 ms rcu_preempt
75 0.02 4.261 ms kworker/2:1
98 0.01 2.540 ms jbd2/sda-8
61 0.01 3.404 ms kcompactd0
87 0.00 0.145 ms kworker/5:1H
73 0.00 0.596 ms kworker/5:1
41 0.00 0.041 ms ksoftirqd/5
40 0.00 0.718 ms migration/5
64 0.00 0.115 ms kworker/4:1
35 0.00 0.556 ms migration/4
353 0.00 1.143 ms sshd
26 0.00 1.665 ms ksoftirqd/2
25 0.00 0.662 ms migration/2

# perf kwork top -i perf.data

Total : 136601.588 ms, 8 cpus
%Cpu(s): 95.66% id, 0.04% hi, 0.05% si
%Cpu0 [ 0.02%]
%Cpu1 [ 0.01%]
%Cpu2 [| 4.61%]
%Cpu3 [ 0.04%]
%Cpu4 [ 0.01%]
%Cpu5 [||||| 17.31%]
%Cpu6 [ 0.51%]
%Cpu7 [||| 11.42%]

PID %CPU RUNTIME COMMMAND
----------------------------------------------------
0 99.98 17073.515 ms swapper/4
0 99.98 17072.173 ms swapper/1
0 99.93 17064.229 ms swapper/3
0 99.62 17011.013 ms swapper/0
0 99.47 16985.180 ms swapper/6
0 95.17 16250.874 ms swapper/2
0 88.51 15111.684 ms swapper/7
0 82.62 14108.577 ms swapper/5
4342 33.00 5644.045 ms perf
4344 0.43 74.351 ms perf
16 0.13 22.296 ms rcu_preempt
4345 0.05 10.093 ms perf
4343 0.05 8.769 ms perf
4341 0.02 4.882 ms perf
4095 0.02 4.605 ms kworker/7:1
75 0.02 4.261 ms kworker/2:1
120 0.01 1.909 ms systemd-journal
98 0.01 2.540 ms jbd2/sda-8
61 0.01 3.404 ms kcompactd0
667 0.01 2.542 ms kworker/u16:2
4340 0.00 1.052 ms kworker/7:2
97 0.00 0.489 ms kworker/7:1H
51 0.00 0.209 ms ksoftirqd/7
50 0.00 0.646 ms migration/7
76 0.00 0.753 ms kworker/6:1
45 0.00 0.572 ms migration/6
87 0.00 0.145 ms kworker/5:1H
73 0.00 0.596 ms kworker/5:1
41 0.00 0.041 ms ksoftirqd/5
40 0.00 0.718 ms migration/5
64 0.00 0.115 ms kworker/4:1
35 0.00 0.556 ms migration/4
353 0.00 2.600 ms sshd
74 0.00 0.205 ms kworker/3:1
33 0.00 1.576 ms kworker/3:0H
30 0.00 0.996 ms migration/3
26 0.00 1.665 ms ksoftirqd/2
25 0.00 0.662 ms migration/2
397 0.00 0.057 ms kworker/1:1
20 0.00 1.005 ms migration/1
2909 0.00 1.053 ms kworker/0:2
17 0.00 0.720 ms migration/0
15 0.00 0.039 ms ksoftirqd/0

# perf kwork top -n perf

Total : 136601.588 ms, 8 cpus
%Cpu(s): 95.66% id, 0.04% hi, 0.05% si
%Cpu0 [ 0.01%]
%Cpu1 [ 0.00%]
%Cpu2 [| 4.44%]
%Cpu3 [ 0.00%]
%Cpu4 [ 0.00%]
%Cpu5 [ 0.00%]
%Cpu6 [ 0.49%]
%Cpu7 [||| 11.38%]

PID %CPU RUNTIME COMMMAND
----------------------------------------------------
4342 15.74 2695.516 ms perf
4344 0.43 74.351 ms perf
4345 0.05 10.093 ms perf
4343 0.05 8.769 ms perf
4341 0.02 4.882 ms perf

# perf kwork top -s tid

Total : 136601.588 ms, 8 cpus
%Cpu(s): 95.66% id, 0.04% hi, 0.05% si
%Cpu0 [ 0.02%]
%Cpu1 [ 0.01%]
%Cpu2 [| 4.61%]
%Cpu3 [ 0.04%]
%Cpu4 [ 0.01%]
%Cpu5 [||||| 17.31%]
%Cpu6 [ 0.51%]
%Cpu7 [||| 11.42%]

PID %CPU RUNTIME COMMMAND
----------------------------------------------------
0 99.62 17011.013 ms swapper/0
0 99.98 17072.173 ms swapper/1
0 95.17 16250.874 ms swapper/2
0 99.93 17064.229 ms swapper/3
0 99.98 17073.515 ms swapper/4
0 82.62 14108.577 ms swapper/5
0 99.47 16985.180 ms swapper/6
0 88.51 15111.684 ms swapper/7
15 0.00 0.039 ms ksoftirqd/0
16 0.13 22.296 ms rcu_preempt
17 0.00 0.720 ms migration/0
20 0.00 1.005 ms migration/1
25 0.00 0.662 ms migration/2
26 0.00 1.665 ms ksoftirqd/2
30 0.00 0.996 ms migration/3
33 0.00 1.576 ms kworker/3:0H
35 0.00 0.556 ms migration/4
40 0.00 0.718 ms migration/5
41 0.00 0.041 ms ksoftirqd/5
45 0.00 0.572 ms migration/6
50 0.00 0.646 ms migration/7
51 0.00 0.209 ms ksoftirqd/7
61 0.01 3.404 ms kcompactd0
64 0.00 0.115 ms kworker/4:1
73 0.00 0.596 ms kworker/5:1
74 0.00 0.205 ms kworker/3:1
75 0.02 4.261 ms kworker/2:1
76 0.00 0.753 ms kworker/6:1
87 0.00 0.145 ms kworker/5:1H
97 0.00 0.489 ms kworker/7:1H
98 0.01 2.540 ms jbd2/sda-8
120 0.01 1.909 ms systemd-journal
353 0.00 2.600 ms sshd
397 0.00 0.057 ms kworker/1:1
667 0.01 2.542 ms kworker/u16:2
2909 0.00 1.053 ms kworker/0:2
4095 0.02 4.605 ms kworker/7:1
4340 0.00 1.052 ms kworker/7:2
4341 0.02 4.882 ms perf
4342 33.00 5644.045 ms perf
4343 0.05 8.769 ms perf
4344 0.43 74.351 ms perf
4345 0.05 10.093 ms perf

# perf kwork top --time 128800,

Total : 53495.122 ms, 8 cpus
%Cpu(s): 94.71% id, 0.09% hi, 0.09% si
%Cpu0 [ 0.07%]
%Cpu1 [ 0.04%]
%Cpu2 [|| 8.49%]
%Cpu3 [ 0.09%]
%Cpu4 [ 0.02%]
%Cpu5 [ 0.06%]
%Cpu6 [ 0.12%]
%Cpu7 [|||||| 21.24%]

PID %CPU RUNTIME COMMMAND
----------------------------------------------------
0 99.96 3981.363 ms swapper/4
0 99.94 3978.955 ms swapper/1
0 99.91 9329.375 ms swapper/5
0 99.87 4906.829 ms swapper/3
0 99.86 9028.064 ms swapper/6
0 98.67 3928.161 ms swapper/0
0 91.17 8388.432 ms swapper/2
0 78.65 7125.602 ms swapper/7
4342 29.42 2675.198 ms perf
16 0.18 16.817 ms rcu_preempt
4345 0.09 8.183 ms perf
4344 0.04 4.290 ms perf
4343 0.03 2.844 ms perf
353 0.03 2.600 ms sshd
4095 0.02 2.702 ms kworker/7:1
120 0.02 1.909 ms systemd-journal
98 0.02 2.540 ms jbd2/sda-8
61 0.02 1.886 ms kcompactd0
667 0.02 1.011 ms kworker/u16:2
75 0.02 2.693 ms kworker/2:1
4341 0.01 1.838 ms perf
30 0.01 0.788 ms migration/3
26 0.01 1.665 ms ksoftirqd/2
20 0.01 0.752 ms migration/1
2909 0.01 0.604 ms kworker/0:2
4340 0.00 0.635 ms kworker/7:2
97 0.00 0.214 ms kworker/7:1H
51 0.00 0.209 ms ksoftirqd/7
50 0.00 0.646 ms migration/7
76 0.00 0.602 ms kworker/6:1
45 0.00 0.366 ms migration/6
87 0.00 0.145 ms kworker/5:1H
40 0.00 0.446 ms migration/5
35 0.00 0.318 ms migration/4
74 0.00 0.205 ms kworker/3:1
33 0.00 0.080 ms kworker/3:0H
25 0.00 0.448 ms migration/2
397 0.00 0.057 ms kworker/1:1
17 0.00 0.365 ms migration/0

Signed-off-by: Yang Jihong <yangjihong1@xxxxxxxxxx>
---
tools/perf/Documentation/perf-kwork.txt | 26 +++++++++++
tools/perf/builtin-kwork.c | 57 +++++++++++++++++++++++--
2 files changed, 80 insertions(+), 3 deletions(-)

diff --git a/tools/perf/Documentation/perf-kwork.txt b/tools/perf/Documentation/perf-kwork.txt
index 0601fcb0feea..34d6c285e527 100644
--- a/tools/perf/Documentation/perf-kwork.txt
+++ b/tools/perf/Documentation/perf-kwork.txt
@@ -178,6 +178,32 @@ OPTIONS for 'perf kwork timehist'
stop time is not given (i.e, time string is 'x.y,') then analysis goes
to end of file.

+OPTIONS for 'perf kwork top'
+----------------------------
+
+-C::
+--cpu::
+ Only show events for the given CPU(s) (comma separated list).
+
+-i::
+--input::
+ Input file name. (default: perf.data unless stdin is a fifo)
+
+-n::
+--name::
+ Only show events for the given name.
+
+-s::
+--sort::
+ Sort by key(s): rate, runtime, tid
+
+--time::
+ Only analyze samples within given time window: <start>,<stop>. Times
+ have the format seconds.microseconds. If start is not given (i.e., time
+ string is ',x.y') then analysis starts at the beginning of the file. If
+ stop time is not given (i.e., time string is 'x.y,') then analysis goes
+ to end of file.
+
SEE ALSO
--------
linkperf:perf-record[1]
diff --git a/tools/perf/builtin-kwork.c b/tools/perf/builtin-kwork.c
index c741cc1a543f..d5949ff4bd15 100644
--- a/tools/perf/builtin-kwork.c
+++ b/tools/perf/builtin-kwork.c
@@ -146,6 +146,24 @@ static int cpu_usage_cmp(struct kwork_work *l, struct kwork_work *r)
return 0;
}

+static int id_or_cpu_r_cmp(struct kwork_work *l, struct kwork_work *r)
+{
+ if (l->id < r->id) /* reversed compare: the smaller id yields 1 */
+ return 1;
+ if (l->id > r->id)
+ return -1;
+
+ if (l->id != 0) /* equal non-zero ids compare equal; cpu is ignored */
+ return 0;
+
+ if (l->cpu < r->cpu) /* both ids are 0: fall back to a reversed cpu compare */
+ return 1;
+ if (l->cpu > r->cpu)
+ return -1;
+
+ return 0;
+}
+
static int sort_dimension__add(struct perf_kwork *kwork __maybe_unused,
const char *tok, struct list_head *list)
{
@@ -174,6 +192,10 @@ static int sort_dimension__add(struct perf_kwork *kwork __maybe_unused,
.name = "rate",
.cmp = cpu_usage_cmp,
};
+ static struct sort_dimension tid_sort_dimension = {
+ .name = "tid",
+ .cmp = id_or_cpu_r_cmp,
+ };
struct sort_dimension *available_sorts[] = {
&id_sort_dimension,
&max_sort_dimension,
@@ -181,6 +203,7 @@ static int sort_dimension__add(struct perf_kwork *kwork __maybe_unused,
&runtime_sort_dimension,
&avg_sort_dimension,
&rate_sort_dimension,
+ &tid_sort_dimension,
};

if (kwork->report == KWORK_REPORT_LATENCY)
@@ -381,6 +404,17 @@ static void profile_update_timespan(struct perf_kwork *kwork,
kwork->timeend = sample->time;
}

+static bool profile_name_match(struct perf_kwork *kwork,
+ struct kwork_work *work)
+{
+ if (kwork->profile_name && work->name &&
+ (strcmp(work->name, kwork->profile_name) != 0)) {
+ return false; /* both names are set and they differ */
+ }
+
+ return true; /* also reached when either name is NULL: nothing to filter on */
+}
+
static bool profile_event_match(struct perf_kwork *kwork,
struct kwork_work *work,
struct perf_sample *sample)
@@ -396,10 +430,14 @@ static bool profile_event_match(struct perf_kwork *kwork,
((ptime->end != 0) && (ptime->end < time)))
return false;

- if ((kwork->profile_name != NULL) &&
- (work->name != NULL) &&
- (strcmp(work->name, kwork->profile_name) != 0))
+ /*
+ * report top needs to collect the runtime of all tasks to
+ * calculate the load of each core.
+ */
+ if ((kwork->report != KWORK_REPORT_TOP) &&
+ !profile_name_match(kwork, work)) {
return false;
+ }

profile_update_timespan(kwork, sample);
return true;
@@ -2070,6 +2108,9 @@ static void top_merge_tasks(struct perf_kwork *kwork)
rb_erase_cached(node, &class->work_root);
data = rb_entry(node, struct kwork_work, node);

+ if (!profile_name_match(kwork, data))
+ continue;
+
cpu = data->cpu;
merged_work = find_work_by_id(&merged_root, data->id,
data->id == 0 ? cpu : -1);
@@ -2329,6 +2370,16 @@ int cmd_kwork(int argc, const char **argv)
OPT_PARENT(kwork_options)
};
const struct option top_options[] = {
+ OPT_STRING('s', "sort", &kwork.sort_order, "key[,key2...]",
+ "sort by key(s): rate, runtime, tid"),
+ OPT_STRING('C', "cpu", &kwork.cpu_list, "cpu",
+ "list of cpus to profile"),
+ OPT_STRING('n', "name", &kwork.profile_name, "name",
+ "event name to profile"),
+ OPT_STRING(0, "time", &kwork.time_str, "str",
+ "Time span for analysis (start,stop)"),
+ OPT_STRING('i', "input", &input_name, "file",
+ "input file name"),
OPT_PARENT(kwork_options)
};
const char *kwork_usage[] = {
--
2.30.GIT