[PATCH 2/2] perf tool: add container identifier entry related changes

From: Hari Bathini
Date: Thu Aug 25 2016 - 08:35:34 EST


With the introduction of container identifier entry in sample data,
perf sample data can now be analyzed with regard to containers. This
patch adds cid entry related support in perf tool.

Shown below is the output of perf report, sorted by cid, on a system
that was running three containers at the time of perf record. It
clearly shows one of the containers using considerably more kernel
memory than the others:

$ perf report -s cid -n --stdio
#
# Total Lost Samples: 0
#
# Samples: 2K of event 'kmem:kmalloc'
# Event count (approx.): 2171
#
# Overhead Samples Container ID
# ........ ............ .............
#
91.20% 1980 4026532048
3.55% 77 4026532105
2.67% 58 4026532162
2.58% 56 4026531835

Signed-off-by: Hari Bathini <hbathini@xxxxxxxxxxxxxxxxxx>
---
tools/include/uapi/linux/perf_event.h | 3 ++-
tools/perf/Documentation/perf-report.txt | 3 ++-
tools/perf/Documentation/perf-script.txt | 4 ++--
tools/perf/builtin-script.c | 14 +++++++++++-
tools/perf/util/event.h | 1 +
tools/perf/util/evsel.c | 34 ++++++++++++++++++++++++++++++
tools/perf/util/hist.c | 2 ++
tools/perf/util/hist.h | 1 +
tools/perf/util/session.c | 3 +++
tools/perf/util/sort.c | 22 +++++++++++++++++++
tools/perf/util/sort.h | 2 ++
11 files changed, 83 insertions(+), 6 deletions(-)

diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index c66a485..fb4f902 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -139,8 +139,9 @@ enum perf_event_sample_format {
PERF_SAMPLE_IDENTIFIER = 1U << 16,
PERF_SAMPLE_TRANSACTION = 1U << 17,
PERF_SAMPLE_REGS_INTR = 1U << 18,
+ PERF_SAMPLE_CID = 1U << 19,

- PERF_SAMPLE_MAX = 1U << 19, /* non-ABI */
+ PERF_SAMPLE_MAX = 1U << 20, /* non-ABI */
};

/*
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 2d17462..b081aef 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -68,12 +68,13 @@ OPTIONS
--sort=::
Sort histogram entries by given key(s) - multiple keys can be specified
in CSV format. Following sort keys are available:
- pid, comm, dso, symbol, parent, cpu, socket, srcline, weight, local_weight.
+ pid, comm, cid, dso, symbol, parent, cpu, socket, srcline, weight, local_weight.

Each key has following meaning:

- comm: command (name) of the task which can be read via /proc/<pid>/comm
- pid: command and tid of the task
+ - cid: container id of the task
- dso: name of library or module executed at the time of sample
- symbol: name of function executed at the time of sample
- parent: name of function matched to the parent regex filter. Unmatched
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 1f6c705..3952783 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -115,8 +115,8 @@ OPTIONS
-F::
--fields::
Comma separated list of fields to print. Options are:
- comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
- srcline, period, iregs, brstack, brstacksym, flags.
+ comm, tid, pid, cid, time, cpu, event, trace, ip, sym, dso, addr,
+ symoff, srcline, period, iregs, brstack, brstacksym, flags.
Field list can be prepended with the type, trace, sw or hw,
to indicate to which event type the field list applies.
e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 971ff91..fe35dec 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -65,6 +65,7 @@ enum perf_output_field {
PERF_OUTPUT_WEIGHT = 1U << 18,
PERF_OUTPUT_BPF_OUTPUT = 1U << 19,
PERF_OUTPUT_CALLINDENT = 1U << 20,
+ PERF_OUTPUT_CID = 1U << 21,
};

struct output_option {
@@ -92,6 +93,7 @@ struct output_option {
{.str = "weight", .field = PERF_OUTPUT_WEIGHT},
{.str = "bpf-output", .field = PERF_OUTPUT_BPF_OUTPUT},
{.str = "callindent", .field = PERF_OUTPUT_CALLINDENT},
+ {.str = "cid", .field = PERF_OUTPUT_CID},
};

/* default set to maintain compatibility with current format */
@@ -312,6 +314,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
PERF_OUTPUT_IREGS))
return -EINVAL;

+ if (PRINT_FIELD(CID) &&
+ perf_evsel__check_stype(evsel, PERF_SAMPLE_CID, "CID",
+ PERF_OUTPUT_CID))
+ return -EINVAL;
+
return 0;
}

@@ -909,6 +916,9 @@ static void process_event(struct perf_script *script,
if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
print_sample_bpf_output(sample);

+ if (PRINT_FIELD(CID))
+ printf("%10u ", sample->cid);
+
printf("\n");
}

@@ -2114,8 +2124,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_CALLBACK('F', "fields", NULL, "str",
"comma separated output fields prepend with 'type:'. "
"Valid types: hw,sw,trace,raw. "
- "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
- "addr,symoff,period,iregs,brstack,brstacksym,flags,"
+ "Fields: comm,tid,pid,cid,time,cpu,event,trace,ip,sym,"
+ "dso,addr,symoff,period,iregs,brstack,brstacksym,flags,"
"callindent", parse_output_fields),
OPT_BOOLEAN('a', "all-cpus", &system_wide,
"system-wide collection from all CPUs"),
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 8d363d5..c35b1c5 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -191,6 +191,7 @@ struct perf_sample {
u32 raw_size;
u64 data_src;
u32 flags;
+ u32 cid;
u16 insn_len;
u8 cpumode;
void *raw_data;
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index d9b80ef..d35fbd3 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -929,6 +929,8 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
if (opts->sample_transaction)
perf_evsel__set_sample_bit(evsel, TRANSACTION);

+ perf_evsel__set_sample_bit(evsel, CID);
+
if (opts->running_time) {
evsel->attr.read_format |=
PERF_FORMAT_TOTAL_TIME_ENABLED |
@@ -1973,6 +1975,20 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
}
}

+ data->cid = 0;
+ if (type & PERF_SAMPLE_CID) {
+ u.val64 = *array;
+
+ if (swapped) {
+ /* undo swap of u64, then swap on individual u32s */
+ u.val64 = bswap_64(u.val64);
+ u.val32[0] = bswap_32(u.val32[0]);
+ }
+
+ data->cid = u.val32[0];
+ array++;
+ }
+
return 0;
}

@@ -2078,6 +2094,9 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
}
}

+ if (type & PERF_SAMPLE_CID)
+ result += sizeof(u64);
+
return result;
}

@@ -2267,6 +2286,21 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type,
}
}

+ if (type & PERF_SAMPLE_CID) {
+ u.val32[0] = sample->cid;
+
+ if (swapped) {
+ /*
+ * Inverse of what is done in perf_evsel__parse_sample
+ */
+ u.val32[0] = bswap_32(u.val32[0]);
+ u.val64 = bswap_64(u.val64);
+ }
+
+ *array = u.val64;
+ array++;
+ }
+
return 0;
}

diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index de15dbc..93d5054 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -168,6 +168,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO);
}

+ hists__new_col_len(hists, HISTC_CID, 10);
hists__new_col_len(hists, HISTC_CPU, 3);
hists__new_col_len(hists, HISTC_SOCKET, 6);
hists__new_col_len(hists, HISTC_MEM_LOCKED, 6);
@@ -591,6 +592,7 @@ __hists__add_entry(struct hists *hists,
.hists = hists,
.branch_info = bi,
.mem_info = mi,
+ .cid = sample->cid,
.transaction = sample->transaction,
.raw_data = sample->raw_data,
.raw_size = sample->raw_size,
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 0a1edf1..1ad1d91 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -28,6 +28,7 @@ enum hist_column {
HISTC_SYMBOL,
HISTC_DSO,
HISTC_THREAD,
+ HISTC_CID,
HISTC_COMM,
HISTC_PARENT,
HISTC_CPU,
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 5d61242..d40f9c7 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1108,6 +1108,9 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event,

if (sample_type & PERF_SAMPLE_READ)
sample_read__printf(sample, evsel->attr.read_format);
+
+ if (sample_type & PERF_SAMPLE_CID)
+ printf("... cid: %u\n", sample->cid);
}

static struct machine *machines__find_for_cpumode(struct machines *machines,
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 947d21f..aa2ee43 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -1392,6 +1392,27 @@ struct sort_entry sort_transaction = {
.se_width_idx = HISTC_TRANSACTION,
};

+/* --sort cid */
+/* Returns right->cid - left->cid; the u32 cids are widened to 64 bits first. */
+static int64_t
+sort__cid_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return (int64_t)right->cid - (int64_t)left->cid;
+}
+/* Format he->cid as unsigned decimal, left-justified, at least 'width' wide. */
+static int hist_entry__cid_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return repsep_snprintf(bf, size, "%-*u", width, he->cid);
+}
+/* Sort entry behind the "cid" sort key; its column width index is HISTC_CID. */
+struct sort_entry sort_cid = {
+ .se_header = "Container ID ",
+ .se_cmp = sort__cid_cmp,
+ .se_snprintf = hist_entry__cid_snprintf,
+ .se_width_idx = HISTC_CID,
+};
+
struct sort_dimension {
const char *name;
struct sort_entry *entry;
@@ -1414,6 +1435,7 @@ static struct sort_dimension common_sort_dimensions[] = {
DIM(SORT_GLOBAL_WEIGHT, "weight", sort_global_weight),
DIM(SORT_TRANSACTION, "transaction", sort_transaction),
DIM(SORT_TRACE, "trace", sort_trace),
+ DIM(SORT_CID, "cid", sort_cid),
};

#undef DIM
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 7ca37ea..eea40e5 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -94,6 +94,7 @@ struct hist_entry {
u64 transaction;
s32 socket;
s32 cpu;
+ u32 cid;
u8 cpumode;
u8 depth;

@@ -210,6 +211,7 @@ enum sort_type {
SORT_GLOBAL_WEIGHT,
SORT_TRANSACTION,
SORT_TRACE,
+ SORT_CID,

/* branch stack specific sort keys */
__SORT_BRANCH_STACK,