Re: [PATCH RFC] hist lookups
From: Arnaldo Carvalho de Melo
Date: Wed Oct 31 2018 - 08:43:16 EST
Em Tue, Oct 30, 2018 at 10:03:28PM -0700, David Miller escreveu:
>
> So when a cpu is overpowered processing samples, most of the time is
> spent in the histogram code.
>
> It seems we initialize a ~262 byte structure on the stack to do every
> histogram entry lookup.
>
> This is a side effect of how the sorting code is shared with the code
> that does lookups and insertions into the histogram tree(s).
>
> I tried to change this so that lookups use a smaller key, but it gets
> ugly real fast.
>
> I don't know when I'd be able to work more on this so I'm posting this
> hoping maybe someone else can move it forward, or maybe even find a
> better way to do this.
Added Namhyung to the CC list; he is on vacation right now but said
he would look into the recently raised issues when he gets back.
Thanks a lot for all the work you did in this sprint, it is really
appreciated. I'm processing the fixes for the fallback to kallsyms and
the other patch you submitted; I will test them, push to Ingo, and
revisit this after I get ready for Vancouver ;-)
- Arnaldo
> The histogram code is really the limiting factor in how well perf can
> handle high sample rates.
>
> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
> index f96c005..f0265e4 100644
> --- a/tools/perf/util/sort.c
> +++ b/tools/perf/util/sort.c
> @@ -81,6 +81,12 @@ sort__thread_cmp(struct hist_entry *left, struct hist_entry *right)
> return right->thread->tid - left->thread->tid;
> }
>
> +static int64_t
> +sort__thread_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
> +{
> + return key->al->thread->tid - entry->thread->tid;
> +}
> +
> static int hist_entry__thread_snprintf(struct hist_entry *he, char *bf,
> size_t size, unsigned int width)
> {
> @@ -104,6 +110,7 @@ static int hist_entry__thread_filter(struct hist_entry *he, int type, const void
> struct sort_entry sort_thread = {
> .se_header = " Pid:Command",
> .se_cmp = sort__thread_cmp,
> + .se_cmp_key = sort__thread_cmp_key,
> .se_snprintf = hist_entry__thread_snprintf,
> .se_filter = hist_entry__thread_filter,
> .se_width_idx = HISTC_THREAD,
> @@ -123,6 +130,13 @@ sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
> }
>
> static int64_t
> +sort__comm_cmp_key(struct hist_entry *entry,
> + struct hist_entry_cmp_key *key)
> +{
> + return strcmp(comm__str(key->comm), comm__str(entry->comm));
> +}
> +
> +static int64_t
> sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
> {
> return strcmp(comm__str(right->comm), comm__str(left->comm));
> @@ -143,6 +157,7 @@ static int hist_entry__comm_snprintf(struct hist_entry *he, char *bf,
> struct sort_entry sort_comm = {
> .se_header = "Command",
> .se_cmp = sort__comm_cmp,
> + .se_cmp_key = sort__comm_cmp_key,
> .se_collapse = sort__comm_collapse,
> .se_sort = sort__comm_sort,
> .se_snprintf = hist_entry__comm_snprintf,
> @@ -178,6 +193,12 @@ sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
> return _sort__dso_cmp(right->ms.map, left->ms.map);
> }
>
> +static int64_t
> +sort__dso_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
> +{
> + return _sort__dso_cmp(key->al->map, entry->ms.map);
> +}
> +
> static int _hist_entry__dso_snprintf(struct map *map, char *bf,
> size_t size, unsigned int width)
> {
> @@ -209,6 +230,7 @@ static int hist_entry__dso_filter(struct hist_entry *he, int type, const void *a
> struct sort_entry sort_dso = {
> .se_header = "Shared Object",
> .se_cmp = sort__dso_cmp,
> + .se_cmp_key = sort__dso_cmp_key,
> .se_snprintf = hist_entry__dso_snprintf,
> .se_filter = hist_entry__dso_filter,
> .se_width_idx = HISTC_DSO,
> @@ -260,6 +282,25 @@ sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
> }
>
> static int64_t
> +sort__sym_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
> +{
> + int64_t ret;
> +
> + if (!entry->ms.sym && !key->al->sym)
> + return _sort__addr_cmp(entry->ip, key->al->addr);
> +
> + /*
> + * comparing symbol address alone is not enough since it's a
> + * relative address within a dso.
> + */
> + ret = sort__dso_cmp_key(entry, key);
> + if (ret != 0)
> + return ret;
> +
> + return _sort__sym_cmp(entry->ms.sym, key->al->sym);
> +}
> +
> +static int64_t
> sort__sym_sort(struct hist_entry *left, struct hist_entry *right)
> {
> if (!left->ms.sym || !right->ms.sym)
> @@ -323,6 +364,7 @@ static int hist_entry__sym_filter(struct hist_entry *he, int type, const void *a
> struct sort_entry sort_sym = {
> .se_header = "Symbol",
> .se_cmp = sort__sym_cmp,
> + .se_cmp_key = sort__sym_cmp_key,
> .se_sort = sort__sym_sort,
> .se_snprintf = hist_entry__sym_snprintf,
> .se_filter = hist_entry__sym_filter,
> @@ -347,6 +389,18 @@ sort__srcline_cmp(struct hist_entry *left, struct hist_entry *right)
> return strcmp(right->srcline, left->srcline);
> }
>
> +static int64_t
> +sort__srcline_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
> +{
> + if (!entry->srcline)
> + entry->srcline = hist_entry__srcline(entry);
> + if (!key->al->srcline)
> + key->al->srcline =
> + map__srcline(key->al->map, key->al->addr, key->al->sym);
> +
> + return strcmp(key->al->srcline, entry->srcline);
> +}
> +
> static int hist_entry__srcline_snprintf(struct hist_entry *he, char *bf,
> size_t size, unsigned int width)
> {
> @@ -359,6 +413,7 @@ static int hist_entry__srcline_snprintf(struct hist_entry *he, char *bf,
> struct sort_entry sort_srcline = {
> .se_header = "Source:Line",
> .se_cmp = sort__srcline_cmp,
> + .se_cmp_key = sort__srcline_cmp_key,
> .se_snprintf = hist_entry__srcline_snprintf,
> .se_width_idx = HISTC_SRCLINE,
> };
> @@ -382,6 +437,18 @@ sort__srcline_from_cmp(struct hist_entry *left, struct hist_entry *right)
> return strcmp(right->branch_info->srcline_from, left->branch_info->srcline_from);
> }
>
> +static int64_t
> +sort__srcline_from_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
> +{
> + if (!entry->branch_info->srcline_from)
> + entry->branch_info->srcline_from = addr_map_symbol__srcline(&entry->branch_info->from);
> +
> + if (!key->bi->srcline_from)
> + key->bi->srcline_from = addr_map_symbol__srcline(&key->bi->from);
> +
> + return strcmp(key->bi->srcline_from, entry->branch_info->srcline_from);
> +}
> +
> static int hist_entry__srcline_from_snprintf(struct hist_entry *he, char *bf,
> size_t size, unsigned int width)
> {
> @@ -391,6 +458,7 @@ static int hist_entry__srcline_from_snprintf(struct hist_entry *he, char *bf,
> struct sort_entry sort_srcline_from = {
> .se_header = "From Source:Line",
> .se_cmp = sort__srcline_from_cmp,
> + .se_cmp_key = sort__srcline_from_cmp_key,
> .se_snprintf = hist_entry__srcline_from_snprintf,
> .se_width_idx = HISTC_SRCLINE_FROM,
> };
> @@ -409,6 +477,18 @@ sort__srcline_to_cmp(struct hist_entry *left, struct hist_entry *right)
> return strcmp(right->branch_info->srcline_to, left->branch_info->srcline_to);
> }
>
> +static int64_t
> +sort__srcline_to_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
> +{
> + if (!entry->branch_info->srcline_to)
> + entry->branch_info->srcline_to = addr_map_symbol__srcline(&entry->branch_info->to);
> +
> + if (!key->bi->srcline_to)
> + key->bi->srcline_to = addr_map_symbol__srcline(&key->bi->to);
> +
> + return strcmp(key->bi->srcline_to, entry->branch_info->srcline_to);
> +}
> +
> static int hist_entry__srcline_to_snprintf(struct hist_entry *he, char *bf,
> size_t size, unsigned int width)
> {
> @@ -418,6 +498,7 @@ static int hist_entry__srcline_to_snprintf(struct hist_entry *he, char *bf,
> struct sort_entry sort_srcline_to = {
> .se_header = "To Source:Line",
> .se_cmp = sort__srcline_to_cmp,
> + .se_cmp_key = sort__srcline_to_cmp_key,
> .se_snprintf = hist_entry__srcline_to_snprintf,
> .se_width_idx = HISTC_SRCLINE_TO,
> };
> @@ -426,16 +507,16 @@ struct sort_entry sort_srcline_to = {
>
> static char no_srcfile[1];
>
> -static char *hist_entry__get_srcfile(struct hist_entry *e)
> +static char *__hist_entry__get_srcfile(struct map *map, struct symbol *sym,
> + u64 ip)
> {
> char *sf, *p;
> - struct map *map = e->ms.map;
>
> if (!map)
> return no_srcfile;
>
> - sf = __get_srcline(map->dso, map__rip_2objdump(map, e->ip),
> - e->ms.sym, false, true, true, e->ip);
> + sf = __get_srcline(map->dso, map__rip_2objdump(map, ip),
> + sym, false, true, true, ip);
> if (!strcmp(sf, SRCLINE_UNKNOWN))
> return no_srcfile;
> p = strchr(sf, ':');
> @@ -447,6 +528,15 @@ static char *hist_entry__get_srcfile(struct hist_entry *e)
> return no_srcfile;
> }
>
> +static char *hist_entry__get_srcfile(struct hist_entry *e)
> +{
> + return __hist_entry__get_srcfile(e->ms.map, e->ms.sym, e->ip);
> +}
> +
> +static char *hist_entry_key__get_srcfile(struct hist_entry_cmp_key *key)
> +{
> + return __hist_entry__get_srcfile(key->al->map, key->al->sym, key->al->addr);
> +}
> static int64_t
> sort__srcfile_cmp(struct hist_entry *left, struct hist_entry *right)
> {
> @@ -458,6 +548,17 @@ sort__srcfile_cmp(struct hist_entry *left, struct hist_entry *right)
> return strcmp(right->srcfile, left->srcfile);
> }
>
> +static int64_t
> +sort__srcfile_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
> +{
> + if (!entry->srcfile)
> + entry->srcfile = hist_entry__get_srcfile(entry);
> + if (!key->srcfile)
> + key->srcfile = hist_entry_key__get_srcfile(key);
> +
> + return strcmp(key->srcfile, entry->srcfile);
> +}
> +
> static int hist_entry__srcfile_snprintf(struct hist_entry *he, char *bf,
> size_t size, unsigned int width)
> {
> @@ -470,6 +571,7 @@ static int hist_entry__srcfile_snprintf(struct hist_entry *he, char *bf,
> struct sort_entry sort_srcfile = {
> .se_header = "Source File",
> .se_cmp = sort__srcfile_cmp,
> + .se_cmp_key = sort__srcfile_cmp_key,
> .se_snprintf = hist_entry__srcfile_snprintf,
> .se_width_idx = HISTC_SRCFILE,
> };
> @@ -488,6 +590,18 @@ sort__parent_cmp(struct hist_entry *left, struct hist_entry *right)
> return strcmp(sym_r->name, sym_l->name);
> }
>
> +static int64_t
> +sort__parent_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
> +{
> + struct symbol *sym_l = entry->parent;
> + struct symbol *sym_r = key->sym_parent;
> +
> + if (!sym_l || !sym_r)
> + return cmp_null(sym_l, sym_r);
> +
> + return strcmp(sym_r->name, sym_l->name);
> +}
> +
> static int hist_entry__parent_snprintf(struct hist_entry *he, char *bf,
> size_t size, unsigned int width)
> {
> @@ -498,6 +612,7 @@ static int hist_entry__parent_snprintf(struct hist_entry *he, char *bf,
> struct sort_entry sort_parent = {
> .se_header = "Parent symbol",
> .se_cmp = sort__parent_cmp,
> + .se_cmp_key = sort__parent_cmp_key,
> .se_snprintf = hist_entry__parent_snprintf,
> .se_width_idx = HISTC_PARENT,
> };
> @@ -510,6 +625,12 @@ sort__cpu_cmp(struct hist_entry *left, struct hist_entry *right)
> return right->cpu - left->cpu;
> }
>
> +static int64_t
> +sort__cpu_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
> +{
> + return key->al->cpu - entry->cpu;
> +}
> +
> static int hist_entry__cpu_snprintf(struct hist_entry *he, char *bf,
> size_t size, unsigned int width)
> {
> @@ -519,6 +640,7 @@ static int hist_entry__cpu_snprintf(struct hist_entry *he, char *bf,
> struct sort_entry sort_cpu = {
> .se_header = "CPU",
> .se_cmp = sort__cpu_cmp,
> + .se_cmp_key = sort__cpu_cmp_key,
> .se_snprintf = hist_entry__cpu_snprintf,
> .se_width_idx = HISTC_CPU,
> };
> @@ -548,6 +670,22 @@ sort__cgroup_id_cmp(struct hist_entry *left, struct hist_entry *right)
> left->cgroup_id.ino);
> }
>
> +static int64_t
> +sort__cgroup_id_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
> +{
> + struct namespaces *ns = thread__namespaces(key->al->thread);
> + int64_t ret;
> + u64 val;
> +
> + val = ns ? ns->link_info[CGROUP_NS_INDEX].dev : 0;
> + ret = _sort__cgroup_dev_cmp(val, entry->cgroup_id.dev);
> + if (ret != 0)
> + return ret;
> +
> + val = ns ? ns->link_info[CGROUP_NS_INDEX].ino : 0;
> + return _sort__cgroup_inode_cmp(val, entry->cgroup_id.ino);
> +}
> +
> static int hist_entry__cgroup_id_snprintf(struct hist_entry *he,
> char *bf, size_t size,
> unsigned int width __maybe_unused)
> @@ -559,6 +697,7 @@ static int hist_entry__cgroup_id_snprintf(struct hist_entry *he,
> struct sort_entry sort_cgroup_id = {
> .se_header = "cgroup id (dev/inode)",
> .se_cmp = sort__cgroup_id_cmp,
> + .se_cmp_key = sort__cgroup_id_cmp_key,
> .se_snprintf = hist_entry__cgroup_id_snprintf,
> .se_width_idx = HISTC_CGROUP_ID,
> };
> @@ -571,6 +710,12 @@ sort__socket_cmp(struct hist_entry *left, struct hist_entry *right)
> return right->socket - left->socket;
> }
>
> +static int64_t
> +sort__socket_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
> +{
> + return key->al->socket - entry->socket;
> +}
> +
> static int hist_entry__socket_snprintf(struct hist_entry *he, char *bf,
> size_t size, unsigned int width)
> {
> @@ -590,6 +735,7 @@ static int hist_entry__socket_filter(struct hist_entry *he, int type, const void
> struct sort_entry sort_socket = {
> .se_header = "Socket",
> .se_cmp = sort__socket_cmp,
> + .se_cmp_key = sort__socket_cmp_key,
> .se_snprintf = hist_entry__socket_snprintf,
> .se_filter = hist_entry__socket_filter,
> .se_width_idx = HISTC_SOCKET,
> @@ -597,20 +743,21 @@ struct sort_entry sort_socket = {
>
> /* --sort trace */
>
> -static char *get_trace_output(struct hist_entry *he)
> +static char *__get_trace_output(struct hists *hists, void *raw_data,
> + u32 raw_size)
> {
> struct trace_seq seq;
> struct perf_evsel *evsel;
> struct tep_record rec = {
> - .data = he->raw_data,
> - .size = he->raw_size,
> + .data = raw_data,
> + .size = raw_size,
> };
>
> - evsel = hists_to_evsel(he->hists);
> + evsel = hists_to_evsel(hists);
>
> trace_seq_init(&seq);
> if (symbol_conf.raw_trace) {
> - tep_print_fields(&seq, he->raw_data, he->raw_size,
> + tep_print_fields(&seq, raw_data, raw_size,
> evsel->tp_format);
> } else {
> tep_event_info(&seq, evsel->tp_format, &rec);
> @@ -622,6 +769,16 @@ static char *get_trace_output(struct hist_entry *he)
> return realloc(seq.buffer, seq.len + 1);
> }
>
> +static char *get_trace_output(struct hist_entry *he)
> +{
> + return __get_trace_output(he->hists, he->raw_data, he->raw_size);
> +}
> +
> +static char *get_trace_output_key(struct hists *hists, struct hist_entry_cmp_key *key)
> +{
> + return __get_trace_output(hists, key->sample->raw_data, key->sample->raw_size);
> +}
> +
> static int64_t
> sort__trace_cmp(struct hist_entry *left, struct hist_entry *right)
> {
> @@ -639,6 +796,23 @@ sort__trace_cmp(struct hist_entry *left, struct hist_entry *right)
> return strcmp(right->trace_output, left->trace_output);
> }
>
> +static int64_t
> +sort__trace_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
> +{
> + struct perf_evsel *evsel;
> +
> + evsel = hists_to_evsel(entry->hists);
> + if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
> + return 0;
> +
> + if (entry->trace_output == NULL)
> + entry->trace_output = get_trace_output(entry);
> + if (key->trace_output == NULL)
> + key->trace_output = get_trace_output_key(entry->hists, key);
> +
> + return strcmp(key->trace_output, entry->trace_output);
> +}
> +
> static int hist_entry__trace_snprintf(struct hist_entry *he, char *bf,
> size_t size, unsigned int width)
> {
> @@ -656,6 +830,7 @@ static int hist_entry__trace_snprintf(struct hist_entry *he, char *bf,
> struct sort_entry sort_trace = {
> .se_header = "Trace output",
> .se_cmp = sort__trace_cmp,
> + .se_cmp_key = sort__trace_cmp_key,
> .se_snprintf = hist_entry__trace_snprintf,
> .se_width_idx = HISTC_TRACE,
> };
> @@ -672,6 +847,16 @@ sort__dso_from_cmp(struct hist_entry *left, struct hist_entry *right)
> right->branch_info->from.map);
> }
>
> +static int64_t
> +sort__dso_from_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
> +{
> + if (!entry->branch_info || !key->bi)
> + return cmp_null(entry->branch_info, key->bi);
> +
> + return _sort__dso_cmp(entry->branch_info->from.map,
> + key->bi->from.map);
> +}
> +
> static int hist_entry__dso_from_snprintf(struct hist_entry *he, char *bf,
> size_t size, unsigned int width)
> {
> @@ -704,6 +889,16 @@ sort__dso_to_cmp(struct hist_entry *left, struct hist_entry *right)
> right->branch_info->to.map);
> }
>
> +static int64_t
> +sort__dso_to_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
> +{
> + if (!entry->branch_info || !key->bi)
> + return cmp_null(entry->branch_info, key->bi);
> +
> + return _sort__dso_cmp(entry->branch_info->to.map,
> + key->bi->to.map);
> +}
> +
> static int hist_entry__dso_to_snprintf(struct hist_entry *he, char *bf,
> size_t size, unsigned int width)
> {
> @@ -745,6 +940,24 @@ sort__sym_from_cmp(struct hist_entry *left, struct hist_entry *right)
> }
>
> static int64_t
> +sort__sym_from_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
> +{
> + struct addr_map_symbol *from_l = &entry->branch_info->from;
> + struct addr_map_symbol *from_r = &key->bi->from;
> +
> + if (!entry->branch_info || !key->bi)
> + return cmp_null(entry->branch_info, key->bi);
> +
> + from_l = &entry->branch_info->from;
> + from_r = &key->bi->from;
> +
> + if (!from_l->sym && !from_r->sym)
> + return _sort__addr_cmp(from_l->addr, from_r->addr);
> +
> + return _sort__sym_cmp(from_l->sym, from_r->sym);
> +}
> +
> +static int64_t
> sort__sym_to_cmp(struct hist_entry *left, struct hist_entry *right)
> {
> struct addr_map_symbol *to_l, *to_r;
> @@ -761,6 +974,23 @@ sort__sym_to_cmp(struct hist_entry *left, struct hist_entry *right)
> return _sort__sym_cmp(to_l->sym, to_r->sym);
> }
>
> +static int64_t
> +sort__sym_to_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
> +{
> + struct addr_map_symbol *to_l, *to_r;
> +
> + if (!entry->branch_info || !key->bi)
> + return cmp_null(entry->branch_info, key->bi);
> +
> + to_l = &entry->branch_info->to;
> + to_r = &key->bi->to;
> +
> + if (!to_l->sym && !to_r->sym)
> + return _sort__addr_cmp(to_l->addr, to_r->addr);
> +
> + return _sort__sym_cmp(to_l->sym, to_r->sym);
> +}
> +
> static int hist_entry__sym_from_snprintf(struct hist_entry *he, char *bf,
> size_t size, unsigned int width)
> {
> @@ -814,6 +1044,7 @@ static int hist_entry__sym_to_filter(struct hist_entry *he, int type,
> struct sort_entry sort_dso_from = {
> .se_header = "Source Shared Object",
> .se_cmp = sort__dso_from_cmp,
> + .se_cmp_key = sort__dso_from_cmp_key,
> .se_snprintf = hist_entry__dso_from_snprintf,
> .se_filter = hist_entry__dso_from_filter,
> .se_width_idx = HISTC_DSO_FROM,
> @@ -822,6 +1053,7 @@ struct sort_entry sort_dso_from = {
> struct sort_entry sort_dso_to = {
> .se_header = "Target Shared Object",
> .se_cmp = sort__dso_to_cmp,
> + .se_cmp_key = sort__dso_to_cmp_key,
> .se_snprintf = hist_entry__dso_to_snprintf,
> .se_filter = hist_entry__dso_to_filter,
> .se_width_idx = HISTC_DSO_TO,
> @@ -830,6 +1062,7 @@ struct sort_entry sort_dso_to = {
> struct sort_entry sort_sym_from = {
> .se_header = "Source Symbol",
> .se_cmp = sort__sym_from_cmp,
> + .se_cmp_key = sort__sym_from_cmp_key,
> .se_snprintf = hist_entry__sym_from_snprintf,
> .se_filter = hist_entry__sym_from_filter,
> .se_width_idx = HISTC_SYMBOL_FROM,
> @@ -838,6 +1071,7 @@ struct sort_entry sort_sym_from = {
> struct sort_entry sort_sym_to = {
> .se_header = "Target Symbol",
> .se_cmp = sort__sym_to_cmp,
> + .se_cmp_key = sort__sym_to_cmp_key,
> .se_snprintf = hist_entry__sym_to_snprintf,
> .se_filter = hist_entry__sym_to_filter,
> .se_width_idx = HISTC_SYMBOL_TO,
> @@ -856,6 +1090,19 @@ sort__mispredict_cmp(struct hist_entry *left, struct hist_entry *right)
> return mp || p;
> }
>
> +static int64_t
> +sort__mispredict_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
> +{
> + unsigned char mp, p;
> +
> + if (!entry->branch_info || !key->bi)
> + return cmp_null(entry->branch_info, key->bi);
> +
> + mp = entry->branch_info->flags.mispred != key->bi->flags.mispred;
> + p = entry->branch_info->flags.predicted != key->bi->flags.predicted;
> + return mp || p;
> +}
> +
> static int hist_entry__mispredict_snprintf(struct hist_entry *he, char *bf,
> size_t size, unsigned int width){
> static const char *out = "N/A";
> @@ -880,6 +1127,16 @@ sort__cycles_cmp(struct hist_entry *left, struct hist_entry *right)
> right->branch_info->flags.cycles;
> }
>
> +static int64_t
> +sort__cycles_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
> +{
> + if (!entry->branch_info || !key->bi)
> + return cmp_null(entry->branch_info, key->bi);
> +
> + return entry->branch_info->flags.cycles -
> + key->bi->flags.cycles;
> +}
> +
> static int hist_entry__cycles_snprintf(struct hist_entry *he, char *bf,
> size_t size, unsigned int width)
> {
> @@ -894,6 +1151,7 @@ static int hist_entry__cycles_snprintf(struct hist_entry *he, char *bf,
> struct sort_entry sort_cycles = {
> .se_header = "Basic Block Cycles",
> .se_cmp = sort__cycles_cmp,
> + .se_cmp_key = sort__cycles_cmp_key,
> .se_snprintf = hist_entry__cycles_snprintf,
> .se_width_idx = HISTC_CYCLES,
> };
> @@ -912,6 +1170,19 @@ sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right)
> return (int64_t)(r - l);
> }
>
> +static int64_t
> +sort__daddr_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
> +{
> + uint64_t l = 0, r = 0;
> +
> + if (entry->mem_info)
> + l = entry->mem_info->daddr.addr;
> + if (key->mem_info)
> + r = key->mem_info->daddr.addr;
> +
> + return (int64_t)(r - l);
> +}
> +
> static int hist_entry__daddr_snprintf(struct hist_entry *he, char *bf,
> size_t size, unsigned int width)
> {
> @@ -941,6 +1212,19 @@ sort__iaddr_cmp(struct hist_entry *left, struct hist_entry *right)
> return (int64_t)(r - l);
> }
>
> +static int64_t
> +sort__iaddr_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
> +{
> + uint64_t l = 0, r = 0;
> +
> + if (entry->mem_info)
> + l = entry->mem_info->iaddr.addr;
> + if (key->mem_info)
> + r = key->mem_info->iaddr.addr;
> +
> + return (int64_t)(r - l);
> +}
> +
> static int hist_entry__iaddr_snprintf(struct hist_entry *he, char *bf,
> size_t size, unsigned int width)
> {
> @@ -971,6 +1255,20 @@ sort__dso_daddr_cmp(struct hist_entry *left, struct hist_entry *right)
> return _sort__dso_cmp(map_l, map_r);
> }
>
> +static int64_t
> +sort__dso_daddr_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
> +{
> + struct map *map_l = NULL;
> + struct map *map_r = NULL;
> +
> + if (entry->mem_info)
> + map_l = entry->mem_info->daddr.map;
> + if (key->mem_info)
> + map_r = key->mem_info->daddr.map;
> +
> + return _sort__dso_cmp(map_l, map_r);
> +}
> +
> static int hist_entry__dso_daddr_snprintf(struct hist_entry *he, char *bf,
> size_t size, unsigned int width)
> {
> @@ -1001,6 +1299,25 @@ sort__locked_cmp(struct hist_entry *left, struct hist_entry *right)
> return (int64_t)(data_src_r.mem_lock - data_src_l.mem_lock);
> }
>
> +static int64_t
> +sort__locked_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
> +{
> + union perf_mem_data_src data_src_l;
> + union perf_mem_data_src data_src_r;
> +
> + if (entry->mem_info)
> + data_src_l = entry->mem_info->data_src;
> + else
> + data_src_l.mem_lock = PERF_MEM_LOCK_NA;
> +
> + if (key->mem_info)
> + data_src_r = key->mem_info->data_src;
> + else
> + data_src_r.mem_lock = PERF_MEM_LOCK_NA;
> +
> + return (int64_t)(data_src_r.mem_lock - data_src_l.mem_lock);
> +}
> +
> static int hist_entry__locked_snprintf(struct hist_entry *he, char *bf,
> size_t size, unsigned int width)
> {
> @@ -1029,6 +1346,25 @@ sort__tlb_cmp(struct hist_entry *left, struct hist_entry *right)
> return (int64_t)(data_src_r.mem_dtlb - data_src_l.mem_dtlb);
> }
>
> +static int64_t
> +sort__tlb_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
> +{
> + union perf_mem_data_src data_src_l;
> + union perf_mem_data_src data_src_r;
> +
> + if (entry->mem_info)
> + data_src_l = entry->mem_info->data_src;
> + else
> + data_src_l.mem_dtlb = PERF_MEM_TLB_NA;
> +
> + if (key->mem_info)
> + data_src_r = key->mem_info->data_src;
> + else
> + data_src_r.mem_dtlb = PERF_MEM_TLB_NA;
> +
> + return (int64_t)(data_src_r.mem_dtlb - data_src_l.mem_dtlb);
> +}
> +
> static int hist_entry__tlb_snprintf(struct hist_entry *he, char *bf,
> size_t size, unsigned int width)
> {
> @@ -1057,6 +1393,25 @@ sort__lvl_cmp(struct hist_entry *left, struct hist_entry *right)
> return (int64_t)(data_src_r.mem_lvl - data_src_l.mem_lvl);
> }
>
> +static int64_t
> +sort__lvl_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
> +{
> + union perf_mem_data_src data_src_l;
> + union perf_mem_data_src data_src_r;
> +
> + if (entry->mem_info)
> + data_src_l = entry->mem_info->data_src;
> + else
> + data_src_l.mem_lvl = PERF_MEM_LVL_NA;
> +
> + if (key->mem_info)
> + data_src_r = key->mem_info->data_src;
> + else
> + data_src_r.mem_lvl = PERF_MEM_LVL_NA;
> +
> + return (int64_t)(data_src_r.mem_lvl - data_src_l.mem_lvl);
> +}
> +
> static int hist_entry__lvl_snprintf(struct hist_entry *he, char *bf,
> size_t size, unsigned int width)
> {
> @@ -1085,6 +1440,25 @@ sort__snoop_cmp(struct hist_entry *left, struct hist_entry *right)
> return (int64_t)(data_src_r.mem_snoop - data_src_l.mem_snoop);
> }
>
> +static int64_t
> +sort__snoop_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
> +{
> + union perf_mem_data_src data_src_l;
> + union perf_mem_data_src data_src_r;
> +
> + if (entry->mem_info)
> + data_src_l = entry->mem_info->data_src;
> + else
> + data_src_l.mem_snoop = PERF_MEM_SNOOP_NA;
> +
> + if (key->mem_info)
> + data_src_r = key->mem_info->data_src;
> + else
> + data_src_r.mem_snoop = PERF_MEM_SNOOP_NA;
> +
> + return (int64_t)(data_src_r.mem_snoop - data_src_l.mem_snoop);
> +}
> +
> static int hist_entry__snoop_snprintf(struct hist_entry *he, char *bf,
> size_t size, unsigned int width)
> {
> @@ -1158,6 +1532,70 @@ sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right)
> return 0;
> }
>
> +static int64_t
> +sort__dcacheline_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
> +{
> + u64 l, r;
> + struct map *l_map, *r_map;
> +
> + if (!entry->mem_info) return -1;
> + if (!key->mem_info) return 1;
> +
> + /* group event types together */
> + if (entry->cpumode > key->al->cpumode) return -1;
> + if (entry->cpumode < key->al->cpumode) return 1;
> +
> + l_map = entry->mem_info->daddr.map;
> + r_map = key->mem_info->daddr.map;
> +
> + /* if both are NULL, jump to sort on al_addr instead */
> + if (!l_map && !r_map)
> + goto addr;
> +
> + if (!l_map) return -1;
> + if (!r_map) return 1;
> +
> + if (l_map->maj > r_map->maj) return -1;
> + if (l_map->maj < r_map->maj) return 1;
> +
> + if (l_map->min > r_map->min) return -1;
> + if (l_map->min < r_map->min) return 1;
> +
> + if (l_map->ino > r_map->ino) return -1;
> + if (l_map->ino < r_map->ino) return 1;
> +
> + if (l_map->ino_generation > r_map->ino_generation) return -1;
> + if (l_map->ino_generation < r_map->ino_generation) return 1;
> +
> + /*
> + * Addresses with no major/minor numbers are assumed to be
> + * anonymous in userspace. Sort those on pid then address.
> + *
> + * The kernel and non-zero major/minor mapped areas are
> + * assumed to be unity mapped. Sort those on address.
> + */
> +
> + if ((entry->cpumode != PERF_RECORD_MISC_KERNEL) &&
> + (!(l_map->flags & MAP_SHARED)) &&
> + !l_map->maj && !l_map->min && !l_map->ino &&
> + !l_map->ino_generation) {
> + /* userspace anonymous */
> +
> + if (entry->thread->pid_ > key->al->thread->pid_) return -1;
> + if (entry->thread->pid_ < key->al->thread->pid_) return 1;
> + }
> +
> +addr:
> + /* al_addr does all the right addr - start + offset calculations */
> + l = cl_address(entry->mem_info->daddr.al_addr);
> + r = cl_address(key->mem_info->daddr.al_addr);
> +
> + if (l > r) return -1;
> + if (l < r) return 1;
> +
> + return 0;
> +}
> +
> static int hist_entry__dcacheline_snprintf(struct hist_entry *he, char *bf,
> size_t size, unsigned int width)
> {
> @@ -1189,6 +1627,7 @@ static int hist_entry__dcacheline_snprintf(struct hist_entry *he, char *bf,
> struct sort_entry sort_mispredict = {
> .se_header = "Branch Mispredicted",
> .se_cmp = sort__mispredict_cmp,
> + .se_cmp_key = sort__mispredict_cmp_key,
> .se_snprintf = hist_entry__mispredict_snprintf,
> .se_width_idx = HISTC_MISPREDICT,
> };
> @@ -1198,12 +1637,24 @@ static u64 he_weight(struct hist_entry *he)
> return he->stat.nr_events ? he->stat.weight / he->stat.nr_events : 0;
> }
>
> +static u64 key_weight(struct hist_entry_cmp_key *key)
> +{
> + return key->sample->weight;
> +}
> +
> static int64_t
> sort__local_weight_cmp(struct hist_entry *left, struct hist_entry *right)
> {
> return he_weight(left) - he_weight(right);
> }
>
> +static int64_t
> +sort__local_weight_cmp_key(struct hist_entry *entry,
> + struct hist_entry_cmp_key *key)
> +{
> + return he_weight(entry) - key_weight(key);
> +}
> +
> static int hist_entry__local_weight_snprintf(struct hist_entry *he, char *bf,
> size_t size, unsigned int width)
> {
> @@ -1213,6 +1664,7 @@ static int hist_entry__local_weight_snprintf(struct hist_entry *he, char *bf,
> struct sort_entry sort_local_weight = {
> .se_header = "Local Weight",
> .se_cmp = sort__local_weight_cmp,
> + .se_cmp_key = sort__local_weight_cmp_key,
> .se_snprintf = hist_entry__local_weight_snprintf,
> .se_width_idx = HISTC_LOCAL_WEIGHT,
> };
> @@ -1223,6 +1675,13 @@ sort__global_weight_cmp(struct hist_entry *left, struct hist_entry *right)
> return left->stat.weight - right->stat.weight;
> }
>
> +static int64_t
> +sort__global_weight_cmp_key(struct hist_entry *entry,
> + struct hist_entry_cmp_key *key __maybe_unused)
> +{
> + return entry->stat.weight - key->sample->weight;
> +}
> +
> static int hist_entry__global_weight_snprintf(struct hist_entry *he, char *bf,
> size_t size, unsigned int width)
> {
> @@ -1232,6 +1691,7 @@ static int hist_entry__global_weight_snprintf(struct hist_entry *he, char *bf,
> struct sort_entry sort_global_weight = {
> .se_header = "Weight",
> .se_cmp = sort__global_weight_cmp,
> + .se_cmp_key = sort__global_weight_cmp_key,
> .se_snprintf = hist_entry__global_weight_snprintf,
> .se_width_idx = HISTC_GLOBAL_WEIGHT,
> };
> @@ -1239,6 +1699,7 @@ struct sort_entry sort_global_weight = {
> struct sort_entry sort_mem_daddr_sym = {
> .se_header = "Data Symbol",
> .se_cmp = sort__daddr_cmp,
> + .se_cmp_key = sort__daddr_cmp_key,
> .se_snprintf = hist_entry__daddr_snprintf,
> .se_width_idx = HISTC_MEM_DADDR_SYMBOL,
> };
> @@ -1246,6 +1707,7 @@ struct sort_entry sort_mem_daddr_sym = {
> struct sort_entry sort_mem_iaddr_sym = {
> .se_header = "Code Symbol",
> .se_cmp = sort__iaddr_cmp,
> + .se_cmp_key = sort__iaddr_cmp_key,
> .se_snprintf = hist_entry__iaddr_snprintf,
> .se_width_idx = HISTC_MEM_IADDR_SYMBOL,
> };
> @@ -1253,6 +1715,7 @@ struct sort_entry sort_mem_iaddr_sym = {
> struct sort_entry sort_mem_daddr_dso = {
> .se_header = "Data Object",
> .se_cmp = sort__dso_daddr_cmp,
> + .se_cmp_key = sort__dso_daddr_cmp_key,
> .se_snprintf = hist_entry__dso_daddr_snprintf,
> .se_width_idx = HISTC_MEM_DADDR_DSO,
> };
> @@ -1260,6 +1723,7 @@ struct sort_entry sort_mem_daddr_dso = {
> struct sort_entry sort_mem_locked = {
> .se_header = "Locked",
> .se_cmp = sort__locked_cmp,
> + .se_cmp_key = sort__locked_cmp_key,
> .se_snprintf = hist_entry__locked_snprintf,
> .se_width_idx = HISTC_MEM_LOCKED,
> };
> @@ -1267,6 +1731,7 @@ struct sort_entry sort_mem_locked = {
> struct sort_entry sort_mem_tlb = {
> .se_header = "TLB access",
> .se_cmp = sort__tlb_cmp,
> + .se_cmp_key = sort__tlb_cmp_key,
> .se_snprintf = hist_entry__tlb_snprintf,
> .se_width_idx = HISTC_MEM_TLB,
> };
> @@ -1274,6 +1739,7 @@ struct sort_entry sort_mem_tlb = {
> struct sort_entry sort_mem_lvl = {
> .se_header = "Memory access",
> .se_cmp = sort__lvl_cmp,
> + .se_cmp_key = sort__lvl_cmp_key,
> .se_snprintf = hist_entry__lvl_snprintf,
> .se_width_idx = HISTC_MEM_LVL,
> };
> @@ -1281,6 +1747,7 @@ struct sort_entry sort_mem_lvl = {
> struct sort_entry sort_mem_snoop = {
> .se_header = "Snoop",
> .se_cmp = sort__snoop_cmp,
> + .se_cmp_key = sort__snoop_cmp_key,
> .se_snprintf = hist_entry__snoop_snprintf,
> .se_width_idx = HISTC_MEM_SNOOP,
> };
> @@ -1288,6 +1755,7 @@ struct sort_entry sort_mem_snoop = {
> struct sort_entry sort_mem_dcacheline = {
> .se_header = "Data Cacheline",
> .se_cmp = sort__dcacheline_cmp,
> + .se_cmp_key = sort__dcacheline_cmp_key,
> .se_snprintf = hist_entry__dcacheline_snprintf,
> .se_width_idx = HISTC_MEM_DCACHELINE,
> };
> @@ -1305,6 +1773,19 @@ sort__phys_daddr_cmp(struct hist_entry *left, struct hist_entry *right)
> return (int64_t)(r - l);
> }
>
> +static int64_t
> +sort__phys_daddr_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
> +{
> + uint64_t l = 0, r = 0;
> +
> + if (entry->mem_info)
> + l = entry->mem_info->daddr.phys_addr;
> + if (key->mem_info)
> + r = key->mem_info->daddr.phys_addr;
> +
> + return (int64_t)(r - l);
> +}
> +
> static int hist_entry__phys_daddr_snprintf(struct hist_entry *he, char *bf,
> size_t size, unsigned int width)
> {
> @@ -1329,6 +1810,7 @@ static int hist_entry__phys_daddr_snprintf(struct hist_entry *he, char *bf,
> struct sort_entry sort_mem_phys_daddr = {
> .se_header = "Data Physical Address",
> .se_cmp = sort__phys_daddr_cmp,
> + .se_cmp_key = sort__phys_daddr_cmp_key,
> .se_snprintf = hist_entry__phys_daddr_snprintf,
> .se_width_idx = HISTC_MEM_PHYS_DADDR,
> };
> @@ -1343,6 +1825,16 @@ sort__abort_cmp(struct hist_entry *left, struct hist_entry *right)
> right->branch_info->flags.abort;
> }
>
> +static int64_t
> +sort__abort_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
> +{
> + if (!entry->branch_info || !key->bi)
> + return cmp_null(entry->branch_info, key->bi);
> +
> + return entry->branch_info->flags.abort !=
> + key->bi->flags.abort;
> +}
> +
> static int hist_entry__abort_snprintf(struct hist_entry *he, char *bf,
> size_t size, unsigned int width)
> {
> @@ -1361,6 +1853,7 @@ static int hist_entry__abort_snprintf(struct hist_entry *he, char *bf,
> struct sort_entry sort_abort = {
> .se_header = "Transaction abort",
> .se_cmp = sort__abort_cmp,
> + .se_cmp_key = sort__abort_cmp_key,
> .se_snprintf = hist_entry__abort_snprintf,
> .se_width_idx = HISTC_ABORT,
> };
> @@ -1375,6 +1868,16 @@ sort__in_tx_cmp(struct hist_entry *left, struct hist_entry *right)
> right->branch_info->flags.in_tx;
> }
>
> +static int64_t
> +sort__in_tx_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
> +{
> + if (!entry->branch_info || !key->bi)
> + return cmp_null(entry->branch_info, key->bi);
> +
> + return entry->branch_info->flags.in_tx !=
> + key->bi->flags.in_tx;
> +}
> +
> static int hist_entry__in_tx_snprintf(struct hist_entry *he, char *bf,
> size_t size, unsigned int width)
> {
> @@ -1393,6 +1896,7 @@ static int hist_entry__in_tx_snprintf(struct hist_entry *he, char *bf,
> struct sort_entry sort_in_tx = {
> .se_header = "Branch in transaction",
> .se_cmp = sort__in_tx_cmp,
> + .se_cmp_key = sort__in_tx_cmp_key,
> .se_snprintf = hist_entry__in_tx_snprintf,
> .se_width_idx = HISTC_IN_TX,
> };
> @@ -1403,6 +1907,12 @@ sort__transaction_cmp(struct hist_entry *left, struct hist_entry *right)
> return left->transaction - right->transaction;
> }
>
> +static int64_t
> +sort__transaction_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
> +{
> + return entry->transaction - key->sample->transaction;
> +}
> +
> static inline char *add_str(char *p, const char *str)
> {
> strcpy(p, str);
> @@ -1465,6 +1975,7 @@ static int hist_entry__transaction_snprintf(struct hist_entry *he, char *bf,
> struct sort_entry sort_transaction = {
> .se_header = "Transaction ",
> .se_cmp = sort__transaction_cmp,
> + .se_cmp_key = sort__transaction_cmp_key,
> .se_snprintf = hist_entry__transaction_snprintf,
> .se_width_idx = HISTC_TRANSACTION,
> };
> @@ -1486,6 +1997,12 @@ sort__sym_size_cmp(struct hist_entry *left, struct hist_entry *right)
> return _sort__sym_size_cmp(right->ms.sym, left->ms.sym);
> }
>
> +static int64_t
> +sort__sym_size_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
> +{
> + return _sort__sym_size_cmp(key->al->sym, entry->ms.sym);
> +}
> +
> static int _hist_entry__sym_size_snprintf(struct symbol *sym, char *bf,
> size_t bf_size, unsigned int width)
> {
> @@ -1504,6 +2021,7 @@ static int hist_entry__sym_size_snprintf(struct hist_entry *he, char *bf,
> struct sort_entry sort_sym_size = {
> .se_header = "Symbol size",
> .se_cmp = sort__sym_size_cmp,
> + .se_cmp_key = sort__sym_size_cmp_key,
> .se_snprintf = hist_entry__sym_size_snprintf,
> .se_width_idx = HISTC_SYM_SIZE,
> };
> @@ -1525,6 +2043,12 @@ sort__dso_size_cmp(struct hist_entry *left, struct hist_entry *right)
> return _sort__dso_size_cmp(right->ms.map, left->ms.map);
> }
>
> +static int64_t
> +sort__dso_size_cmp_key(struct hist_entry *entry, struct hist_entry_cmp_key *key)
> +{
> + return _sort__dso_size_cmp(key->al->map, entry->ms.map);
> +}
> +
> static int _hist_entry__dso_size_snprintf(struct map *map, char *bf,
> size_t bf_size, unsigned int width)
> {
> @@ -1544,6 +2068,7 @@ static int hist_entry__dso_size_snprintf(struct hist_entry *he, char *bf,
> struct sort_entry sort_dso_size = {
> .se_header = "DSO size",
> .se_cmp = sort__dso_size_cmp,
> + .se_cmp_key = sort__dso_size_cmp_key,
> .se_snprintf = hist_entry__dso_size_snprintf,
> .se_width_idx = HISTC_DSO_SIZE,
> };
> @@ -1693,12 +2218,13 @@ static int __sort__hpp_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
> }
>
> static int64_t __sort__hpp_cmp(struct perf_hpp_fmt *fmt,
> - struct hist_entry *a, struct hist_entry *b)
> + struct hist_entry *entry,
> + struct hist_entry_cmp_key *key)
> {
> struct hpp_sort_entry *hse;
>
> hse = container_of(fmt, struct hpp_sort_entry, hpp);
> - return hse->se->se_cmp(a, b);
> + return hse->se->se_cmp_key(entry, key);
> }
>
> static int64_t __sort__hpp_collapse(struct perf_hpp_fmt *fmt,
> @@ -2089,9 +2615,37 @@ static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt,
> return memcmp(a->raw_data + offset, b->raw_data + offset, size);
> }
>
> +static int64_t __sort__hde_cmp_key(struct perf_hpp_fmt *fmt,
> + struct hist_entry *a,
> + struct hist_entry_cmp_key *key)
> +{
> + struct hpp_dynamic_entry *hde;
> + struct tep_format_field *field;
> + unsigned offset, size;
> +
> + hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
> + field = hde->field;
> + if (field->flags & TEP_FIELD_IS_DYNAMIC) {
> + unsigned long long dyn;
> +
> + tep_read_number_field(field, a->raw_data, &dyn);
> + offset = dyn & 0xffff;
> + size = (dyn >> 16) & 0xffff;
> +
> + /* record max width for output */
> + if (size > hde->dynamic_len)
> + hde->dynamic_len = size;
> + } else {
> + offset = field->offset;
> + size = field->size;
> + }
> +
> + return memcmp(a->raw_data + offset, key->sample->raw_data + offset, size);
> +}
> +
> bool perf_hpp__is_dynamic_entry(struct perf_hpp_fmt *fmt)
> {
> - return fmt->cmp == __sort__hde_cmp;
> + return fmt->cmp == __sort__hde_cmp_key;
> }
>
> static bool __sort__hde_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
> @@ -2138,7 +2692,7 @@ __alloc_dynamic_entry(struct perf_evsel *evsel, struct tep_format_field *field,
> hde->hpp.entry = __sort__hde_entry;
> hde->hpp.color = NULL;
>
> - hde->hpp.cmp = __sort__hde_cmp;
> + hde->hpp.cmp = __sort__hde_cmp_key;
> hde->hpp.collapse = __sort__hde_cmp;
> hde->hpp.sort = __sort__hde_cmp;
> hde->hpp.equal = __sort__hde_equal;
> diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
> index a97cf8e..da85224 100644
> --- a/tools/perf/util/sort.h
> +++ b/tools/perf/util/sort.h
> @@ -264,6 +264,7 @@ struct sort_entry {
> const char *se_header;
>
> int64_t (*se_cmp)(struct hist_entry *, struct hist_entry *);
> + int64_t (*se_cmp_key)(struct hist_entry *, struct hist_entry_cmp_key *);
> int64_t (*se_collapse)(struct hist_entry *, struct hist_entry *);
> int64_t (*se_sort)(struct hist_entry *, struct hist_entry *);
> int (*se_snprintf)(struct hist_entry *he, char *bf, size_t size,
> diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
> index 3badd7f..78df16b 100644
> --- a/tools/perf/util/hist.h
> +++ b/tools/perf/util/hist.h
> @@ -150,7 +150,6 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
> struct perf_hpp;
> struct perf_hpp_fmt;
>
> -int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
> int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
> int hist_entry__transaction_len(void);
> int hist_entry__sort_snprintf(struct hist_entry *he, char *bf, size_t size,
> @@ -238,6 +237,18 @@ struct perf_hpp {
> void *ptr;
> };
>
> +struct hist_entry_cmp_key {
> + struct addr_location *al;
> + struct comm *comm;
> + struct branch_info *bi;
> + struct symbol *sym_parent;
> + struct perf_sample *sample;
> + struct mem_info *mem_info;
> + char *srcfile;
> + char *trace_output;
> +};
> +
> +struct comm;
> struct perf_hpp_fmt {
> const char *name;
> int (*header)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
> @@ -249,7 +260,8 @@ struct perf_hpp_fmt {
> int (*entry)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
> struct hist_entry *he);
> int64_t (*cmp)(struct perf_hpp_fmt *fmt,
> - struct hist_entry *a, struct hist_entry *b);
> + struct hist_entry *entry,
> + struct hist_entry_cmp_key *key);
> int64_t (*collapse)(struct perf_hpp_fmt *fmt,
> struct hist_entry *a, struct hist_entry *b);
> int64_t (*sort)(struct perf_hpp_fmt *fmt,
> @@ -525,4 +537,8 @@ static inline int hists__scnprintf_title(struct hists *hists, char *bf, size_t s
> return __hists__scnprintf_title(hists, bf, size, true);
> }
>
> +extern unsigned long hist_lookups;
> +extern unsigned long hist_hits;
> +extern unsigned long hist_misses;
> +
> #endif /* __PERF_HIST_H */
> diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
> index 828cb97..a4deb5d 100644
> --- a/tools/perf/util/hist.c
> +++ b/tools/perf/util/hist.c
> @@ -364,16 +364,49 @@ void hists__delete_entries(struct hists *hists)
> }
> }
>
> +static u8 symbol__parent_filter(const struct symbol *parent)
> +{
> + if (symbol_conf.exclude_other && parent == NULL)
> + return 1 << HIST_FILTER__PARENT;
> + return 0;
> +}
> +
> /*
> * histogram, sorted on item, collects periods
> */
>
> static int hist_entry__init(struct hist_entry *he,
> - struct hist_entry *template,
> + struct hist_entry_cmp_key *key,
> + struct hists *hists,
> bool sample_self,
> size_t callchain_size)
> {
> - *he = *template;
> + struct namespaces *ns = thread__namespaces(key->al->thread);
> +
> + he->thread = key->al->thread;
> + he->comm = thread__comm(he->thread);
> + he->cgroup_id.dev = ns ? ns->link_info[CGROUP_NS_INDEX].dev : 0;
> + he->cgroup_id.ino = ns ? ns->link_info[CGROUP_NS_INDEX].ino : 0;
> + he->ms.map = key->al->map;
> + he->ms.sym = key->al->sym;
> + he->srcline = key->al->srcline ? strdup(key->al->srcline) : NULL;
> + he->socket = key->al->socket;
> + he->cpu = key->al->cpu;
> + he->cpumode = key->al->cpumode;
> + he->ip = key->al->addr;
> + he->level = key->al->level;
> + he->stat.nr_events = 1;
> + he->stat.period = key->sample->period;
> + he->stat.weight = key->sample->weight;
> + he->parent = key->sym_parent;
> + he->filtered = symbol__parent_filter(key->sym_parent) | key->al->filtered;
> + he->hists = hists;
> + he->branch_info = key->bi;
> + he->mem_info = key->mem_info;
> + he->transaction = key->sample->transaction;
> + he->raw_data = key->sample->raw_data;
> + he->raw_size = key->sample->raw_size;
> +
> he->callchain_size = callchain_size;
>
> if (symbol_conf.cumulate_callchain) {
> @@ -400,7 +433,7 @@ static int hist_entry__init(struct hist_entry *he,
> return -ENOMEM;
> }
>
> - memcpy(he->branch_info, template->branch_info,
> + memcpy(he->branch_info, key->bi,
> sizeof(*he->branch_info));
>
> map__get(he->branch_info->from.map);
> @@ -459,23 +492,25 @@ static struct hist_entry_ops default_ops = {
> .free = hist_entry__free,
> };
>
> -static struct hist_entry *hist_entry__new(struct hist_entry *template,
> +static struct hist_entry *hist_entry__new(struct hist_entry_cmp_key *key,
> + struct hists *hists,
> + struct hist_entry_ops *ops,
> bool sample_self)
> {
> - struct hist_entry_ops *ops = template->ops;
> size_t callchain_size = 0;
> struct hist_entry *he;
> int err = 0;
>
> if (!ops)
> - ops = template->ops = &default_ops;
> + ops = &default_ops;
>
> if (symbol_conf.use_callchain)
> callchain_size = sizeof(struct callchain_root);
>
> he = ops->new(callchain_size);
> if (he) {
> - err = hist_entry__init(he, template, sample_self, callchain_size);
> + he->ops = ops;
> + err = hist_entry__init(he, key, hists, sample_self, callchain_size);
> if (err) {
> ops->free(he);
> he = NULL;
> @@ -485,13 +520,6 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template,
> return he;
> }
>
> -static u8 symbol__parent_filter(const struct symbol *parent)
> -{
> - if (symbol_conf.exclude_other && parent == NULL)
> - return 1 << HIST_FILTER__PARENT;
> - return 0;
> -}
> -
> static void hist_entry__add_callchain_period(struct hist_entry *he, u64 period)
> {
> if (!hist_entry__has_callchains(he) || !symbol_conf.use_callchain)
> @@ -502,17 +530,43 @@ static void hist_entry__add_callchain_period(struct hist_entry *he, u64 period)
> he->hists->callchain_non_filtered_period += period;
> }
>
> +static int64_t
> +hist_entry__cmp(struct hist_entry *entry, struct hist_entry_cmp_key *key)
> +{
> + struct hists *hists = entry->hists;
> + struct perf_hpp_fmt *fmt;
> + int64_t cmp = 0;
> +
> + hists__for_each_sort_list(hists, fmt) {
> + if (perf_hpp__is_dynamic_entry(fmt) &&
> + !perf_hpp__defined_dynamic_entry(fmt, hists))
> + continue;
> +
> + cmp = fmt->cmp(fmt, entry, key);
> + if (cmp)
> + break;
> + }
> +
> + return cmp;
> +}
> +
> +unsigned long hist_lookups;
> +unsigned long hist_hits;
> +unsigned long hist_misses;
> +
> static struct hist_entry *hists__findnew_entry(struct hists *hists,
> - struct hist_entry *entry,
> - struct addr_location *al,
> + struct hist_entry_cmp_key *key,
> + struct hist_entry_ops *ops,
> bool sample_self)
> {
> struct rb_node **p;
> struct rb_node *parent = NULL;
> struct hist_entry *he;
> int64_t cmp;
> - u64 period = entry->stat.period;
> - u64 weight = entry->stat.weight;
> + u64 period = key->sample->period;
> + u64 weight = key->sample->weight;
> +
> + hist_lookups++;
>
> p = &hists->entries_in->rb_node;
>
> @@ -526,7 +580,7 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
> * function when searching an entry regardless which sort
> * keys were used.
> */
> - cmp = hist_entry__cmp(he, entry);
> + cmp = hist_entry__cmp(he, key);
>
> if (!cmp) {
> if (sample_self) {
> @@ -540,7 +594,7 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
> * This mem info was allocated from sample__resolve_mem
> * and will not be used anymore.
> */
> - mem_info__zput(entry->mem_info);
> + mem_info__zput(key->mem_info);
>
> /* If the map of an existing hist_entry has
> * become out-of-date due to an exec() or
> @@ -548,10 +602,11 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
> * mis-adjust symbol addresses when computing
> * the history counter to increment.
> */
> - if (he->ms.map != entry->ms.map) {
> + if (he->ms.map != key->al->map) {
> map__put(he->ms.map);
> - he->ms.map = map__get(entry->ms.map);
> + he->ms.map = map__get(key->al->map);
> }
> + hist_hits++;
> goto out;
> }
>
> @@ -561,7 +616,8 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
> p = &(*p)->rb_right;
> }
>
> - he = hist_entry__new(entry, sample_self);
> + hist_misses++;
> + he = hist_entry__new(key, hists, ops, sample_self);
> if (!he)
> return NULL;
>
> @@ -573,9 +629,9 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
> rb_insert_color(&he->rb_node_in, hists->entries_in);
> out:
> if (sample_self)
> - he_stat__add_cpumode_period(&he->stat, al->cpumode, period);
> + he_stat__add_cpumode_period(&he->stat, key->al->cpumode, period);
> if (symbol_conf.cumulate_callchain)
> - he_stat__add_cpumode_period(he->stat_acc, al->cpumode, period);
> + he_stat__add_cpumode_period(he->stat_acc, key->al->cpumode, period);
> return he;
> }
>
> @@ -589,39 +645,19 @@ __hists__add_entry(struct hists *hists,
> bool sample_self,
> struct hist_entry_ops *ops)
> {
> - struct namespaces *ns = thread__namespaces(al->thread);
> - struct hist_entry entry = {
> - .thread = al->thread,
> - .comm = thread__comm(al->thread),
> - .cgroup_id = {
> - .dev = ns ? ns->link_info[CGROUP_NS_INDEX].dev : 0,
> - .ino = ns ? ns->link_info[CGROUP_NS_INDEX].ino : 0,
> - },
> - .ms = {
> - .map = al->map,
> - .sym = al->sym,
> - },
> - .srcline = al->srcline ? strdup(al->srcline) : NULL,
> - .socket = al->socket,
> - .cpu = al->cpu,
> - .cpumode = al->cpumode,
> - .ip = al->addr,
> - .level = al->level,
> - .stat = {
> - .nr_events = 1,
> - .period = sample->period,
> - .weight = sample->weight,
> - },
> - .parent = sym_parent,
> - .filtered = symbol__parent_filter(sym_parent) | al->filtered,
> - .hists = hists,
> - .branch_info = bi,
> - .mem_info = mi,
> - .transaction = sample->transaction,
> - .raw_data = sample->raw_data,
> - .raw_size = sample->raw_size,
> - .ops = ops,
> - }, *he = hists__findnew_entry(hists, &entry, al, sample_self);
> + struct hist_entry_cmp_key key;
> + struct hist_entry *he;
> +
> + key.al = al;
> + key.comm = thread__comm(al->thread);
> + key.bi = bi;
> + key.sym_parent = sym_parent;
> + key.sample = sample;
> + key.mem_info = mi;
> + key.srcfile = NULL;
> + key.trace_output = NULL;
> +
> + he = hists__findnew_entry(hists, &key, ops, sample_self);
>
> if (!hists->has_callchains && he && he->callchain_size != 0)
> hists->has_callchains = true;
> @@ -947,7 +983,9 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
> struct perf_evsel *evsel = iter->evsel;
> struct perf_sample *sample = iter->sample;
> struct hist_entry **he_cache = iter->priv;
> + struct hist_entry_cmp_key key;
> struct hist_entry *he;
> +#if 0
> struct hist_entry he_tmp = {
> .hists = evsel__hists(evsel),
> .cpu = al->cpu,
> @@ -963,6 +1001,7 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
> .raw_data = sample->raw_data,
> .raw_size = sample->raw_size,
> };
> +#endif
> int i;
> struct callchain_cursor cursor;
>
> @@ -974,8 +1013,16 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
> * Check if there's duplicate entries in the callchain.
> * It's possible that it has cycles or recursive calls.
> */
> + key.al = al;
> + key.comm = thread__comm(al->thread);
> + key.bi = NULL;
> + key.sym_parent = iter->parent;
> + key.sample = sample;
> + key.mem_info = NULL;
> + key.srcfile = NULL;
> + key.trace_output = NULL;
> for (i = 0; i < iter->curr; i++) {
> - if (hist_entry__cmp(he_cache[i], &he_tmp) == 0) {
> + if (hist_entry__cmp(he_cache[i], &key) == 0) {
> /* to avoid calling callback function */
> iter->he = NULL;
> return 0;
> @@ -1088,26 +1135,6 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
> }
>
> int64_t
> -hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
> -{
> - struct hists *hists = left->hists;
> - struct perf_hpp_fmt *fmt;
> - int64_t cmp = 0;
> -
> - hists__for_each_sort_list(hists, fmt) {
> - if (perf_hpp__is_dynamic_entry(fmt) &&
> - !perf_hpp__defined_dynamic_entry(fmt, hists))
> - continue;
> -
> - cmp = fmt->cmp(fmt, left, right);
> - if (cmp)
> - break;
> - }
> -
> - return cmp;
> -}
> -
> -int64_t
> hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
> {
> struct hists *hists = left->hists;
> @@ -1312,7 +1339,11 @@ static struct hist_entry *hierarchy_insert_entry(struct hists *hists,
> p = &parent->rb_right;
> }
>
> - new = hist_entry__new(he, true);
> +#if 1
> + new = NULL;
> +#else
> + new = hist_entry__new(he, true); /* XXX fix XXX */
> +#endif
> if (new == NULL)
> return NULL;
>
> @@ -2168,7 +2199,11 @@ static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
> p = &(*p)->rb_right;
> }
>
> - he = hist_entry__new(pair, true);
> +#if 1
> + he = NULL;
> +#else
> + he = hist_entry__new(pair, true); /* XXX fix XXX */
> +#endif
> if (he) {
> memset(&he->stat, 0, sizeof(he->stat));
> he->hists = hists;
> @@ -2213,7 +2248,11 @@ static struct hist_entry *add_dummy_hierarchy_entry(struct hists *hists,
> p = &parent->rb_right;
> }
>
> - he = hist_entry__new(pair, true);
> +#if 1
> + he = NULL;
> +#else
> + he = hist_entry__new(pair, true); /* XXX fix XXX */
> +#endif
> if (he) {
> rb_link_node(&he->rb_node_in, parent, p);
> rb_insert_color(&he->rb_node_in, root);
> diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
> index fe3dfaa..a3d66e1 100644
> --- a/tools/perf/ui/hist.c
> +++ b/tools/perf/ui/hist.c
> @@ -372,8 +372,15 @@ HPP_RAW_FNS(samples, nr_events)
> HPP_RAW_FNS(period, period)
>
> static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
> - struct hist_entry *a __maybe_unused,
> - struct hist_entry *b __maybe_unused)
> + struct hist_entry *entry __maybe_unused,
> + struct hist_entry_cmp_key *key __maybe_unused)
> +{
> + return 0;
> +}
> +
> +static int64_t hpp__nop_collapse(struct perf_hpp_fmt *fmt __maybe_unused,
> + struct hist_entry *a __maybe_unused,
> + struct hist_entry *b __maybe_unused)
> {
> return 0;
> }
> @@ -399,7 +406,7 @@ static bool hpp__equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
> .color = hpp__color_ ## _fn, \
> .entry = hpp__entry_ ## _fn, \
> .cmp = hpp__nop_cmp, \
> - .collapse = hpp__nop_cmp, \
> + .collapse = hpp__nop_collapse, \
> .sort = hpp__sort_ ## _fn, \
> .idx = PERF_HPP__ ## _idx, \
> .equal = hpp__equal, \
> @@ -413,7 +420,7 @@ static bool hpp__equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
> .color = hpp__color_ ## _fn, \
> .entry = hpp__entry_ ## _fn, \
> .cmp = hpp__nop_cmp, \
> - .collapse = hpp__nop_cmp, \
> + .collapse = hpp__nop_collapse, \
> .sort = hpp__sort_ ## _fn, \
> .idx = PERF_HPP__ ## _idx, \
> .equal = hpp__equal, \
> @@ -426,7 +433,7 @@ static bool hpp__equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
> .width = hpp__width_fn, \
> .entry = hpp__entry_ ## _fn, \
> .cmp = hpp__nop_cmp, \
> - .collapse = hpp__nop_cmp, \
> + .collapse = hpp__nop_collapse, \
> .sort = hpp__sort_ ## _fn, \
> .idx = PERF_HPP__ ## _idx, \
> .equal = hpp__equal, \
> diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
> index f3aa9d0..190f5eb 100644
> --- a/tools/perf/builtin-c2c.c
> +++ b/tools/perf/builtin-c2c.c
> @@ -1717,12 +1717,13 @@ static int c2c_se_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
> }
>
> static int64_t c2c_se_cmp(struct perf_hpp_fmt *fmt,
> - struct hist_entry *a, struct hist_entry *b)
> + struct hist_entry *entry,
> + struct hist_entry_cmp_key *key)
> {
> struct c2c_fmt *c2c_fmt = container_of(fmt, struct c2c_fmt, fmt);
> struct c2c_dimension *dim = c2c_fmt->dim;
>
> - return dim->se->se_cmp(a, b);
> + return dim->se->se_cmp_key(entry, key);
> }
>
> static int64_t c2c_se_collapse(struct perf_hpp_fmt *fmt,
> @@ -1755,8 +1756,13 @@ static struct c2c_fmt *get_format(const char *name)
> INIT_LIST_HEAD(&fmt->list);
> INIT_LIST_HEAD(&fmt->sort_list);
>
> +#if 1
> + fmt->cmp = c2c_se_cmp;
> + fmt->sort = dim->cmp;
> +#else
> fmt->cmp = dim->se ? c2c_se_cmp : dim->cmp;
> fmt->sort = dim->se ? c2c_se_cmp : dim->cmp;
> +#endif
> fmt->color = dim->se ? NULL : dim->color;
> fmt->entry = dim->se ? c2c_se_entry : dim->entry;
> fmt->header = c2c_header;
> diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
> index 39db2ee..2684efa 100644
> --- a/tools/perf/builtin-diff.c
> +++ b/tools/perf/builtin-diff.c
> @@ -604,8 +604,16 @@ hist_entry__cmp_compute_idx(struct hist_entry *left, struct hist_entry *right,
>
> static int64_t
> hist_entry__cmp_nop(struct perf_hpp_fmt *fmt __maybe_unused,
> - struct hist_entry *left __maybe_unused,
> - struct hist_entry *right __maybe_unused)
> + struct hist_entry *entry __maybe_unused,
> + struct hist_entry_cmp_key *key __maybe_unused)
> +{
> + return 0;
> +}
> +
> +static int64_t
> +hist_entry__collapse_nop(struct perf_hpp_fmt *fmt __maybe_unused,
> + struct hist_entry *a __maybe_unused,
> + struct hist_entry *b __maybe_unused)
> {
> return 0;
> }
> @@ -1141,7 +1149,7 @@ static void data__hpp_register(struct data__file *d, int idx)
> fmt->width = hpp__width;
> fmt->entry = hpp__entry_global;
> fmt->cmp = hist_entry__cmp_nop;
> - fmt->collapse = hist_entry__cmp_nop;
> + fmt->collapse = hist_entry__collapse_nop;
>
> /* TODO more colors */
> switch (idx) {
> @@ -1166,7 +1174,7 @@ static void data__hpp_register(struct data__file *d, int idx)
> fmt->sort = hist_entry__cmp_delta_abs;
> break;
> default:
> - fmt->sort = hist_entry__cmp_nop;
> + fmt->sort = hist_entry__collapse_nop;
> break;
> }
>
> @@ -1230,7 +1238,7 @@ static int ui_init(void)
> }
>
> fmt->cmp = hist_entry__cmp_nop;
> - fmt->collapse = hist_entry__cmp_nop;
> + fmt->collapse = hist_entry__collapse_nop;
>
> switch (compute) {
> case COMPUTE_DELTA: