RE: [RFC PATCH v6 4/5] perf stat: Add retire latency print functions to print out at the very end of print out

From: Wang, Weilin
Date: Mon Apr 01 2024 - 17:09:06 EST




> -----Original Message-----
> From: Namhyung Kim <namhyung@xxxxxxxxxx>
> Sent: Monday, April 1, 2024 2:04 PM
> To: Wang, Weilin <weilin.wang@xxxxxxxxx>
> Cc: Ian Rogers <irogers@xxxxxxxxxx>; Arnaldo Carvalho de Melo
> <acme@xxxxxxxxxx>; Peter Zijlstra <peterz@xxxxxxxxxxxxx>; Ingo Molnar
> <mingo@xxxxxxxxxx>; Alexander Shishkin
> <alexander.shishkin@xxxxxxxxxxxxxxx>; Jiri Olsa <jolsa@xxxxxxxxxx>; Hunter,
> Adrian <adrian.hunter@xxxxxxxxx>; Kan Liang <kan.liang@xxxxxxxxxxxxxxx>;
> linux-perf-users@xxxxxxxxxxxxxxx; linux-kernel@xxxxxxxxxxxxxxx; Taylor, Perry
> <perry.taylor@xxxxxxxxx>; Alt, Samantha <samantha.alt@xxxxxxxxx>; Biggers,
> Caleb <caleb.biggers@xxxxxxxxx>
> Subject: Re: [RFC PATCH v6 4/5] perf stat: Add retire latency print functions to
> print out at the very end of print out
>
> On Fri, Mar 29, 2024 at 12:12 PM <weilin.wang@xxxxxxxxx> wrote:
> >
> > From: Weilin Wang <weilin.wang@xxxxxxxxx>
> >
> > Add print out functions so that users could read retire latency values.
> >
> > Example output:
> > In this simple example, there is no MEM_INST_RETIRED.STLB_HIT_STORES
> sample.
> > Therefore, the MEM_INST_RETIRED.STLB_HIT_STORES:p retire_latency
> value, count
> > and sum are all 0.
> >
> > Performance counter stats for 'system wide':
> >
> > 181,047,168 cpu_core/TOPDOWN.SLOTS/ # 0.6 %
> tma_dtlb_store
> > 3,195,608 cpu_core/topdown-retiring/
> > 40,156,649 cpu_core/topdown-mem-bound/
> > 3,550,925 cpu_core/topdown-bad-spec/
> > 117,571,818 cpu_core/topdown-fe-bound/
> > 57,118,087 cpu_core/topdown-be-bound/
> > 69,179 cpu_core/EXE_ACTIVITY.BOUND_ON_STORES/
> > 4,582 cpu_core/MEM_INST_RETIRED.STLB_HIT_STORES/
> > 30,183,104 cpu_core/CPU_CLK_UNHALTED.DISTRIBUTED/
> > 30,556,790 cpu_core/CPU_CLK_UNHALTED.THREAD/
> > 168,486 cpu_core/DTLB_STORE_MISSES.WALK_ACTIVE/
> > 0.00 MEM_INST_RETIRED.STLB_HIT_STORES:p 0 0
>
> The output is not aligned and I think it's hard to read.
> I think it should print the result like this:
>
> <sum> <event-name> # <val> average retired latency

Since we would like to use the average retire latency, I would think put average
at the beginning would be more consistent. So in format like:
<val> <event-name> <sum> <count> or <val> <event-name> <count> <sum> ?

I will work on the alignment.

Thanks,
Weilin

>
> Thanks,
> Namhyung
>
>
> >
> > 1.003105924 seconds time elapsed
> >
> > Signed-off-by: Weilin Wang <weilin.wang@xxxxxxxxx>
> > Reviewed-by: Ian Rogers <irogers@xxxxxxxxxx>
> > ---
> > tools/perf/util/stat-display.c | 65
> ++++++++++++++++++++++++++++++++++
> > 1 file changed, 65 insertions(+)
> >
> > diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
> > index bfc1d705f437..6c043d9c9f81 100644
> > --- a/tools/perf/util/stat-display.c
> > +++ b/tools/perf/util/stat-display.c
> > @@ -21,6 +21,7 @@
> > #include "iostat.h"
> > #include "pmu.h"
> > #include "pmus.h"
> > +#include "metricgroup.h"
> >
> > #define CNTR_NOT_SUPPORTED "<not supported>"
> > #define CNTR_NOT_COUNTED "<not counted>"
> > @@ -34,6 +35,7 @@
> > #define COMM_LEN 16
> > #define PID_LEN 7
> > #define CPUS_LEN 4
> > +#define RETIRE_LEN 8
> >
> > static int aggr_header_lens[] = {
> > [AGGR_CORE] = 18,
> > @@ -426,6 +428,67 @@ static void print_metric_std(struct
> perf_stat_config *config,
> > fprintf(out, " %-*s", METRIC_LEN - n - 1, unit);
> > }
> >
> > +static void print_retire_lat_std(struct perf_stat_config *config,
> > + struct outstate *os)
> > +{
> > + FILE *out = os->fh;
> > + bool newline = os->newline;
> > + struct tpebs_retire_lat *t;
> > + struct list_head *retire_lats = &config->tpebs_results;
> > +
> > + list_for_each_entry(t, retire_lats, event.nd) {
> > + if (newline)
> > + do_new_line_std(config, os);
> > + fprintf(out, "%'*.2f %-*s", COUNTS_LEN, t->val, EVNAME_LEN, t-
> >event.name);
> > + fprintf(out, "%*ld %*d\n", RETIRE_LEN, t->count,
> > + RETIRE_LEN, t->sum);
> > + }
> > +}
> > +
> > +static void print_retire_lat_csv(struct perf_stat_config *config,
> > + struct outstate *os)
> > +{
> > + FILE *out = os->fh;
> > + struct tpebs_retire_lat *t;
> > + struct list_head *retire_lats = &config->tpebs_results;
> > + const char *sep = config->csv_sep;
> > +
> > + list_for_each_entry(t, retire_lats, event.nd) {
> > + fprintf(out, "%f%s%s%s%s%ld%s%d\n", t->val, sep, sep, t-
> >event.name, sep,
> > + t->count, sep, t->sum);
> > + }
> > +}
> > +
> > +static void print_retire_lat_json(struct perf_stat_config *config,
> > + struct outstate *os)
> > +{
> > + FILE *out = os->fh;
> > + struct tpebs_retire_lat *t;
> > + struct list_head *retire_lats = &config->tpebs_results;
> > +
> > + fprintf(out, "{");
> > + list_for_each_entry(t, retire_lats, event.nd) {
> > + fprintf(out, "\"retire_latency-value\" : \"%f\", ", t->val);
> > + fprintf(out, "\"event-name\" : \"%s\"", t->event.name);
> > + fprintf(out, "\"sample-counts\" : \"%ld\"", t->count);
> > + fprintf(out, "\"retire_latency-sum\" : \"%d\"", t->sum);
> > + }
> > + fprintf(out, "}");
> > +}
> > +
> > +static void print_retire_lat(struct perf_stat_config *config,
> > + struct outstate *os)
> > +{
> > + if (!&config->tpebs_results)
> > + return;
> > + if (config->json_output)
> > + print_retire_lat_json(config, os);
> > + else if (config->csv_output)
> > + print_retire_lat_csv(config, os);
> > + else
> > + print_retire_lat_std(config, os);
> > +}
> > +
> > static void new_line_csv(struct perf_stat_config *config, void *ctx)
> > {
> > struct outstate *os = ctx;
> > @@ -1609,6 +1672,8 @@ void evlist__print_counters(struct evlist *evlist,
> struct perf_stat_config *conf
> > break;
> > }
> >
> > + print_retire_lat(config, &os);
> > +
> > print_footer(config);
> >
> > fflush(config->output);
> > --
> > 2.43.0
> >