Re: [RFC PATCH] perf script python: Allow reporting [un]throttle

From: Jiri Olsa
Date: Tue Aug 31 2021 - 14:46:39 EST


On Mon, Aug 16, 2021 at 05:21:33PM -0700, Stephen Brennan wrote:
> perf_events may sometimes throttle an event due to creating too many
> samples during a given timer tick. As of now, the perf tool will not
> report on throttling, which means this is a silent error. Implement a
> callback for the throttle and unthrottle events within the Python
> scripting engine, which can allow scripts to detect and report when
> events may have been lost due to throttling.
>
> A python script could simply define throttle() and unthrottle()
> functions to begin receiving them, e.g.:
>
> ```
> from __future__ import print_function
>
> def process_event(param_dict):
> print("event cpu={} time={}".format(
> param_dict["sample"]["cpu"], param_dict["sample"]["time"]))
>
> def throttle(*args):
> print("throttle(time={}, cpu={}, pid={}, tid={})".format(*args))
>
> def unthrottle(*args):
> print("unthrottle(time={}, cpu={}, pid={}, tid={})".format(*args))
> ```

throttle event has also 'id' and 'stream_id' I guess you don't
need it, but maybe we should add it to be complete

otherwose looks good

jirka

>
> Signed-off-by: Stephen Brennan <stephen.s.brennan@xxxxxxxxxx>
> ---
>
> Since this mailing list thread[1] I've been wondering about ways to
> detect and handle throttling. Perf will warn when events are missed
> because the ring buffer filled up, but it will not give any indication
> about the throttling -- except for the throttle message logged by the
> kernel. Ideally, I'd like to also extend the other perf tools to give
> a warning, but detecting it after the fact via a script was easiest to
> implement, and most flexible for me to use. I'd appreciate feedback on
> this change as well as what such a warning in perf record/report would
> look like. For example:
>
> [ perf record: WARNING: samples were throttled %u times for %u seconds ]
>
> [1]: https://lore.kernel.org/linux-perf-users/87lf6rclcm.fsf@xxxxxxxxxxxxxxxxxxxxxxxxxx/T/#u
>
> tools/perf/builtin-script.c | 13 ++++++++
> .../scripting-engines/trace-event-python.c | 30 +++++++++++++++++++
> tools/perf/util/trace-event.h | 3 ++
> 3 files changed, 46 insertions(+)
>
> diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
> index 064da7f3618d..072869a35f85 100644
> --- a/tools/perf/builtin-script.c
> +++ b/tools/perf/builtin-script.c
> @@ -2492,6 +2492,17 @@ process_lost_event(struct perf_tool *tool,
> sample->tid);
> }
>
> +static int
> +process_throttle_event(struct perf_tool *tool __maybe_unused,
> + union perf_event *event,
> + struct perf_sample *sample,
> + struct machine *machine)
> +{
> + if (scripting_ops && scripting_ops->process_throttle)
> + scripting_ops->process_throttle(event, sample, machine);
> + return 0;
> +}
> +
> static int
> process_finished_round_event(struct perf_tool *tool __maybe_unused,
> union perf_event *event,
> @@ -3652,6 +3663,8 @@ int cmd_script(int argc, const char **argv)
> .stat_config = process_stat_config_event,
> .thread_map = process_thread_map_event,
> .cpu_map = process_cpu_map_event,
> + .throttle = process_throttle_event,
> + .unthrottle = process_throttle_event,
> .ordered_events = true,
> .ordering_requires_timestamps = true,
> },
> diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
> index 69129e2aa7a1..5ef1ba5e29bb 100644
> --- a/tools/perf/util/scripting-engines/trace-event-python.c
> +++ b/tools/perf/util/scripting-engines/trace-event-python.c
> @@ -1422,6 +1422,35 @@ static void python_process_event(union perf_event *event,
> }
> }
>
> +static void python_process_throttle(union perf_event *event,
> + struct perf_sample *sample,
> + struct machine *machine)
> +{
> + const char *handler_name;
> + PyObject *handler, *t;
> +
> + if (event->header.type == PERF_RECORD_THROTTLE)
> + handler_name = "throttle";
> + else
> + handler_name = "unthrottle";
> + handler = get_handler(handler_name);
> + if (!handler)
> + return;
> +
> + t = tuple_new(4);
> + if (!t)
> + return;
> +
> + tuple_set_u64(t, 0, sample->time);
> + tuple_set_s32(t, 1, sample->cpu);
> + tuple_set_s32(t, 2, sample->pid);
> + tuple_set_s32(t, 3, sample->tid);
> +
> + call_object(handler, t, handler_name);
> +
> + Py_DECREF(t);
> +}
> +
> static void python_do_process_switch(union perf_event *event,
> struct perf_sample *sample,
> struct machine *machine)
> @@ -2079,5 +2108,6 @@ struct scripting_ops python_scripting_ops = {
> .process_auxtrace_error = python_process_auxtrace_error,
> .process_stat = python_process_stat,
> .process_stat_interval = python_process_stat_interval,
> + .process_throttle = python_process_throttle,
> .generate_script = python_generate_script,
> };
> diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
> index 54aadeedf28c..640981105788 100644
> --- a/tools/perf/util/trace-event.h
> +++ b/tools/perf/util/trace-event.h
> @@ -90,6 +90,9 @@ struct scripting_ops {
> void (*process_stat)(struct perf_stat_config *config,
> struct evsel *evsel, u64 tstamp);
> void (*process_stat_interval)(u64 tstamp);
> + void (*process_throttle)(union perf_event *event,
> + struct perf_sample *sample,
> + struct machine *machine);
> int (*generate_script) (struct tep_handle *pevent, const char *outfile);
> };
>
> --
> 2.30.2
>