Re: [PATCH v12 10/30] tracing: Add 'hist' event trigger command

From: Namhyung Kim
Date: Sun Nov 29 2015 - 09:55:23 EST


Hi Tom,

On Mon, Nov 23, 2015 at 01:51:25PM -0600, Tom Zanussi wrote:
> 'hist' triggers allow users to continually aggregate trace events,
> which can then be viewed afterwards by simply reading a 'hist' file
> containing the aggregation in a human-readable format.
>
> The basic idea is very simple and boils down to a mechanism whereby
> trace events, rather than being exhaustively dumped in raw form and
> viewed directly, are automatically 'compressed' into meaningful tables
> completely defined by the user.
>
> This is done strictly via single-line command-line commands and
> without the aid of any kind of programming language or interpreter.
>
> A surprising number of typical use cases can be accomplished by users
> via this simple mechanism. In fact, a large number of the tasks that
> users typically do using the more complicated script-based tracing
> tools, at least during the initial stages of an investigation, can be
> accomplished by simply specifying a set of keys and values to be used
> in the creation of a hash table.
>
> The Linux kernel trace event subsystem happens to provide an extensive
> list of keys and values ready-made for such a purpose in the form of
> the event format files associated with each trace event. By simply
> consulting the format file for field names of interest and by plugging
> them into the hist trigger command, users can create an endless number
> of useful aggregations to help with investigating various properties
> of the system. See Documentation/trace/events.txt for examples.
>
> hist triggers are implemented on top of the existing event trigger
> infrastructure, and as such are consistent with the existing triggers
> from a user's perspective as well.
>
> The basic syntax follows the existing trigger syntax. Users start an
> aggregation by writing a 'hist' trigger to the event of interest's
> trigger file:
>
> # echo hist:keys=xxx [ if filter] > event/trigger
>
> Once a hist trigger has been set up, by default it continually
> aggregates every matching event into a hash table using the event key
> and a value field named 'hitcount'.
>
> To view the aggregation at any point in time, simply read the 'hist'
> file in the same directory as the 'trigger' file:
>
> # cat event/hist
>
> The detailed syntax provides additional options for user control, and
> is described exhaustively in Documentation/trace/events.txt and in the
> virtual tracing/README file in the tracing subsystem.
>
> Signed-off-by: Tom Zanussi <tom.zanussi@xxxxxxxxxxxxxxx>
> Tested-by: Masami Hiramatsu <masami.hiramatsu.pt@xxxxxxxxxxx>
> ---

[SNIP]
> +static struct hist_trigger_data *
> +create_hist_data(unsigned int map_bits,
> + struct hist_trigger_attrs *attrs,
> + struct trace_event_file *file)
> +{
> + struct hist_trigger_data *hist_data;
> + int ret = 0;
> +
> + hist_data = kzalloc(sizeof(*hist_data), GFP_KERNEL);
> + if (!hist_data)
> + return ERR_PTR(-ENOMEM);
> +
> + hist_data->attrs = attrs;
> +
> + ret = create_hist_fields(hist_data, file);
> + if (ret < 0)
> + goto free;
> +
> + ret = create_sort_keys(hist_data);
> + if (ret < 0)
> + goto free;
> +
> + hist_data->map = tracing_map_create(map_bits, hist_data->key_size,
> + NULL, hist_data);
> + if (IS_ERR(hist_data->map)) {
> + ret = PTR_ERR(hist_data->map);
> + hist_data->map = NULL;
> + goto free;
> + }
> +
> + ret = create_tracing_map_fields(hist_data);
> + if (ret)
> + goto free;
> +
> + ret = tracing_map_init(hist_data->map);
> + if (ret)
> + goto free;
> +
> + hist_data->event_file = file;
> + out:
> + return hist_data;
> + free:
> + hist_data->attrs = NULL;
> +
> + destroy_hist_data(hist_data);
> + if (ret)
> + hist_data = ERR_PTR(ret);
> + else
> + hist_data = NULL;

Is this else part really needed? It seems that all code path to here
set the 'ret' non-zero. Otherwise ...


> +
> + goto out;
> +}

[SNIP]
> +static int event_hist_trigger_func(struct event_command *cmd_ops,
> + struct trace_event_file *file,
> + char *glob, char *cmd, char *param)
> +{
> + unsigned int hist_trigger_bits = TRACING_MAP_BITS_DEFAULT;
> + struct event_trigger_data *trigger_data;
> + struct hist_trigger_attrs *attrs;
> + struct event_trigger_ops *trigger_ops;
> + struct hist_trigger_data *hist_data;
> + char *trigger;
> + int ret = 0;
> +
> + if (!param)
> + return -EINVAL;
> +
> + /* separate the trigger from the filter (k:v [if filter]) */
> + trigger = strsep(&param, " \t");
> + if (!trigger)
> + return -EINVAL;
> +
> + attrs = parse_hist_trigger_attrs(trigger);
> + if (IS_ERR(attrs))
> + return PTR_ERR(attrs);
> +
> + if (attrs->map_bits)
> + hist_trigger_bits = attrs->map_bits;
> +
> + hist_data = create_hist_data(hist_trigger_bits, attrs, file);
> + if (IS_ERR(hist_data)) {

... it should be IS_ERR_OR_NULL(hist_data) then.

Thanks,
Namhyung


> + destroy_hist_trigger_attrs(attrs);
> + return PTR_ERR(hist_data);
> + }
> +
> + trigger_ops = cmd_ops->get_trigger_ops(cmd, trigger);
> +
> + ret = -ENOMEM;
> + trigger_data = kzalloc(sizeof(*trigger_data), GFP_KERNEL);
> + if (!trigger_data)
> + goto out_free;
> +
> + trigger_data->count = -1;
> + trigger_data->ops = trigger_ops;
> + trigger_data->cmd_ops = cmd_ops;
> +
> + INIT_LIST_HEAD(&trigger_data->list);
> + RCU_INIT_POINTER(trigger_data->filter, NULL);
> +
> + trigger_data->private_data = hist_data;
> +
> + if (glob[0] == '!') {
> + cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file);
> + ret = 0;
> + goto out_free;
> + }
> +
> + if (!param) /* if param is non-empty, it's supposed to be a filter */
> + goto out_reg;
> +
> + if (!cmd_ops->set_filter)
> + goto out_reg;
> +
> + ret = cmd_ops->set_filter(param, trigger_data, file);
> + if (ret < 0)
> + goto out_free;
> + out_reg:
> + ret = cmd_ops->reg(glob, trigger_ops, trigger_data, file);
> + /*
> + * The above returns on success the # of triggers registered,
> + * but if it didn't register any it returns zero. Consider no
> + * triggers registered a failure too.
> + */
> + if (!ret) {
> + ret = -ENOENT;
> + goto out_free;
> + } else if (ret < 0)
> + goto out_free;
> + /* Just return zero, not the number of registered triggers */
> + ret = 0;
> + out:
> + return ret;
> + out_free:
> + if (cmd_ops->set_filter)
> + cmd_ops->set_filter(NULL, trigger_data, NULL);
> +
> + kfree(trigger_data);
> +
> + destroy_hist_data(hist_data);
> + goto out;
> +}
> +
> +static struct event_command trigger_hist_cmd = {
> + .name = "hist",
> + .trigger_type = ETT_EVENT_HIST,
> + .needs_rec = true,
> + .func = event_hist_trigger_func,
> + .reg = hist_register_trigger,
> + .unreg = unregister_trigger,
> + .get_trigger_ops = event_hist_get_trigger_ops,
> + .set_filter = set_trigger_filter,
> +};
> +
> +__init int register_trigger_hist_cmd(void)
> +{
> + int ret;
> +
> + ret = register_event_command(&trigger_hist_cmd);
> + WARN_ON(ret < 0);
> +
> + return ret;
> +}
> diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
> index 362ae86..1f1db3c 100644
> --- a/kernel/trace/trace_events_trigger.c
> +++ b/kernel/trace/trace_events_trigger.c
> @@ -1452,6 +1452,7 @@ __init int register_trigger_cmds(void)
> register_trigger_snapshot_cmd();
> register_trigger_stacktrace_cmd();
> register_trigger_enable_disable_cmds();
> + register_trigger_hist_cmd();
>
> return 0;
> }
> --
> 1.9.3
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/