Re: [PATCH v13 09/29] tracing: Add 'hist' event trigger command

From: Tom Zanussi
Date: Fri Feb 19 2016 - 18:50:00 EST


Hi Steve,

On Fri, 2016-02-19 at 16:23 -0500, Steven Rostedt wrote:
> On Thu, 10 Dec 2015 12:50:51 -0600
> Tom Zanussi <tom.zanussi@xxxxxxxxxxxxxxx> wrote:
>
> > diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
> > new file mode 100644
> > index 0000000..213aa79
> > --- /dev/null
> > +++ b/kernel/trace/trace_events_hist.c
> > @@ -0,0 +1,837 @@
> > +/*
> > + * trace_events_hist - trace event hist triggers
> > + *
> > + * This program is free software; you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License as published by
> > + * the Free Software Foundation; either version 2 of the License, or
> > + * (at your option) any later version.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> > + * GNU General Public License for more details.
> > + *
> > + * Copyright (C) 2015 Tom Zanussi <tom.zanussi@xxxxxxxxxxxxxxx>
> > + */
> > +
> > +#include <linux/module.h>
> > +#include <linux/kallsyms.h>
> > +#include <linux/mutex.h>
> > +#include <linux/slab.h>
> > +#include <linux/stacktrace.h>
> > +
> > +#include "tracing_map.h"
> > +#include "trace.h"
> > +
> > +struct hist_field;
> > +
> > +typedef u64 (*hist_field_fn_t) (struct hist_field *field, void *event);
> > +
> > +struct hist_field {
> > + struct ftrace_event_field *field;
> > + unsigned long flags;
> > + hist_field_fn_t fn;
> > + unsigned int size;
> > +};
> > +
> > +static u64 hist_field_counter(struct hist_field *field, void *event)
> > +{
> > + return 1;
> > +}
> > +
> > +static u64 hist_field_string(struct hist_field *hist_field, void *event)
> > +{
> > + char *addr = (char *)(event + hist_field->field->offset);
> > +
> > + return (u64)(unsigned long)addr;
> > +}
> > +
> > +#define DEFINE_HIST_FIELD_FN(type) \
> > +static u64 hist_field_##type(struct hist_field *hist_field, void *event)\
> > +{ \
> > + type *addr = (type *)(event + hist_field->field->offset); \
> > + \
> > + return (u64)*addr; \
> > +}
> > +
> > +DEFINE_HIST_FIELD_FN(s64);
> > +DEFINE_HIST_FIELD_FN(u64);
> > +DEFINE_HIST_FIELD_FN(s32);
> > +DEFINE_HIST_FIELD_FN(u32);
> > +DEFINE_HIST_FIELD_FN(s16);
> > +DEFINE_HIST_FIELD_FN(u16);
> > +DEFINE_HIST_FIELD_FN(s8);
> > +DEFINE_HIST_FIELD_FN(u8);
> > +
> > +#define for_each_hist_field(i, hist_data) \
> > + for (i = 0; i < hist_data->n_fields; i++)
> > +
> > +#define for_each_hist_val_field(i, hist_data) \
> > + for (i = 0; i < hist_data->n_vals; i++)
> > +
> > +#define for_each_hist_key_field(i, hist_data) \
> > + for (i = hist_data->n_vals; i < hist_data->n_fields; i++)
>
> The above should have parenthesis around the macro variables.
>
> e.g.
>
> (hist_data)->n_fields
>
> > +
> > +#define HITCOUNT_IDX 0
> > +#define HIST_KEY_MAX 1
> > +#define HIST_KEY_SIZE_MAX MAX_FILTER_STR_VAL
> > +
> > +enum hist_field_flags {
> > + HIST_FIELD_HITCOUNT = 1,
> > + HIST_FIELD_KEY = 2,
> > + HIST_FIELD_STRING = 4,
>
> Note, please add "_FL" to the enums to denote they are flags (used for
> bitmask).
>
> e.g. HIST_FIELD_FL_HITCOUNT
>
>
> > +};
> > +
> > +struct hist_trigger_attrs {
> > + char *keys_str;
> > + unsigned int map_bits;
> > +};
> > +
> > +struct hist_trigger_data {
> > + struct hist_field *fields[TRACING_MAP_FIELDS_MAX];
>
> Hmm I'm confused, TRACING_MAP_FIELDS_MAX == 4
>
> But below it we index into fields with n_fields. Which look to me can
> be much bigger than 4.
>
> Or is there some correlation between n_vals, n_keys and 4?
>

Yeah, the correlation is:

n_fields = n_vals + n_keys

where n_keys can't be larger than TRACING_MAP_KEYS_MAX, and n_vals can't
be larger than TRACING_MAP_FIELDS_MAX - TRACING_MAP_KEYS_MAX, where
currently TRACING_MAP_KEYS_MAX = 2 and TRACING_MAP_FIELDS_MAX = 4, which
means the max number of values is currently 2 as well. So n_fields
shouldn't be bigger than 4, but...

Looking through the code to make sure, I realized that it should be
fields[TRACING_MAP_FIELDS_MAX + 1] because of the hitcount, which is an
always-defined value field and takes the first slot in fields[]
(followed by the rest of the vals, then the keys). Previous versions
had separate keys[] and vals[] arrays, but that was changed to be more
uniform and somehow adding 1 for the hitcount was overlooked. Obviously
I'll fix that size, but the rest of the code should be ok wrt that.

> > + unsigned int n_vals;
> > + unsigned int n_keys;
> > + unsigned int n_fields;
> > + unsigned int key_size;
> > + struct tracing_map_sort_key sort_keys[TRACING_MAP_SORT_KEYS_MAX];
> > + unsigned int n_sort_keys;
> > + struct trace_event_file *event_file;
> > + struct hist_trigger_attrs *attrs;
> > + struct tracing_map *map;
> > +};
> > +
> > +static hist_field_fn_t select_value_fn(int field_size, int field_is_signed)
> > +{
> > + hist_field_fn_t fn = NULL;
> > +
> > + switch (field_size) {
> > + case 8:
> > + if (field_is_signed)
> > + fn = hist_field_s64;
> > + else
> > + fn = hist_field_u64;
> > + break;
> > + case 4:
> > + if (field_is_signed)
> > + fn = hist_field_s32;
> > + else
> > + fn = hist_field_u32;
> > + break;
> > + case 2:
> > + if (field_is_signed)
> > + fn = hist_field_s16;
> > + else
> > + fn = hist_field_u16;
> > + break;
> > + case 1:
> > + if (field_is_signed)
> > + fn = hist_field_s8;
> > + else
> > + fn = hist_field_u8;
> > + break;
> > + }
> > +
> > + return fn;
> > +}
> > +
> > +static int parse_map_size(char *str)
> > +{
> > + unsigned long size, map_bits;
> > + int ret;
> > +
> > + strsep(&str, "=");
> > + if (!str) {
> > + ret = -EINVAL;
> > + goto out;
> > + }
> > +
> > + ret = kstrtoul(str, 0, &size);
> > + if (ret)
> > + goto out;
> > +
> > + map_bits = ilog2(roundup_pow_of_two(size));
> > + if (map_bits < TRACING_MAP_BITS_MIN ||
> > + map_bits > TRACING_MAP_BITS_MAX)
> > + ret = -EINVAL;
> > + else
> > + ret = map_bits;
> > + out:
> > + return ret;
> > +}
> > +
> > +static void destroy_hist_trigger_attrs(struct hist_trigger_attrs *attrs)
> > +{
> > + if (!attrs)
> > + return;
> > +
> > + kfree(attrs->keys_str);
> > + kfree(attrs);
> > +}
> > +
> > +static struct hist_trigger_attrs *parse_hist_trigger_attrs(char *trigger_str)
> > +{
> > + struct hist_trigger_attrs *attrs;
> > + int ret = 0;
> > +
> > + attrs = kzalloc(sizeof(*attrs), GFP_KERNEL);
> > + if (!attrs)
> > + return ERR_PTR(-ENOMEM);
> > +
> > + while (trigger_str) {
> > + char *str = strsep(&trigger_str, ":");
> > +
> > + if (!strncmp(str, "keys", strlen("keys")) ||
> > + !strncmp(str, "key", strlen("key")))
>
> nit, I prefer "strncmp(x,y,n) == 0" as it shows an equals, and, at
> least to me, easier to quickly understand.
>

OK, I'll change it - I've gotten so used to automatically doing the
negation in my head, but I don't care either way.

> But not so nit, if !strncmp(str, "keys", strlen("keys")) then
> !strcmp(str, "key", strlen("key")) is true!
>
> It has the same result as:
>
> if (!strncmp(str, "key", strlen("key")))
>
> That is equivalent to if (x > 12 || x > 10) where
> if (x > 10) will do.
>

Yeah, that is kind of useless.

> Is that suppose to be a full match or partial match?
>

It should match either but not less or more.

> > + attrs->keys_str = kstrdup(str, GFP_KERNEL);
> > + else if (!strncmp(str, "size", strlen("size"))) {
>
> Is it OK to allow "sizeUgaBooga=10"?
>
> Perhaps the compare should be against "size=" ?
>
> Same for the keys above.

Yeah, good point, I'll fix all those up..

>
> > + int map_bits = parse_map_size(str);
> > +
> > + if (map_bits < 0) {
> > + ret = map_bits;
> > + goto free;
> > + }
> > + attrs->map_bits = map_bits;
> > + } else {
> > + ret = -EINVAL;
> > + goto free;
> > + }
> > + }
> > +
> > + if (!attrs->keys_str) {
> > + ret = -EINVAL;
> > + goto free;
> > + }
> > +
> > + return attrs;
> > + free:
> > + destroy_hist_trigger_attrs(attrs);
> > +
> > + return ERR_PTR(ret);
> > +}
> > +
>
> I stopped here. I'll review the rest later.
>

OK, thanks!

Tom

> -- Steve