[PATCH v6 18/37] tracing: Add simple expression support to hist triggers

From: Tom Zanussi
Date: Fri Nov 17 2017 - 15:42:01 EST


Add support for simple addition, subtraction, and unary minus
expressions (-(expr) and expr, where expr = b-a, a+b, a+b+c) to hist
triggers, in order to support a minimal set of useful inter-event
calculations.

These operations are needed for calculating latencies between events
(timestamp1-timestamp0) and for combined latencies (latencies over 3
or more events).

In the process, factor out some common code from key and value
parsing.

Signed-off-by: Tom Zanussi <tom.zanussi@xxxxxxxxxxxxxxx>
---
kernel/trace/trace_events_hist.c | 513 +++++++++++++++++++++++++++++++++------
1 file changed, 440 insertions(+), 73 deletions(-)

diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index b33c2bc..f6c59a5 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -32,6 +32,13 @@ typedef u64 (*hist_field_fn_t) (struct hist_field *field, void *event,
#define HIST_FIELD_OPERANDS_MAX 2
#define HIST_FIELDS_MAX (TRACING_MAP_FIELDS_MAX + TRACING_MAP_VARS_MAX)

+/* Operators that can appear in a hist trigger field expression. */
+enum field_op_id {
+	FIELD_OP_NONE = 0,	/* no '+' or '-' in the string */
+	FIELD_OP_PLUS,		/* a+b */
+	FIELD_OP_MINUS,		/* a-b */
+	FIELD_OP_UNARY_MINUS,	/* -(a) */
+};
+
struct hist_var {
char *name;
struct hist_trigger_data *hist_data;
@@ -48,6 +55,8 @@ struct hist_field {
struct hist_field *operands[HIST_FIELD_OPERANDS_MAX];
struct hist_trigger_data *hist_data;
struct hist_var var;
+ enum field_op_id operator;
+ char *name;
};

static u64 hist_field_none(struct hist_field *field, void *event,
@@ -98,6 +107,41 @@ static u64 hist_field_log2(struct hist_field *hist_field, void *event,
return (u64) ilog2(roundup_pow_of_two(val));
}

+/*
+ * Field callback for FIELD_OP_PLUS: evaluate operands[0], then
+ * operands[1], through their own ->fn callbacks and return the sum.
+ */
+static u64 hist_field_plus(struct hist_field *hist_field, void *event,
+			   struct ring_buffer_event *rbe)
+{
+	struct hist_field *lhs = hist_field->operands[0];
+	struct hist_field *rhs = hist_field->operands[1];
+	u64 sum;
+
+	sum = lhs->fn(lhs, event, rbe);
+	sum += rhs->fn(rhs, event, rbe);
+
+	return sum;
+}
+
+/*
+ * Field callback for FIELD_OP_MINUS: evaluate operands[0], then
+ * operands[1], through their own ->fn callbacks and return
+ * operands[0] - operands[1].
+ */
+static u64 hist_field_minus(struct hist_field *hist_field, void *event,
+			    struct ring_buffer_event *rbe)
+{
+	struct hist_field *lhs = hist_field->operands[0];
+	struct hist_field *rhs = hist_field->operands[1];
+	u64 diff;
+
+	diff = lhs->fn(lhs, event, rbe);
+	diff -= rhs->fn(rhs, event, rbe);
+
+	return diff;
+}
+
+/*
+ * Field callback for FIELD_OP_UNARY_MINUS: evaluate operands[0] through
+ * its ->fn callback and return the two's complement negation of the
+ * result.
+ */
+static u64 hist_field_unary_minus(struct hist_field *hist_field, void *event,
+				  struct ring_buffer_event *rbe)
+{
+	struct hist_field *operand = hist_field->operands[0];
+	u64 val = operand->fn(operand, event, rbe);
+
+	/*
+	 * Negate in the unsigned domain.  Negating the value as an s64
+	 * overflows (undefined behaviour) when it is S64_MIN, whereas
+	 * 0 - val wraps and produces the same bit pattern for every input.
+	 */
+	return 0 - val;
+}
+
#define DEFINE_HIST_FIELD_FN(type) \
static u64 hist_field_##type(struct hist_field *hist_field, \
void *event, \
@@ -147,6 +191,7 @@ enum hist_field_flags {
HIST_FIELD_FL_TIMESTAMP = 1 << 10,
HIST_FIELD_FL_TIMESTAMP_USECS = 1 << 11,
HIST_FIELD_FL_VAR = 1 << 12,
+ HIST_FIELD_FL_EXPR = 1 << 13,
};

struct var_defs {
@@ -258,6 +303,8 @@ static const char *hist_field_name(struct hist_field *field,
field_name = hist_field_name(field->operands[0], ++level);
else if (field->flags & HIST_FIELD_FL_TIMESTAMP)
field_name = "$common_timestamp";
+ else if (field->flags & HIST_FIELD_FL_EXPR)
+ field_name = field->name;

if (field_name == NULL)
field_name = "";
@@ -519,6 +566,93 @@ static const char *get_hist_field_flags(struct hist_field *hist_field)
return flags_str;
}

+/*
+ * Append "name" or "name.flags" for @field to @expr.  @expr must be a
+ * NUL-terminated buffer with room for the appended text.
+ */
+static void expr_append_field(char *expr, struct hist_field *field)
+{
+	strcat(expr, hist_field_name(field, 0));
+
+	if (field->flags) {
+		const char *flags_str = get_hist_field_flags(field);
+
+		if (flags_str) {
+			strcat(expr, ".");
+			strcat(expr, flags_str);
+		}
+	}
+}
+
+/*
+ * expr_str - build the printable form of an expression field
+ * @field: field to render
+ * @level: current recursion depth; anything deeper than 1 is refused
+ *
+ * Renders "-(<operand>)" for FIELD_OP_UNARY_MINUS and "<a>-<b>" or
+ * "<a>+<b>" for the binary operators.
+ *
+ * Returns a kzalloc()ed buffer of MAX_FILTER_STR_VAL bytes that the
+ * caller must kfree(), or NULL if the allocation fails, the depth limit
+ * is exceeded or the operator is not one of the above.
+ */
+static char *expr_str(struct hist_field *field, unsigned int level)
+{
+	char *expr;
+
+	if (level > 1)
+		return NULL;
+
+	expr = kzalloc(MAX_FILTER_STR_VAL, GFP_KERNEL);
+	if (!expr)
+		return NULL;
+
+	/*
+	 * The operand of -(x) can be a plain field instead of a nested
+	 * expression.  Such a field carries no operator (assumes
+	 * create_hist_field() zero-initialises it - TODO confirm) and must
+	 * be rendered by its own name rather than through operands[0],
+	 * which may be NULL.
+	 */
+	if (field->operator == FIELD_OP_NONE) {
+		expr_append_field(expr, field);
+		return expr;
+	}
+
+	if (field->operator == FIELD_OP_UNARY_MINUS) {
+		char *subexpr;
+
+		strcat(expr, "-(");
+		subexpr = expr_str(field->operands[0], level + 1);
+		if (!subexpr) {
+			kfree(expr);
+			return NULL;
+		}
+		strcat(expr, subexpr);
+		strcat(expr, ")");
+
+		/* the copy now lives in expr; don't leak the temporary */
+		kfree(subexpr);
+
+		return expr;
+	}
+
+	expr_append_field(expr, field->operands[0]);
+
+	switch (field->operator) {
+	case FIELD_OP_MINUS:
+		strcat(expr, "-");
+		break;
+	case FIELD_OP_PLUS:
+		strcat(expr, "+");
+		break;
+	default:
+		kfree(expr);
+		return NULL;
+	}
+
+	expr_append_field(expr, field->operands[1]);
+
+	return expr;
+}
+
+/*
+ * Classify @str by the first '+' or '-' it contains.
+ *
+ * Returns FIELD_OP_NONE when there is neither, FIELD_OP_PLUS when the
+ * first one is '+', and for '-' either FIELD_OP_UNARY_MINUS (the string
+ * itself starts with '-') or FIELD_OP_MINUS.
+ */
+static int contains_operator(char *str)
+{
+	char *first = strpbrk(str, "+-");
+
+	if (!first)
+		return FIELD_OP_NONE;
+
+	if (*first == '+')
+		return FIELD_OP_PLUS;
+
+	/* strpbrk() only matches '+' or '-', so this is a '-' */
+	return str[0] == '-' ? FIELD_OP_UNARY_MINUS : FIELD_OP_MINUS;
+}
+
static void destroy_hist_field(struct hist_field *hist_field,
unsigned int level)
{
@@ -534,6 +668,7 @@ static void destroy_hist_field(struct hist_field *hist_field,
destroy_hist_field(hist_field->operands[i], level + 1);

kfree(hist_field->var.name);
+ kfree(hist_field->name);

kfree(hist_field);
}
@@ -554,6 +689,9 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data,

hist_field->hist_data = hist_data;

+ if (flags & HIST_FIELD_FL_EXPR)
+ goto out; /* caller will populate */
+
if (flags & HIST_FIELD_FL_HITCOUNT) {
hist_field->fn = hist_field_counter;
goto out;
@@ -626,59 +764,323 @@ static void destroy_hist_fields(struct hist_trigger_data *hist_data)
}
}

-static int create_hitcount_val(struct hist_trigger_data *hist_data)
+static char *field_name_from_var(struct hist_trigger_data *hist_data,
+ char *var_name) /* returns the definition string of variable var_name, or NULL */
{
- hist_data->fields[HITCOUNT_IDX] =
- create_hist_field(hist_data, NULL, HIST_FIELD_FL_HITCOUNT, NULL);
- if (!hist_data->fields[HITCOUNT_IDX])
- return -ENOMEM;
+ char *name, *field;
+ unsigned int i;

- hist_data->n_vals++;
- hist_data->n_fields++;
+ for (i = 0; i < hist_data->attrs->var_defs.n_vars; i++) { /* scan every variable definition of this trigger */
+ name = hist_data->attrs->var_defs.name[i];

- if (WARN_ON(hist_data->n_vals > TRACING_MAP_VALS_MAX))
- return -EINVAL;
+ if (strcmp(var_name, name) == 0) {
+ field = hist_data->attrs->var_defs.expr[i];
+ if (contains_operator(field)) /* defined by an expression, not a single field: keep looking */
+ continue;
+ return field; /* points into var_defs, not a copy */
+ }
+ }

- return 0;
+ return NULL; /* no matching variable that is defined as a plain field */
}

-static int __create_val_field(struct hist_trigger_data *hist_data,
- unsigned int val_idx,
- struct trace_event_file *file,
- char *var_name, char *field_str,
- unsigned long flags)
+/*
+ * local_field_var_ref - resolve a "$var" reference to its definition
+ * @hist_data: hist trigger whose variable definitions are searched
+ * @var_name: candidate reference, including the leading '$'
+ *
+ * Returns what field_name_from_var() finds for the name after the '$',
+ * or NULL when @var_name is not a reference at all.
+ */
+static char *local_field_var_ref(struct hist_trigger_data *hist_data,
+				 char *var_name)
+{
+	/*
+	 * Only strip a real '$' prefix.  Skipping the first character
+	 * blindly reads past the terminator of an empty string and lets
+	 * e.g. "xfoo" match a variable named "foo".
+	 */
+	if (var_name[0] != '$')
+		return NULL;
+
+	return field_name_from_var(hist_data, var_name + 1);
+}
+
+static struct ftrace_event_field *
+parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file,
+ char *field_str, unsigned long *flags) /* returns the event field, NULL for $common_timestamp, or an ERR_PTR() */
{
struct ftrace_event_field *field = NULL;
- char *field_name;
- int ret = 0;
+ char *field_name, *modifier, *str;
+
+ modifier = str = kstrdup(field_str, GFP_KERNEL); /* work on a copy: strsep() below writes into the string */
+ if (!modifier)
+ return ERR_PTR(-ENOMEM);

- field_name = strsep(&field_str, ".");
- if (field_str) {
- if (strcmp(field_str, "hex") == 0)
- flags |= HIST_FIELD_FL_HEX;
+ field_name = strsep(&modifier, "."); /* split "name.modifier"; modifier is NULL when there is no '.' */
+ if (modifier) {
+ if (strcmp(modifier, "hex") == 0)
+ *flags |= HIST_FIELD_FL_HEX;
+ else if (strcmp(modifier, "sym") == 0)
+ *flags |= HIST_FIELD_FL_SYM;
+ else if (strcmp(modifier, "sym-offset") == 0)
+ *flags |= HIST_FIELD_FL_SYM_OFFSET;
+ else if ((strcmp(modifier, "execname") == 0) &&
+ (strcmp(field_name, "common_pid") == 0)) /* .execname is accepted on common_pid only */
+ *flags |= HIST_FIELD_FL_EXECNAME;
+ else if (strcmp(modifier, "syscall") == 0)
+ *flags |= HIST_FIELD_FL_SYSCALL;
+ else if (strcmp(modifier, "log2") == 0)
+ *flags |= HIST_FIELD_FL_LOG2;
+ else if (strcmp(modifier, "usecs") == 0)
+ *flags |= HIST_FIELD_FL_TIMESTAMP_USECS;
else {
- ret = -EINVAL;
+ field = ERR_PTR(-EINVAL); /* unknown modifier */
goto out;
}
}

if (strcmp(field_name, "$common_timestamp") == 0) {
- flags |= HIST_FIELD_FL_TIMESTAMP;
+ *flags |= HIST_FIELD_FL_TIMESTAMP; /* no event field is looked up here: field stays NULL */
hist_data->enable_timestamps = true;
+ if (*flags & HIST_FIELD_FL_TIMESTAMP_USECS)
+ hist_data->attrs->ts_in_usecs = true;
} else {
field = trace_find_event_field(file->event_call, field_name);
if (!field || !field->size) {
- ret = -EINVAL;
+ field = ERR_PTR(-EINVAL); /* no such field, or a field of size zero */
goto out;
}
}
+ out:
+ kfree(str); /* field_name and modifier point into str, so it is freed only here */
+
+ return field;
+}

- hist_data->fields[val_idx] = create_hist_field(hist_data, field, flags, var_name);
- if (!hist_data->fields[val_idx]) {
+/*
+ * parse_atom - create a hist field for a single operand
+ * @hist_data: hist trigger the field belongs to
+ * @file: event file used to look the field up
+ * @str: operand text: a field name with optional modifier, or a
+ *       reference to a variable defined on this trigger
+ * @flags: in/out; parse_field() ORs in the flags it derives from @str
+ * @var_name: passed through to create_hist_field()
+ *
+ * File-local, like the other parse_*() helpers, hence static.
+ *
+ * Returns the new field, or ERR_PTR(): whatever parse_field() reports,
+ * or -ENOMEM when create_hist_field() fails.
+ */
+static struct hist_field *parse_atom(struct hist_trigger_data *hist_data,
+				     struct trace_event_file *file, char *str,
+				     unsigned long *flags, char *var_name)
+{
+	char *s;
+	struct ftrace_event_field *field = NULL;
+	struct hist_field *hist_field = NULL;
+	int ret = 0;
+
+	/* a reference to a local variable is replaced by its definition */
+	s = local_field_var_ref(hist_data, str);
+	if (s)
+		str = s;
+
+	field = parse_field(hist_data, file, str, flags);
+	if (IS_ERR(field)) {
+		ret = PTR_ERR(field);
+		goto out;
+	}
+
+	hist_field = create_hist_field(hist_data, field, *flags, var_name);
+	if (!hist_field) {
ret = -ENOMEM;
goto out;
}

+	return hist_field;
+ out:
+	return ERR_PTR(ret);
+}
+
+static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
+				     struct trace_event_file *file,
+				     char *str, unsigned long flags,
+				     char *var_name, unsigned int level);
+
+/*
+ * parse_unary - parse a unary minus expression of the form -(xxx)
+ * @hist_data: hist trigger the resulting field belongs to
+ * @file: event file, passed through to parse_expr()
+ * @str: expression text starting with '-'; modified in place
+ * @flags: flags for the resulting field; HIST_FIELD_FL_EXPR is added
+ * @var_name: passed through to create_hist_field() for the result
+ * @level: current nesting depth; anything above 2 is rejected
+ *
+ * Returns the new expression field or an ERR_PTR().
+ */
+static struct hist_field *parse_unary(struct hist_trigger_data *hist_data,
+				      struct trace_event_file *file,
+				      char *str, unsigned long flags,
+				      char *var_name, unsigned int level)
+{
+	struct hist_field *operand1, *expr = NULL;
+	unsigned long operand_flags;
+	int ret = 0;
+	char *s;
+
+	/* we support only -(xxx) i.e. explicit parens required */
+
+	if (level > 2) {
+		ret = -EINVAL;
+		goto free;
+	}
+
+	str++; /* skip leading '-' */
+
+	/* the '(' has to follow the '-' directly, not just exist somewhere */
+	if (*str != '(') {
+		ret = -EINVAL;
+		goto free;
+	}
+	str++; /* skip '(' */
+
+	/*
+	 * Cut at the last ')' so that a nested -(...) operand keeps its own
+	 * closing paren.  No further split on '(' is done: the opening paren
+	 * is already consumed, and splitting again would leave str NULL for
+	 * every well-formed -(a+b).
+	 */
+	s = strrchr(str, ')');
+	if (!s) {
+		ret = -EINVAL; /* no closing ')' */
+		goto free;
+	}
+	*s = '\0';
+
+	flags |= HIST_FIELD_FL_EXPR;
+	expr = create_hist_field(hist_data, NULL, flags, var_name);
+	if (!expr) {
+		ret = -ENOMEM;
+		goto free;
+	}
+
+	operand_flags = 0;
+	operand1 = parse_expr(hist_data, file, str, operand_flags, NULL, ++level);
+	if (IS_ERR(operand1)) {
+		ret = PTR_ERR(operand1);
+		goto free;
+	}
+
+	expr->fn = hist_field_unary_minus;
+	expr->operands[0] = operand1;
+	expr->operator = FIELD_OP_UNARY_MINUS;
+	expr->name = expr_str(expr, 0);
+
+	return expr;
+ free:
+	destroy_hist_field(expr, 0);
+	return ERR_PTR(ret);
+}
+
+/*
+ * Both operands of an expression must agree on
+ * HIST_FIELD_FL_TIMESTAMP_USECS: either both have it set or neither.
+ *
+ * Returns 0 if they agree, -EINVAL otherwise.
+ */
+static int check_expr_operands(struct hist_field *operand1,
+			       struct hist_field *operand2)
+{
+	/* the XOR keeps the bit only where the two operands differ */
+	if ((operand1->flags ^ operand2->flags) & HIST_FIELD_FL_TIMESTAMP_USECS)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * parse_expr - parse a hist trigger field expression
+ * @hist_data: hist trigger the resulting field belongs to
+ * @file: event file, passed through to parse_atom()
+ * @str: expression text; modified in place by strsep()
+ * @flags: flags for the resulting field
+ * @var_name: passed through to create_hist_field() for the result
+ * @level: current nesting depth; anything above 2 is rejected
+ *
+ * A string without '+' or '-' is handed to parse_atom(), a string that
+ * starts with '-' to parse_unary().  Anything else is split at its first
+ * operator: the left side must be an atom, the right side is parsed
+ * recursively and so may itself be an expression (b+c in a+b+c).
+ *
+ * NOTE(review): since the remainder is parsed as one operand, "a-b+c"
+ * is evaluated as a-(b+c) - confirm that this is intended.
+ *
+ * Returns the new field or an ERR_PTR().
+ */
+static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
+				     struct trace_event_file *file,
+				     char *str, unsigned long flags,
+				     char *var_name, unsigned int level)
+{
+	struct hist_field *operand1 = NULL, *operand2 = NULL, *expr = NULL;
+	unsigned long operand_flags;
+	int field_op, ret = -EINVAL;
+	char *sep, *operand1_str;
+	hist_field_fn_t fn;
+
+	if (level > 2)
+		return ERR_PTR(-EINVAL);
+
+	field_op = contains_operator(str);
+
+	if (field_op == FIELD_OP_NONE)
+		return parse_atom(hist_data, file, str, &flags, var_name);
+
+	if (field_op == FIELD_OP_UNARY_MINUS)
+		return parse_unary(hist_data, file, str, flags, var_name, ++level);
+
+	/*
+	 * Pick separator and callback together, before anything is
+	 * allocated.  Rejecting an unknown operator only after the operands
+	 * were attached to expr would free them twice and return
+	 * ERR_PTR(0).
+	 */
+	switch (field_op) {
+	case FIELD_OP_MINUS:
+		sep = "-";
+		fn = hist_field_minus;
+		break;
+	case FIELD_OP_PLUS:
+		sep = "+";
+		fn = hist_field_plus;
+		break;
+	default:
+		goto free;
+	}
+
+	operand1_str = strsep(&str, sep);
+	if (!operand1_str || !str)
+		goto free;
+
+	operand_flags = 0;
+	operand1 = parse_atom(hist_data, file, operand1_str,
+			      &operand_flags, NULL);
+	if (IS_ERR(operand1)) {
+		ret = PTR_ERR(operand1);
+		operand1 = NULL;
+		goto free;
+	}
+
+	/* rest of string could be another expression e.g. b+c in a+b+c */
+	operand_flags = 0;
+	operand2 = parse_expr(hist_data, file, str, operand_flags, NULL, ++level);
+	if (IS_ERR(operand2)) {
+		ret = PTR_ERR(operand2);
+		operand2 = NULL;
+		goto free;
+	}
+
+	ret = check_expr_operands(operand1, operand2);
+	if (ret)
+		goto free;
+
+	flags |= HIST_FIELD_FL_EXPR;
+	expr = create_hist_field(hist_data, NULL, flags, var_name);
+	if (!expr) {
+		ret = -ENOMEM;
+		goto free;
+	}
+
+	/* from here on expr owns both operands */
+	expr->fn = fn;
+	expr->operands[0] = operand1;
+	expr->operands[1] = operand2;
+	expr->operator = field_op;
+	expr->name = expr_str(expr, 0);
+
+	return expr;
+ free:
+	destroy_hist_field(operand1, 0);
+	destroy_hist_field(operand2, 0);
+	destroy_hist_field(expr, 0);
+
+	return ERR_PTR(ret);
+}
+
+/*
+ * Create the hitcount field and store it in fields[HITCOUNT_IDX],
+ * counting it as both a value and a field.
+ *
+ * Returns 0 on success, -ENOMEM if the field cannot be created, or
+ * -EINVAL (with a warning) if n_vals then exceeds TRACING_MAP_VALS_MAX.
+ */
+static int create_hitcount_val(struct hist_trigger_data *hist_data)
+{
+	struct hist_field *hitcount;
+
+	hitcount = create_hist_field(hist_data, NULL, HIST_FIELD_FL_HITCOUNT, NULL);
+	hist_data->fields[HITCOUNT_IDX] = hitcount;
+	if (!hitcount)
+		return -ENOMEM;
+
+	hist_data->n_vals++;
+	hist_data->n_fields++;
+
+	if (WARN_ON(hist_data->n_vals > TRACING_MAP_VALS_MAX))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int __create_val_field(struct hist_trigger_data *hist_data,
+ unsigned int val_idx,
+ struct trace_event_file *file,
+ char *var_name, char *field_str,
+ unsigned long flags)
+{
+ struct hist_field *hist_field;
+ int ret = 0;
+
+ hist_field = parse_expr(hist_data, file, field_str, flags, var_name, 0);
+ if (IS_ERR(hist_field)) {
+ ret = PTR_ERR(hist_field);
+ goto out;
+ }
+
+ hist_data->fields[val_idx] = hist_field;
+
++hist_data->n_vals;
++hist_data->n_fields;

@@ -765,8 +1167,8 @@ static int create_key_field(struct hist_trigger_data *hist_data,
struct trace_event_file *file,
char *field_str)
{
- struct ftrace_event_field *field = NULL;
struct hist_field *hist_field = NULL;
+
unsigned long flags = 0;
unsigned int key_size;
int ret = 0;
@@ -781,60 +1183,24 @@ static int create_key_field(struct hist_trigger_data *hist_data,
key_size = sizeof(unsigned long) * HIST_STACKTRACE_DEPTH;
hist_field = create_hist_field(hist_data, NULL, flags, NULL);
} else {
- char *field_name = strsep(&field_str, ".");
-
- if (field_str) {
- if (strcmp(field_str, "hex") == 0)
- flags |= HIST_FIELD_FL_HEX;
- else if (strcmp(field_str, "sym") == 0)
- flags |= HIST_FIELD_FL_SYM;
- else if (strcmp(field_str, "sym-offset") == 0)
- flags |= HIST_FIELD_FL_SYM_OFFSET;
- else if ((strcmp(field_str, "execname") == 0) &&
- (strcmp(field_name, "common_pid") == 0))
- flags |= HIST_FIELD_FL_EXECNAME;
- else if (strcmp(field_str, "syscall") == 0)
- flags |= HIST_FIELD_FL_SYSCALL;
- else if (strcmp(field_str, "log2") == 0)
- flags |= HIST_FIELD_FL_LOG2;
- else if (strcmp(field_str, "usecs") == 0)
- flags |= HIST_FIELD_FL_TIMESTAMP_USECS;
- else {
- ret = -EINVAL;
- goto out;
- }
+ hist_field = parse_expr(hist_data, file, field_str, flags,
+ NULL, 0);
+ if (IS_ERR(hist_field)) {
+ ret = PTR_ERR(hist_field);
+ goto out;
}

- if (strcmp(field_name, "$common_timestamp") == 0) {
- flags |= HIST_FIELD_FL_TIMESTAMP;
- hist_data->enable_timestamps = true;
- if (flags & HIST_FIELD_FL_TIMESTAMP_USECS)
- hist_data->attrs->ts_in_usecs = true;
- key_size = sizeof(u64);
- } else {
- field = trace_find_event_field(file->event_call, field_name);
- if (!field || !field->size) {
- ret = -EINVAL;
- goto out;
- }
-
- if (is_string_field(field))
- key_size = MAX_FILTER_STR_VAL;
- else
- key_size = field->size;
- }
+ key_size = hist_field->size;
}

- hist_data->fields[key_idx] = create_hist_field(hist_data, field, flags, NULL);
- if (!hist_data->fields[key_idx]) {
- ret = -ENOMEM;
- goto out;
- }
+ hist_data->fields[key_idx] = hist_field;

key_size = ALIGN(key_size, sizeof(u64));
hist_data->fields[key_idx]->size = key_size;
hist_data->fields[key_idx]->offset = key_offset;
+
hist_data->key_size += key_size;
+
if (hist_data->key_size > HIST_KEY_SIZE_MAX) {
ret = -EINVAL;
goto out;
@@ -1419,7 +1785,8 @@ static void hist_trigger_stacktrace_print(struct seq_file *m,
for (i = 1; i < hist_data->n_vals; i++) {
field_name = hist_field_name(hist_data->fields[i], 0);

- if (hist_data->fields[i]->flags & HIST_FIELD_FL_VAR)
+ if (hist_data->fields[i]->flags & HIST_FIELD_FL_VAR ||
+ hist_data->fields[i]->flags & HIST_FIELD_FL_EXPR)
continue;

if (hist_data->fields[i]->flags & HIST_FIELD_FL_HEX) {
--
1.9.3