[PATCH 07/13] perf, tools: Add a simple expression parser for JSON

From: Andi Kleen
Date: Mon Mar 20 2017 - 16:21:07 EST


From: Andi Kleen <ak@xxxxxxxxxxxxxxx>

Add a simple expression parser good enough to parse JSON relation
expressions. The parser is implemented using bison.

This is just intended as an simple parser for internal usage
in the event lists, not the beginning of a "perf scripting language"

v2: Use expr__ prefix instead of expr_
Support multiple free variables for parser
Signed-off-by: Andi Kleen <ak@xxxxxxxxxxxxxxx>
---
tools/perf/tests/Build | 1 +
tools/perf/tests/builtin-test.c | 4 +
tools/perf/tests/expr.c | 55 +++++++++++++
tools/perf/tests/tests.h | 1 +
tools/perf/util/Build | 5 ++
tools/perf/util/expr.h | 25 ++++++
tools/perf/util/expr.y | 173 ++++++++++++++++++++++++++++++++++++++++
7 files changed, 264 insertions(+)
create mode 100644 tools/perf/tests/expr.c
create mode 100644 tools/perf/util/expr.h
create mode 100644 tools/perf/util/expr.y

diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index 1cb3d9b540e9..af58ebc243ef 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -38,6 +38,7 @@ perf-y += cpumap.o
perf-y += stat.o
perf-y += event_update.o
perf-y += event-times.o
+perf-y += expr.o
perf-y += backward-ring-buffer.o
perf-y += sdt.o
perf-y += is_printable_array.o
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 83c4669cbc5b..86822969e8a8 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -44,6 +44,10 @@ static struct test generic_tests[] = {
.func = test__parse_events,
},
{
+ .desc = "Simple expression parser",
+ .func = test__expr,
+ },
+ {
.desc = "PERF_RECORD_* events & perf_sample fields",
.func = test__PERF_RECORD,
},
diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c
new file mode 100644
index 000000000000..554695c06c5b
--- /dev/null
+++ b/tools/perf/tests/expr.c
@@ -0,0 +1,55 @@
+#include "util/debug.h"
+#include "util/expr.h"
+#include "tests.h"
+
+static int test(struct parse_ctx *ctx, const char *e, double val2)
+{
+ double val;
+
+ if (expr__parse(&val, ctx, &e))
+ TEST_ASSERT_VAL("parse test failed", 0);
+ TEST_ASSERT_VAL("unexpected value", val == val2);
+ return 0;
+}
+
+int test__expr(int subtest __maybe_unused)
+{
+ const char *p;
+ const char **other;
+ double val;
+ int ret;
+ struct parse_ctx ctx;
+ int num_other;
+
+ expr__ctx_init(&ctx);
+ expr__add_id(&ctx, "FOO", 1);
+ expr__add_id(&ctx, "BAR", 2);
+
+ ret = test(&ctx, "1+1", 2);
+ ret |= test(&ctx, "FOO+BAR", 3);
+ ret |= test(&ctx, "(BAR/2)%2", 1);
+ ret |= test(&ctx, "1 - -4", 5);
+ ret |= test(&ctx, "(FOO-1)*2 + (BAR/2)%2 - -4", 5);
+
+ if (ret)
+ return ret;
+
+ p = "FOO/0";
+ ret = expr__parse(&val, &ctx, &p);
+ TEST_ASSERT_VAL("division by zero", ret == 1);
+
+ p = "BAR/";
+ ret = expr__parse(&val, &ctx, &p);
+ TEST_ASSERT_VAL("missing operand", ret == 1);
+
+ TEST_ASSERT_VAL("find other",
+ expr__find_other("FOO + BAR + BAZ + BOZO", "FOO", &other, &num_other) == 0);
+ TEST_ASSERT_VAL("find other", num_other == 3);
+ TEST_ASSERT_VAL("find other", !strcmp(other[0], "BAR"));
+ TEST_ASSERT_VAL("find other", !strcmp(other[1], "BAZ"));
+ TEST_ASSERT_VAL("find other", !strcmp(other[2], "BOZO"));
+ TEST_ASSERT_VAL("find other", other[3] == NULL);
+ free((void *)other);
+
+ return 0;
+}
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 1fa9b9d83aa5..631859629403 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -62,6 +62,7 @@ int test__sample_parsing(int subtest);
int test__keep_tracking(int subtest);
int test__parse_no_sample_id_all(int subtest);
int test__dwarf_unwind(int subtest);
+int test__expr(int subtest);
int test__hists_filter(int subtest);
int test__mmap_thread_lookup(int subtest);
int test__thread_mg_share(int subtest);
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index fb4f42f1bb38..0b98534a9ea1 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -90,6 +90,7 @@ libperf-y += mem-events.o
libperf-y += vsprintf.o
libperf-y += drv_configs.o
libperf-y += time-utils.o
+libperf-y += expr-bison.o

libperf-$(CONFIG_LIBBPF) += bpf-loader.o
libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
@@ -142,6 +143,10 @@ $(OUTPUT)util/parse-events-bison.c: util/parse-events.y
$(call rule_mkdir)
$(Q)$(call echo-cmd,bison)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $@ -p parse_events_

+$(OUTPUT)util/expr-bison.c: util/expr.y
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,bison)$(BISON) -v util/expr.y -d $(PARSER_DEBUG_BISON) -o $@ -p expr__
+
$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c
$(call rule_mkdir)
$(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/pmu-flex.h util/pmu.l
diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h
new file mode 100644
index 000000000000..9c2760a1a96e
--- /dev/null
+++ b/tools/perf/util/expr.h
@@ -0,0 +1,25 @@
+#ifndef PARSE_CTX_H
+#define PARSE_CTX_H 1
+
+#define EXPR_MAX_OTHER 8
+#define MAX_PARSE_ID EXPR_MAX_OTHER
+
+struct parse_id {
+ const char *name;
+ double val;
+};
+
+struct parse_ctx {
+ int num_ids;
+ struct parse_id ids[MAX_PARSE_ID];
+};
+
+void expr__ctx_init(struct parse_ctx *ctx);
+void expr__add_id(struct parse_ctx *ctx, const char *id, double val);
+#ifndef IN_EXPR_Y
+int expr__parse(double *final_val, struct parse_ctx *ctx, const char **pp);
+#endif
+int expr__find_other(const char *p, const char *one, const char ***other,
+ int *num_other);
+
+#endif
diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y
new file mode 100644
index 000000000000..a8daa7a93605
--- /dev/null
+++ b/tools/perf/util/expr.y
@@ -0,0 +1,173 @@
+/* Simple expression parser */
+%{
+#include "util.h"
+#include "util/debug.h"
+#define IN_EXPR_Y 1
+#include "expr.h"
+#include <string.h>
+
+#define MAXIDLEN 256
+%}
+
+%define api.pure full
+%parse-param { double *final_val }
+%parse-param { struct parse_ctx *ctx }
+%parse-param { const char **pp }
+%lex-param { const char **pp }
+
+%union {
+ double num;
+ char id[MAXIDLEN+1];
+}
+
+%token <num> NUMBER
+%token <id> ID
+%left '|'
+%left '^'
+%left '&'
+%left '-' '+'
+%left '*' '/' '%'
+%left NEG NOT
+%type <num> expr
+
+%{
+static int expr__lex(YYSTYPE *res, const char **pp);
+
+static void expr__error(double *final_val __maybe_unused,
+ struct parse_ctx *ctx __maybe_unused,
+ const char **pp __maybe_unused,
+ const char *s)
+{
+ pr_debug("%s\n", s);
+}
+
+static int lookup_id(struct parse_ctx *ctx, char *id, double *val)
+{
+ int i;
+
+ for (i = 0; i < ctx->num_ids; i++) {
+ if (!strcasecmp(ctx->ids[i].name, id)) {
+ *val = ctx->ids[i].val;
+ return 0;
+ }
+ }
+ return -1;
+}
+
+%}
+%%
+
+all_expr: expr { *final_val = $1; }
+ ;
+
+expr: NUMBER
+ | ID { if (lookup_id(ctx, $1, &$$) < 0) {
+ pr_debug("%s not found", $1);
+ YYABORT;
+ }
+ }
+ | expr '+' expr { $$ = $1 + $3; }
+ | expr '-' expr { $$ = $1 - $3; }
+ | expr '*' expr { $$ = $1 * $3; }
+ | expr '/' expr { if ($3 == 0) YYABORT; $$ = $1 / $3; }
+ | expr '%' expr { if ((long)$3 == 0) YYABORT; $$ = (long)$1 % (long)$3; }
+ | '-' expr %prec NEG { $$ = -$2; }
+ | '(' expr ')' { $$ = $2; }
+ ;
+
+%%
+
+static int expr__symbol(YYSTYPE *res, const char *p, const char **pp)
+{
+ char *dst = res->id;
+ const char *s = p;
+
+ while (isalnum(*p) || *p == '_' || *p == '.') {
+ if (p - s >= MAXIDLEN)
+ return -1;
+ *dst++ = *p++;
+ }
+ *dst = 0;
+ *pp = p;
+ return ID;
+}
+
+static int expr__lex(YYSTYPE *res, const char **pp)
+{
+ int tok;
+ const char *s;
+ const char *p = *pp;
+
+ while (isspace(*p))
+ p++;
+ s = p;
+ switch (*p++) {
+ case 'a' ... 'z':
+ case 'A' ... 'Z':
+ return expr__symbol(res, p - 1, pp);
+ case '0' ... '9': case '.':
+ res->num = strtod(s, (char **)&p);
+ tok = NUMBER;
+ break;
+ default:
+ tok = *s;
+ break;
+ }
+ *pp = p;
+ return tok;
+}
+
+/* Caller must make sure id is allocated */
+void expr__add_id(struct parse_ctx *ctx, const char *name, double val)
+{
+ int idx;
+ assert(ctx->num_ids < MAX_PARSE_ID);
+ idx = ctx->num_ids++;
+ ctx->ids[idx].name = name;
+ ctx->ids[idx].val = val;
+}
+
+void expr__ctx_init(struct parse_ctx *ctx)
+{
+ ctx->num_ids = 0;
+}
+
+int expr__find_other(const char *p, const char *one, const char ***other,
+ int *num_otherp)
+{
+ const char *orig = p;
+ int err = -1;
+ int num_other;
+
+ *other = malloc((EXPR_MAX_OTHER + 1) * sizeof(char *));
+ if (!*other)
+ return -1;
+
+ num_other = 0;
+ for (;;) {
+ YYSTYPE val;
+ int tok = expr__lex(&val, &p);
+ if (tok == 0) {
+ err = 0;
+ break;
+ }
+ if (tok == ID && strcasecmp(one, val.id)) {
+ if (num_other >= EXPR_MAX_OTHER - 1) {
+ pr_debug("Too many extra events in %s\n", orig);
+ break;
+ }
+ (*other)[num_other] = strdup(val.id);
+ if (!(*other)[num_other])
+ return -1;
+ num_other++;
+ }
+ }
+ (*other)[num_other] = NULL;
+ *num_otherp = num_other;
+ if (err) {
+ *num_otherp = 0;
+ free(*other);
+ *other = NULL;
+ }
+ return err;
+}
--
2.9.3