[PATCH v2 2/3] perf/sdt/x86: Move OP parser to tools/perf/arch/x86/

From: Ravi Bangoria
Date: Mon Mar 27 2017 - 04:00:27 EST


SDT marker argument is in N@OP format. N is the size of argument and
OP is the actual assembly operand. OP is arch dependent component and
hence it's parsing logic also should be placed under tools/perf/arch/.

Signed-off-by: Ravi Bangoria <ravi.bangoria@xxxxxxxxxxxxxxxxxx>
---
tools/perf/arch/x86/util/perf_regs.c | 179 ++++++++++++++++++++++++++++-------
tools/perf/util/perf_regs.c | 6 +-
tools/perf/util/perf_regs.h | 11 ++-
tools/perf/util/probe-file.c | 132 ++++++++------------------
4 files changed, 194 insertions(+), 134 deletions(-)

diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c
index 99faab4..897fcfd 100644
--- a/tools/perf/arch/x86/util/perf_regs.c
+++ b/tools/perf/arch/x86/util/perf_regs.c
@@ -1,8 +1,10 @@
#include <string.h>
+#include <regex.h>

#include "../../perf.h"
#include "../../util/util.h"
#include "../../util/perf_regs.h"
+#include "../../util/debug.h"

const struct sample_reg sample_reg_masks[] = {
SMPL_REG(AX, PERF_REG_X86_AX),
@@ -37,7 +39,7 @@ struct sdt_name_reg {
#define SDT_NAME_REG(n, m) {.sdt_name = "%" #n, .uprobe_name = "%" #m}
#define SDT_NAME_REG_END {.sdt_name = NULL, .uprobe_name = NULL}

-static const struct sdt_name_reg sdt_reg_renamings[] = {
+static const struct sdt_name_reg sdt_reg_tbl[] = {
SDT_NAME_REG(eax, ax),
SDT_NAME_REG(rax, ax),
SDT_NAME_REG(al, ax),
@@ -91,45 +93,158 @@ static const struct sdt_name_reg sdt_reg_renamings[] = {
SDT_NAME_REG_END,
};

-int sdt_rename_register(char **pdesc, char *old_name)
+/*
+ * Perf only supports OP which is in +/-NUM(REG) form.
+ * Here plus-minus sign, NUM and parenthesis are optional,
+ * only REG is mandatory.
+ *
+ * SDT events also supports indirect addressing mode with a
+ * symbol as offset, scaled mode and constants in OP. But
+ * perf does not support them yet. Below are few examples.
+ *
+ * OP with scaled mode:
+ * (%rax,%rsi,8)
+ * 10(%ras,%rsi,8)
+ *
+ * OP with indirect addressing mode:
+ * check_action(%rip)
+ * mp_+52(%rip)
+ * 44+mp_(%rip)
+ *
+ * OP with constat values:
+ * $0
+ * $123
+ * $-1
+ */
+#define SDT_OP_REGEX "^([+\\-]?)([0-9]*)(\\(?)(%[a-z][a-z0-9]+)(\\)?)$"
+
+static regex_t sdt_op_regex;
+
+static int sdt_init_op_regex(void)
{
- const struct sdt_name_reg *rnames = sdt_reg_renamings;
- char *new_desc, *old_desc = *pdesc;
- size_t prefix_len, sdt_len, uprobe_len, old_desc_len, offset;
- int ret = -1;
-
- while (ret != 0 && rnames->sdt_name != NULL) {
- sdt_len = strlen(rnames->sdt_name);
- ret = strncmp(old_name, rnames->sdt_name, sdt_len);
- rnames += !!ret;
- }
+ static int initialized;
+ int ret = 0;

- if (rnames->sdt_name == NULL)
+ if (initialized)
return 0;

- sdt_len = strlen(rnames->sdt_name);
- uprobe_len = strlen(rnames->uprobe_name);
- old_desc_len = strlen(old_desc) + 1;
+ ret = regcomp(&sdt_op_regex, SDT_OP_REGEX, REG_EXTENDED);
+ if (ret < 0) {
+ pr_debug4("Regex compilation error.\n");
+ return ret;
+ }

- new_desc = zalloc(old_desc_len + uprobe_len - sdt_len);
- if (new_desc == NULL)
- return -1;
+ initialized = 1;
+ return 0;
+}

- /* Copy the chars before the register name (at least '%') */
- prefix_len = old_name - old_desc;
- memcpy(new_desc, old_desc, prefix_len);
+/*
+ * Max x86 register name length is 5(ex: %r15d). So, 6th char
+ * should always contain NULL. This helps to find register name
+ * length using strlen, insted of maintaing one more variable.
+ */
+#define SDT_REG_NAME_SIZE 6

- /* Copy the new register name */
- memcpy(new_desc + prefix_len, rnames->uprobe_name, uprobe_len);
+/*
+ * The uprobe parser does not support all gas register names;
+ * so, we have to replace them (ex. for x86_64: %rax -> %ax).
+ * Note: If register does not require renaming, just copy
+ * paste as it is, but don't leave it empty.
+ */
+static void sdt_rename_register(char *sdt_reg, int sdt_len, char *uprobe_reg)
+{
+ int i = 0;

- /* Copy the chars after the register name (if need be) */
- offset = prefix_len + sdt_len;
- if (offset < old_desc_len)
- memcpy(new_desc + prefix_len + uprobe_len,
- old_desc + offset, old_desc_len - offset);
+ for (i = 0; sdt_reg_tbl[i].sdt_name != NULL; i++) {
+ if (!strncmp(sdt_reg_tbl[i].sdt_name, sdt_reg, sdt_len)) {
+ strcpy(uprobe_reg, sdt_reg_tbl[i].uprobe_name);
+ return;
+ }
+ }

- free(old_desc);
- *pdesc = new_desc;
+ strncpy(uprobe_reg, sdt_reg, sdt_len);
+}

- return 0;
+int arch_sdt_arg_parse_op(char *old_op, char **new_op)
+{
+ char new_reg[SDT_REG_NAME_SIZE] = {0};
+ int new_len = 0, ret;
+ /*
+ * rm[0]: +/-NUM(REG)
+ * rm[1]: +/-
+ * rm[2]: NUM
+ * rm[3]: (
+ * rm[4]: REG
+ * rm[5]: )
+ */
+ regmatch_t rm[6];
+ /*
+ * Max prefix length is 2 as it may contains sign(+/-)
+ * and displacement 0 (Both sign and displacement 0 are
+ * optional so it may be empty). Use one more character
+ * to hold last NULL so that strlen can be used to find
+ * prefix length, instead of maintaing one more variable.
+ */
+ char prefix[3] = {0};
+
+ ret = sdt_init_op_regex();
+ if (ret < 0)
+ return ret;
+
+ /*
+ * If unsupported OR does not match with regex OR
+ * register name too long, skip it.
+ */
+ if (strchr(old_op, ',') || strchr(old_op, '$') ||
+ regexec(&sdt_op_regex, old_op, 6, rm, 0) ||
+ rm[4].rm_eo - rm[4].rm_so > SDT_REG_NAME_SIZE) {
+ pr_debug4("Skipping unsupported SDT argument: %s\n", old_op);
+ return SDT_ARG_SKIP;
+ }
+
+ /*
+ * Prepare prefix.
+ * If SDT OP has parenthesis but does not provide
+ * displacement, add 0 for displacement.
+ * SDT Uprobe Prefix
+ * -----------------------------
+ * +24(%rdi) +24(%di) +
+ * 24(%rdi) +24(%di) +
+ * %rdi %di
+ * (%rdi) +0(%di) +0
+ * -80(%rbx) -80(%bx) -
+ */
+ if (rm[3].rm_so != rm[3].rm_eo) {
+ if (rm[1].rm_so != rm[1].rm_eo)
+ prefix[0] = *(old_op + rm[1].rm_so);
+ else if (rm[2].rm_so != rm[2].rm_eo)
+ prefix[0] = '+';
+ else
+ strncpy(prefix, "+0", 2);
+ }
+
+ /* Rename register */
+ sdt_rename_register(old_op + rm[4].rm_so, rm[4].rm_eo - rm[4].rm_so,
+ new_reg);
+
+ /* Prepare final OP which should be valid for uprobe_events */
+ new_len = strlen(prefix) +
+ (rm[2].rm_eo - rm[2].rm_so) +
+ (rm[3].rm_eo - rm[3].rm_so) +
+ strlen(new_reg) +
+ (rm[5].rm_eo - rm[5].rm_so) +
+ 1; /* NULL */
+
+ *new_op = zalloc(new_len);
+ if (!*new_op)
+ return -ENOMEM;
+
+ scnprintf(*new_op, new_len, "%.*s%.*s%.*s%.*s%.*s",
+ strlen(prefix), prefix,
+ (int)(rm[2].rm_eo - rm[2].rm_so), old_op + rm[2].rm_so,
+ (int)(rm[3].rm_eo - rm[3].rm_so), old_op + rm[3].rm_so,
+ strlen(new_reg), new_reg,
+ (int)(rm[5].rm_eo - rm[5].rm_so), old_op + rm[5].rm_so);
+
+ return SDT_ARG_VALID;
}
diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c
index a37e593..b2ae039 100644
--- a/tools/perf/util/perf_regs.c
+++ b/tools/perf/util/perf_regs.c
@@ -6,10 +6,10 @@ const struct sample_reg __weak sample_reg_masks[] = {
SMPL_REG_END
};

-int __weak sdt_rename_register(char **pdesc __maybe_unused,
- char *old_name __maybe_unused)
+int __weak arch_sdt_arg_parse_op(char *old_op __maybe_unused,
+ char **new_op __maybe_unused)
{
- return 0;
+ return SDT_ARG_SKIP;
}

#ifdef HAVE_PERF_REGS_SUPPORT
diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h
index 7544a15..32b37d1 100644
--- a/tools/perf/util/perf_regs.h
+++ b/tools/perf/util/perf_regs.h
@@ -15,11 +15,12 @@ struct sample_reg {

extern const struct sample_reg sample_reg_masks[];

-/*
- * The table sdt_reg_renamings is used for adjusting gcc/gas-generated
- * registers before filling the uprobe tracer interface.
- */
-int sdt_rename_register(char **pdesc, char *old_name);
+enum {
+ SDT_ARG_VALID = 0,
+ SDT_ARG_SKIP,
+};
+
+int arch_sdt_arg_parse_op(char *old_op, char **new_op);

#ifdef HAVE_PERF_REGS_SUPPORT
#include <perf_regs.h>
diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c
index d741634..88714de 100644
--- a/tools/perf/util/probe-file.c
+++ b/tools/perf/util/probe-file.c
@@ -694,10 +694,29 @@ static const char * const type_to_suffix[] = {
"", ":u8", ":u16", "", ":u32", "", "", "", ":u64"
};

+/*
+ * Isolate the string number and convert it into a decimal value;
+ * this will be an index to get suffix of the uprobe name (defining
+ * the type)
+ */
+static int sdt_arg_parse_size(char *n_ptr, const char **suffix)
+{
+ long type_idx;
+
+ type_idx = strtol(n_ptr, NULL, 10);
+ if (type_idx < -8 || type_idx > 8) {
+ pr_debug4("Failed to get a valid sdt type\n");
+ return -1;
+ }
+
+ *suffix = type_to_suffix[type_idx + 8];
+ return 0;
+}
+
static int synthesize_sdt_probe_arg(struct strbuf *buf, int i, const char *arg)
{
- char *tmp, *desc = strdup(arg);
- const char *prefix = "", *suffix = "";
+ char *op, *desc = strdup(arg), *new_op = NULL;
+ const char *suffix = "";
int ret = -1;

if (desc == NULL) {
@@ -705,112 +724,37 @@ static int synthesize_sdt_probe_arg(struct strbuf *buf, int i, const char *arg)
return ret;
}

- tmp = strchr(desc, '@');
- if (tmp) {
- long type_idx;
- /*
- * Isolate the string number and convert it into a
- * binary value; this will be an index to get suffix
- * of the uprobe name (defining the type)
- */
- tmp[0] = '\0';
- type_idx = strtol(desc, NULL, 10);
- /* Check that the conversion went OK */
- if (type_idx == LONG_MIN || type_idx == LONG_MAX) {
- pr_debug4("Failed to parse sdt type\n");
- goto error;
- }
- /* Check that the converted value is OK */
- if (type_idx < -8 || type_idx > 8) {
- pr_debug4("Failed to get a valid sdt type\n");
- goto error;
- }
- suffix = type_to_suffix[type_idx + 8];
- /* Get rid of the sdt prefix which is now useless */
- tmp++;
- memmove(desc, tmp, strlen(tmp) + 1);
- }
-
/*
- * The uprobe tracer format does not support all the
- * addressing modes (notably: in x86 the scaled mode); so, we
- * detect ',' characters, if there is just one, there is no
- * use converting the sdt arg into a uprobe one.
+ * Argument is in N@OP format. N is size of the argument and OP is
+ * the actual assembly operand. N can be omitted; in that case
+ * argument is just OP(without @).
*/
- if (strchr(desc, ',')) {
- pr_debug4("Skipping unsupported SDT argument; %s\n", desc);
- goto out;
- }
+ op = strchr(desc, '@');
+ if (op) {
+ op[0] = '\0';
+ op++;

- /*
- * If the argument addressing mode is indirect, we must check
- * a few things...
- */
- tmp = strchr(desc, '(');
- if (tmp) {
- int j;
-
- /*
- * ...if the addressing mode is indirect with a
- * positive offset (ex.: "1608(%ax)"), we need to add
- * a '+' prefix so as to be compliant with uprobe
- * format.
- */
- if (desc[0] != '+' && desc[0] != '-')
- prefix = "+";
-
- /*
- * ...or if the addressing mode is indirect with a symbol
- * as offset, the argument will not be supported by
- * the uprobe tracer format; so, let's skip this one.
- */
- for (j = 0; j < tmp - desc; j++) {
- if (desc[j] != '+' && desc[j] != '-' &&
- !isdigit(desc[j])) {
- pr_debug4("Skipping unsupported SDT argument; "
- "%s\n", desc);
- goto out;
- }
- }
+ if (sdt_arg_parse_size(desc, &suffix))
+ goto error;
+ } else {
+ op = desc;
}

- /*
- * The uprobe tracer format does not support constants; if we
- * find one in the current argument, let's skip the argument.
- */
- if (strchr(desc, '$')) {
- pr_debug4("Skipping unsupported SDT argument; %s\n", desc);
- goto out;
- }
+ ret = arch_sdt_arg_parse_op(op, &new_op);

- /*
- * The uprobe parser does not support all gas register names;
- * so, we have to replace them (ex. for x86_64: %rax -> %ax);
- * the loop below looks for the register names (starting with
- * a '%' and tries to perform the needed renamings.
- */
- tmp = strchr(desc, '%');
- while (tmp) {
- size_t offset = tmp - desc;
+ if (ret < 0)
+ goto error;

- ret = sdt_rename_register(&desc, desc + offset);
+ if (ret == SDT_ARG_VALID) {
+ ret = strbuf_addf(buf, " arg%d=%s%s", i + 1, new_op, suffix);
if (ret < 0)
goto error;
-
- /*
- * The desc pointer might have changed; so, let's not
- * try to reuse tmp for next lookup
- */
- tmp = strchr(desc + offset + 1, '%');
}

- if (strbuf_addf(buf, " arg%d=%s%s%s", i + 1, prefix, desc, suffix) < 0)
- goto error;
-
-out:
ret = 0;
error:
free(desc);
+ free(new_op);
return ret;
}

--
2.9.3