[PATCH 3/3] tracing: Add support for dynamic strings to synthetic events

From: Tom Zanussi
Date: Tue Sep 29 2020 - 16:33:55 EST


Currently, sythetic events only support static string fields such as:

# echo 'test_latency u64 lat; char somename[32]' > /sys/kernel/debug/tracing/synthetic_events

Which is fine, but wastes a lot of space in the event.

It also prevents the most commonly-defined strings in the existing
trace events e.g. those defined using __string(), from being passed to
synthetic events via the trace() action.

With this change, synthetic events with dynamic fields can be defined:

# echo 'test_latency u64 lat; char somename[]' > /sys/kernel/debug/tracing/synthetic_events

And the trace() action can be used to generate events using either
dynamic or static strings:

# echo 'hist:keys=name:lat=common_timestamp.usecs-$ts0:onmatch(sys.event).test_latency($lat,name)' > /sys/kernel/debug/tracing/events

The synthetic event dynamic strings are implemented in the same way as
the existing __data_loc strings and appear as such in the format file.

Signed-off-by: Tom Zanussi <zanussi@xxxxxxxxxx>
---
Documentation/trace/events.rst | 15 ++-
Documentation/trace/histogram.rst | 18 ++++
kernel/trace/trace_events_hist.c | 9 ++
kernel/trace/trace_events_synth.c | 157 ++++++++++++++++++++++++++----
kernel/trace/trace_synth.h | 4 +
5 files changed, 181 insertions(+), 22 deletions(-)

diff --git a/Documentation/trace/events.rst b/Documentation/trace/events.rst
index f792b1959a33..2a5aa48eff6c 100644
--- a/Documentation/trace/events.rst
+++ b/Documentation/trace/events.rst
@@ -589,8 +589,19 @@ name::
{ .type = "int", .name = "my_int_field" },
};

-See synth_field_size() for available types. If field_name contains [n]
-the field is considered to be an array.
+See synth_field_size() for available types.
+
+If field_name contains [n], the field is considered to be a static array.
+
+If field_names contains[] (no subscript), the field is considered to
+be a dynamic array, which will only take as much space in the event as
+is required to hold the array.
+
+Because space for an event is reserved before assigning field values
+to the event, using dynamic arrays implies that the piecewise
+in-kernel API described below can't be used with dynamic arrays. The
+other non-piecewise in-kernel APIs can, however, be used with dynamic
+arrays.

If the event is created from within a module, a pointer to the module
must be passed to synth_event_create(). This will ensure that the
diff --git a/Documentation/trace/histogram.rst b/Documentation/trace/histogram.rst
index 8408670d0328..b573604deabd 100644
--- a/Documentation/trace/histogram.rst
+++ b/Documentation/trace/histogram.rst
@@ -1776,6 +1776,24 @@ consisting of the name of the new event along with one or more
variables and their types, which can be any valid field type,
separated by semicolons, to the tracing/synthetic_events file.

+See synth_field_size() for available types.
+
+If field_name contains [n], the field is considered to be a static array.
+
+If field_names contains[] (no subscript), the field is considered to
+be a dynamic array, which will only take as much space in the event as
+is required to hold the array.
+
+A string field can be specified using either the static notation:
+
+ char name[32];
+
+Or the dynamic:
+
+ char name[];
+
+The size limit for either is 256.
+
For instance, the following creates a new event named 'wakeup_latency'
with 3 fields: lat, pid, and prio. Each of those fields is simply a
variable reference to a variable on another event::
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 1b2ef6490229..ee6b27c82350 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -3279,6 +3279,15 @@ static int check_synth_field(struct synth_event *event,

field = event->fields[field_pos];

+ /*
+ * A dynamic string synth field can accept static or
+ * dynamic. A static string synth field can only accept a
+ * same-sized static string, which is checked for later.
+ */
+ if (strstr(hist_field->type, "char[") && field->is_string
+ && field->is_dynamic)
+ return 0;
+
if (strcmp(field->type, hist_field->type) != 0) {
if (field->size != hist_field->size ||
field->is_signed != hist_field->is_signed)
diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index 6e7282c7b530..00c6c61038d1 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -88,7 +88,7 @@ static int synth_event_define_fields(struct trace_event_call *call)

event->fields[i]->offset = n_u64;

- if (event->fields[i]->is_string) {
+ if (event->fields[i]->is_string && !event->fields[i]->is_dynamic) {
offset += STR_VAR_LEN_MAX;
n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
} else {
@@ -139,6 +139,9 @@ static int synth_field_string_size(char *type)
if (len > 3)
return -EINVAL;

+ if (len == 0)
+ return 0; /* variable-length string */
+
strncpy(buf, start, len);
buf[len] = '\0';

@@ -290,10 +293,25 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter,

/* parameter values */
if (se->fields[i]->is_string) {
- trace_seq_printf(s, print_fmt, se->fields[i]->name,
- (char *)&entry->fields[n_u64],
- i == se->n_fields - 1 ? "" : " ");
- n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
+ if (se->fields[i]->is_dynamic) {
+ u32 offset, data_offset;
+ char *str_field;
+
+ offset = (u32)entry->fields[n_u64];
+ data_offset = offset & 0xffff;
+
+ str_field = (char *)entry + data_offset;
+
+ trace_seq_printf(s, print_fmt, se->fields[i]->name,
+ str_field,
+ i == se->n_fields - 1 ? "" : " ");
+ n_u64++;
+ } else {
+ trace_seq_printf(s, print_fmt, se->fields[i]->name,
+ (char *)&entry->fields[n_u64],
+ i == se->n_fields - 1 ? "" : " ");
+ n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
+ }
} else {
struct trace_print_flags __flags[] = {
__def_gfpflag_names, {-1, NULL} };
@@ -325,16 +343,52 @@ static struct trace_event_functions synth_event_funcs = {
.trace = print_synth_event
};

+static unsigned int trace_string(struct synth_trace_event *entry,
+ struct synth_event *event,
+ char *str_val,
+ bool is_dynamic,
+ unsigned int data_size,
+ unsigned int *n_u64)
+{
+ unsigned int len = 0;
+ char *str_field;
+
+ if (is_dynamic) {
+ u32 data_offset;
+
+ data_offset = offsetof(typeof(*entry), fields);
+ data_offset += event->n_u64 * sizeof(u64);
+ data_offset += data_size;
+
+ str_field = (char *)entry + data_offset;
+
+ len = strlen(str_val) + 1;
+ strscpy(str_field, str_val, len);
+
+ data_offset |= len << 16;
+ *(u32 *)&entry->fields[*n_u64] = data_offset;
+
+ (*n_u64)++;
+ } else {
+ str_field = (char *)&entry->fields[*n_u64];
+
+ strscpy(str_field, str_val, STR_VAR_LEN_MAX);
+ (*n_u64) += STR_VAR_LEN_MAX / sizeof(u64);
+ }
+
+ return len;
+}
+
static notrace void trace_event_raw_event_synth(void *__data,
u64 *var_ref_vals,
unsigned int *var_ref_idx)
{
+ unsigned int i, n_u64, val_idx, len, data_size = 0;
struct trace_event_file *trace_file = __data;
struct synth_trace_event *entry;
struct trace_event_buffer fbuffer;
struct trace_buffer *buffer;
struct synth_event *event;
- unsigned int i, n_u64, val_idx;
int fields_size = 0;

event = trace_file->event_call->data;
@@ -344,6 +398,18 @@ static notrace void trace_event_raw_event_synth(void *__data,

fields_size = event->n_u64 * sizeof(u64);

+ for (i = 0; i < event->n_dynamic_fields; i++) {
+ unsigned int field_pos = event->dynamic_fields[i]->field_pos;
+ char *str_val;
+
+ val_idx = var_ref_idx[field_pos];
+ str_val = (char *)(long)var_ref_vals[val_idx];
+
+ len = strlen(str_val) + 1;
+
+ fields_size += len;
+ }
+
/*
* Avoid ring buffer recursion detection, as this event
* is being performed within another event.
@@ -360,10 +426,11 @@ static notrace void trace_event_raw_event_synth(void *__data,
val_idx = var_ref_idx[i];
if (event->fields[i]->is_string) {
char *str_val = (char *)(long)var_ref_vals[val_idx];
- char *str_field = (char *)&entry->fields[n_u64];

- strscpy(str_field, str_val, STR_VAR_LEN_MAX);
- n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
+ len = trace_string(entry, event, str_val,
+ event->fields[i]->is_dynamic,
+ data_size, &n_u64);
+ data_size += len; /* only dynamic string increments */
} else {
struct synth_field *field = event->fields[i];
u64 val = var_ref_vals[val_idx];
@@ -525,6 +592,27 @@ static struct synth_field *parse_synth_field(int argc, const char **argv,
if (size < 0) {
ret = -EINVAL;
goto free;
+ } else if (size == 0) {
+ if (synth_field_is_string(field->type)) {
+ char *type;
+
+ type = kzalloc(sizeof("__data_loc ") + strlen(field->type) + 1, GFP_KERNEL);
+ if (!type) {
+ ret = -ENOMEM;
+ goto free;
+ }
+
+ strcat(type, "__data_loc ");
+ strcat(type, field->type);
+ kfree(field->type);
+ field->type = type;
+
+ field->is_dynamic = true;
+ size = sizeof(u64);
+ } else {
+ ret = -EINVAL;
+ goto free;
+ }
}
field->size = size;

@@ -532,7 +620,6 @@ static struct synth_field *parse_synth_field(int argc, const char **argv,
field->is_string = true;

field->is_signed = synth_field_signed(field->type);
-
out:
return field;
free:
@@ -663,6 +750,7 @@ static void free_synth_event(struct synth_event *event)
free_synth_field(event->fields[i]);

kfree(event->fields);
+ kfree(event->dynamic_fields);
kfree(event->name);
kfree(event->class.system);
free_synth_tracepoint(event->tp);
@@ -673,8 +761,8 @@ static void free_synth_event(struct synth_event *event)
static struct synth_event *alloc_synth_event(const char *name, int n_fields,
struct synth_field **fields)
{
+ unsigned int i, j, n_dynamic_fields = 0;
struct synth_event *event;
- unsigned int i;

event = kzalloc(sizeof(*event), GFP_KERNEL);
if (!event) {
@@ -696,11 +784,33 @@ static struct synth_event *alloc_synth_event(const char *name, int n_fields,
goto out;
}

+ for (i = 0; i < n_fields; i++)
+ if (fields[i]->is_dynamic)
+ n_dynamic_fields++;
+
+ if (n_dynamic_fields) {
+ event->dynamic_fields = kcalloc(n_dynamic_fields,
+ sizeof(*event->dynamic_fields),
+ GFP_KERNEL);
+ if (!event->dynamic_fields) {
+ free_synth_event(event);
+ event = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+ }
+
dyn_event_init(&event->devent, &synth_event_ops);

- for (i = 0; i < n_fields; i++)
+ for (i = 0, j = 0; i < n_fields; i++) {
event->fields[i] = fields[i];

+ if (fields[i]->is_dynamic) {
+ event->dynamic_fields[j] = fields[i];
+ event->dynamic_fields[j]->field_pos = i;
+ event->dynamic_fields[j++] = fields[i];
+ event->n_dynamic_fields++;
+ }
+ }
event->n_fields = n_fields;
out:
return event;
@@ -1276,8 +1386,8 @@ __synth_event_trace_end(struct synth_event_trace_state *trace_state)
*/
int synth_event_trace(struct trace_event_file *file, unsigned int n_vals, ...)
{
+ unsigned int i, n_u64, len, data_size = 0;
struct synth_event_trace_state state;
- unsigned int i, n_u64;
va_list args;
int ret;

@@ -1301,10 +1411,11 @@ int synth_event_trace(struct trace_event_file *file, unsigned int n_vals, ...)

if (state.event->fields[i]->is_string) {
char *str_val = (char *)(long)val;
- char *str_field = (char *)&state.entry->fields[n_u64];

- strscpy(str_field, str_val, STR_VAR_LEN_MAX);
- n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
+ len = trace_string(state.entry, state.event, str_val,
+ state.event->fields[i]->is_dynamic,
+ data_size, &n_u64);
+ data_size += len; /* only dynamic string increments */
} else {
struct synth_field *field = state.event->fields[i];

@@ -1357,8 +1468,8 @@ EXPORT_SYMBOL_GPL(synth_event_trace);
int synth_event_trace_array(struct trace_event_file *file, u64 *vals,
unsigned int n_vals)
{
+ unsigned int i, n_u64, len, data_size = 0;
struct synth_event_trace_state state;
- unsigned int i, n_u64;
int ret;

ret = __synth_event_trace_start(file, &state);
@@ -1376,10 +1487,11 @@ int synth_event_trace_array(struct trace_event_file *file, u64 *vals,
for (i = 0, n_u64 = 0; i < state.event->n_fields; i++) {
if (state.event->fields[i]->is_string) {
char *str_val = (char *)(long)vals[i];
- char *str_field = (char *)&state.entry->fields[n_u64];

- strscpy(str_field, str_val, STR_VAR_LEN_MAX);
- n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
+ len = trace_string(state.entry, state.event, str_val,
+ state.event->fields[i]->is_dynamic,
+ data_size, &n_u64);
+ data_size += len; /* only dynamic string increments */
} else {
struct synth_field *field = state.event->fields[i];
u64 val = vals[i];
@@ -1510,6 +1622,11 @@ static int __synth_event_add_val(const char *field_name, u64 val,
char *str_val = (char *)(long)val;
char *str_field;

+ if (field->is_dynamic) { /* add_val can't do dynamic strings */
+ ret = -EINVAL;
+ goto out;
+ }
+
if (!str_val) {
ret = -EINVAL;
goto out;
diff --git a/kernel/trace/trace_synth.h b/kernel/trace/trace_synth.h
index 5166705d1556..6e146b959dcd 100644
--- a/kernel/trace/trace_synth.h
+++ b/kernel/trace/trace_synth.h
@@ -16,6 +16,8 @@ struct synth_field {
unsigned int offset;
bool is_signed;
bool is_string;
+ bool is_dynamic;
+ bool field_pos;
};

struct synth_event {
@@ -24,6 +26,8 @@ struct synth_event {
char *name;
struct synth_field **fields;
unsigned int n_fields;
+ struct synth_field **dynamic_fields;
+ unsigned int n_dynamic_fields;
unsigned int n_u64;
struct trace_event_class class;
struct trace_event_call call;
--
2.17.1