Re: [PATCH v7 2/4] trace/objtrace: get the value of the object
From: Jeff Xie
Date: Tue Jan 18 2022 - 21:28:20 EST
Hi Masami,
On Tue, Jan 18, 2022 at 11:13 PM Masami Hiramatsu <mhiramat@xxxxxxxxxx> wrote:
>
> Hi Jeff,
>
> On Thu, 13 Jan 2022 09:38:33 +0800
> Jeff Xie <xiehuan09@xxxxxxxxx> wrote:
>
> > Using objtrace trigger to get the value of the object which from the kernel
> > function parameter.
> >
> > Syntax:
> > objtrace:add:obj[,offset][:type][:count][if <filter>]
> >
> > Usage:
> > # echo 'p bio_add_page arg1=$arg1' > ./kprobe_events
> > # gdb vmlinux
> > (gdb) p &(((struct bio *)0)->bi_iter.bi_size)
> > $1 = (unsigned int *) 0x28
> > # echo 'objtrace:add:arg1,0x28:u32:1 if comm == "cat"' > ./events/kprobes/ \
> > p_bio_add_page_0/trigger
> > # cat /test.txt
> >
> > Signed-off-by: Jeff Xie <xiehuan09@xxxxxxxxx>
> > ---
> > kernel/trace/trace.c | 2 +-
> > kernel/trace/trace_entries.h | 5 +-
> > kernel/trace/trace_object.c | 190 +++++++++++++++++++++++++++++------
> > kernel/trace/trace_output.c | 6 +-
> > 4 files changed, 169 insertions(+), 34 deletions(-)
> >
> > diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> > index 0b627963e343..d5332ece4c67 100644
> > --- a/kernel/trace/trace.c
> > +++ b/kernel/trace/trace.c
> > @@ -5591,7 +5591,7 @@ static const char readme_msg[] =
> > "\t disable_hist:<system>:<event>\n"
> > #endif
> > #ifdef CONFIG_TRACE_OBJECT
> > - "\t objtrace:add:obj[:count][if <filter>]\n"
> > + "\t objtrace:add:obj[,offset][:type][:count][if <filter>]\n"
> > #endif
> > #ifdef CONFIG_STACKTRACE
> > "\t\t stacktrace\n"
> > diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
> > index bb120d9498a9..2407c45a568c 100644
> > --- a/kernel/trace/trace_entries.h
> > +++ b/kernel/trace/trace_entries.h
> > @@ -413,8 +413,9 @@ FTRACE_ENTRY(object, trace_object_entry,
> > __field( unsigned long, ip )
> > __field( unsigned long, parent_ip )
> > __field( unsigned long, object )
> > + __field( unsigned long, value )
> > ),
> >
> > - F_printk(" %ps <-- %ps object:%lx\n",
> > - (void *)__entry->ip, (void *)__entry->parent_ip, __entry->object)
> > + F_printk(" %ps <-- %ps object:%lx value:%lx\n", (void *)__entry->ip,
> > + (void *)__entry->parent_ip, __entry->object, __entry->value)
> > );
> > diff --git a/kernel/trace/trace_object.c b/kernel/trace/trace_object.c
> > index 4af1c117cbfa..774a9dff4d0a 100644
> > --- a/kernel/trace/trace_object.c
> > +++ b/kernel/trace/trace_object.c
> > @@ -11,7 +11,6 @@
> > #define MAX_TRACED_OBJECT 5
> > static DEFINE_RAW_SPINLOCK(trace_obj_lock);
> > static struct trace_event_file event_trace_file;
> > -static const int max_args_num = 6;
> > static atomic_t trace_object_ref;
> > static atomic_t num_traced_obj;
> > static int exit_trace_object(void);
> > @@ -19,8 +18,22 @@ static int init_trace_object(void);
> >
> > static struct object_instance {
> > void *obj;
> > + int obj_type_size;
> > } traced_obj[MAX_TRACED_OBJECT];
> >
> > +/* objtrace private data */
> > +struct objtrace_trigger_data {
> > + struct ftrace_event_field *field;
> > + int offset;
> > + int obj_type_size;
> > +};
> > +
> > +/* get the type size for the special object */
> > +struct objtrace_fetch_type {
> > + char *name;
> > + int type_size;
> > +};
> > +
> > static bool object_exist(void *obj)
> > {
> > int i, max;
> > @@ -39,7 +52,7 @@ static bool object_empty(void)
> > return !atomic_read(&num_traced_obj);
> > }
> >
> > -static void set_trace_object(void *obj)
> > +static void set_trace_object(void *obj, int obj_type_size)
> > {
> > unsigned long flags;
> >
> > @@ -59,6 +72,7 @@ static void set_trace_object(void *obj)
> > goto out;
> > }
> > traced_obj[atomic_read(&num_traced_obj)].obj = obj;
> > + traced_obj[atomic_read(&num_traced_obj)].obj_type_size = obj_type_size;
> > /* make sure the num_traced_obj update always appears after traced_obj update */
> > smp_wmb();
> > atomic_inc(&num_traced_obj);
> > @@ -67,7 +81,7 @@ static void set_trace_object(void *obj)
> > }
> >
> > static void submit_trace_object(unsigned long ip, unsigned long parent_ip,
> > - unsigned long object)
> > + unsigned long object, unsigned long value)
> > {
> >
> > struct trace_buffer *buffer;
> > @@ -84,18 +98,66 @@ static void submit_trace_object(unsigned long ip, unsigned long parent_ip,
> > entry->ip = ip;
> > entry->parent_ip = parent_ip;
> > entry->object = object;
> > + entry->value = value;
> >
> > event_trigger_unlock_commit(&event_trace_file, buffer, event,
> > entry, pc);
> > }
> >
> > +static inline long get_object_value(unsigned long *val, void *obj, int type_size)
> > +{
> > + long ret = 0;
> > +
> > + switch (type_size) {
> > + case 1: {
> > + u8 tmp;
> > +
> > + ret = copy_from_kernel_nofault(&tmp, obj, sizeof(tmp));
> > + if (ret)
> > + goto out;
> > + *val = tmp;
> > + break;
> > + }
> > + case 2: {
> > + u16 tmp;
> > +
> > + ret = copy_from_kernel_nofault(&tmp, obj, sizeof(tmp));
> > + if (ret)
> > + goto out;
> > + *val = tmp;
> > + break;
> > + }
> > + case 4: {
> > + u32 tmp;
> > +
> > + ret = copy_from_kernel_nofault(&tmp, obj, sizeof(tmp));
> > + if (ret)
> > + goto out;
> > + *val = tmp;
> > + break;
> > + }
> > + case 8: {
> > + u64 tmp;
> > +
> > + ret = copy_from_kernel_nofault(&tmp, obj, sizeof(tmp));
> > + if (ret)
> > + goto out;
> > + *val = tmp;
> > + break;
> > + }
>
> Can't you move the copy_from_kernel_nofault() outside of the switch-case?
> e.g.
>
> char tmp[sizeof(u64)];
>
> ret = copy_from_kernel_nofault(tmp, obj, sizeof(tmp));
> if (ret)
> return ret;
>
> switch (type_size) {
> case 1:
> *val = (unsigned long)*(u8 *)tmp;
> break;
> ...
> }
> return 0;
Thanks, I will change it.
> Thank you,
>
>
> > + default:
> > + return -EINVAL;
> > + }
> > +out:
> > + return ret;
> > +}
> > +
> > static void
> > trace_object_events_call(unsigned long ip, unsigned long parent_ip,
> > struct ftrace_ops *op, struct ftrace_regs *fregs)
> > {
> > - struct pt_regs *pt_regs = ftrace_get_regs(fregs);
> > - unsigned long obj;
> > - int bit, n;
> > + unsigned long val = 0;
> > + int bit, n, max;
> >
> > bit = ftrace_test_recursion_trylock(ip, parent_ip);
> > if (bit < 0)
> > @@ -104,11 +166,12 @@ trace_object_events_call(unsigned long ip, unsigned long parent_ip,
> > if (object_empty())
> > goto out;
> >
> > - for (n = 0; n < max_args_num; n++) {
> > - obj = regs_get_kernel_argument(pt_regs, n);
> > - if (object_exist((void *)obj))
> > - submit_trace_object(ip, parent_ip, obj);
>
> This actually filtered the (function call) event which is related to the object.
>
> > - /* The parameters of a function may match multiple objects */
> > + max = atomic_read(&num_traced_obj);
> > + smp_rmb();
> > + for (n = 0; n < max; n++) {
> > + if (get_object_value(&val, traced_obj[n].obj, traced_obj[n].obj_type_size))
> > + goto out;
> > + submit_trace_object(ip, parent_ip, (unsigned long)traced_obj[n].obj, val);
>
> But you lost the filter here.
> This is because you saves the "object + offset" address below;
I misunderstood the previous discussion and thought that filter
do not apply when getting the value of the object. ;-)
I will add the filter.
>
> > }
> > out:
> > ftrace_test_recursion_unlock(bit);
> > @@ -124,12 +187,15 @@ trace_object_trigger(struct event_trigger_data *data,
> > struct trace_buffer *buffer, void *rec,
> > struct ring_buffer_event *event)
> > {
> > + struct objtrace_trigger_data *obj_data = data->private_data;
> > + struct ftrace_event_field *field;
> > + void *obj, *val = NULL;
> >
> > - struct ftrace_event_field *field = data->private_data;
> > - void *obj = NULL;
> > -
> > - memcpy(&obj, rec + field->offset, sizeof(obj));
> > - set_trace_object(obj);
> > + field = obj_data->field;
> > + memcpy(&val, rec + field->offset, sizeof(val));
> > + /* get the final object */
> > + obj = val + obj_data->offset;
>
> Here, you saved the object address including offset. This means
> you can not filter the actual function related to the object.
>
> Please recover the filter.
>
> > + set_trace_object(obj, obj_data->obj_type_size);
>
> You can save the offset with type-size and object address itself.
Thanks, I will recover it.
> Thank you,
>
> > }
> >
> > static void
> > @@ -140,8 +206,10 @@ trace_object_trigger_free(struct event_trigger_ops *ops,
> > return;
> >
> > data->ref--;
> > - if (!data->ref)
> > + if (!data->ref) {
> > + kfree(data->private_data);
> > trigger_data_free(data);
> > + }
> > }
> >
> > static void
> > @@ -276,6 +344,22 @@ static void unregister_object_trigger(char *glob, struct event_trigger_ops *ops,
> > }
> > }
> >
> > +static const struct objtrace_fetch_type objtrace_fetch_types[] = {
> > + {"u8", 1},
> > + {"s8", 1},
> > + {"x8", 1},
> > + {"u16", 2},
> > + {"s16", 2},
> > + {"x16", 2},
> > + {"u32", 4},
> > + {"s32", 4},
> > + {"x32", 4},
> > + {"u64", 8},
> > + {"s64", 8},
> > + {"x64", 8},
> > + {}
> > +};
> > +
> > static int
> > event_object_trigger_callback(struct event_command *cmd_ops,
> > struct trace_event_file *file,
> > @@ -283,19 +367,22 @@ event_object_trigger_callback(struct event_command *cmd_ops,
> > {
> > struct event_trigger_data *trigger_data;
> > struct event_trigger_ops *trigger_ops;
> > + struct objtrace_trigger_data *obj_data;
> > struct trace_event_call *call;
> > struct ftrace_event_field *field;
> > - char *objtrace_cmd;
> > - char *trigger = NULL;
> > - char *arg;
> > - char *number;
> > - int ret;
> > + char *type, *tr, *obj, *tmp, *trigger = NULL;
> > + char *number, *objtrace_cmd;
> > + int ret, i, def_type_size, obj_type_size = 0;
> > + long offset = 0;
> >
> > ret = -EINVAL;
> > if (!param)
> > goto out;
> >
> > - /* separate the trigger from the filter (c:a:n [if filter]) */
> > + /*
> > + * separate the trigger from the filter:
> > + * objtrace:add:OBJ[,OFFS][:TYPE][:COUNT] [if filter]
> > + */
> > trigger = strsep(¶m, " \t");
> > if (!trigger)
> > goto out;
> > @@ -309,33 +396,79 @@ event_object_trigger_callback(struct event_command *cmd_ops,
> > if (!objtrace_cmd || strcmp(objtrace_cmd, "add"))
> > goto out;
> >
> > - arg = strsep(&trigger, ":");
> > - if (!arg)
> > + obj = strsep(&trigger, ":");
> > + if (!obj)
> > goto out;
> > +
> > + tr = strchr(obj, ',');
> > + if (!tr)
> > + offset = 0;
> > + else {
> > + *tr++ = '\0';
> > + ret = kstrtol(tr, 0, &offset);
> > + if (ret)
> > + goto out;
> > + }
> > +
> > + ret = -EINVAL;
> > call = file->event_call;
> > - field = trace_find_event_field(call, arg);
> > + field = trace_find_event_field(call, obj);
> > if (!field)
> > goto out;
> >
> > if (field->size != sizeof(void *))
> > goto out;
> > + def_type_size = sizeof(void *);
> > + if (!trigger) {
> > + obj_type_size = def_type_size;
> > + goto skip_get_type;
> > + }
> >
> > + tmp = trigger;
> > + type = strsep(&trigger, ":");
> > + if (!type)
> > + obj_type_size = def_type_size;
> > + else if (isdigit(type[0])) {
> > + obj_type_size = def_type_size;
> > + trigger = tmp;
> > + } else {
> > + for (i = 0; objtrace_fetch_types[i].name; i++) {
> > + if (strcmp(objtrace_fetch_types[i].name, type) == 0) {
> > + obj_type_size = objtrace_fetch_types[i].type_size;
> > + break;
> > + }
> > + }
> > + }
> > + if (!obj_type_size)
> > + goto out;
> > +skip_get_type:
> > trigger_ops = cmd_ops->get_trigger_ops(cmd, trigger);
> >
> > ret = -ENOMEM;
> > + obj_data = kzalloc(sizeof(*obj_data), GFP_KERNEL);
> > + if (!obj_data)
> > + goto out;
> > +
> > + obj_data->field = field;
> > + obj_data->offset = offset;
> > + obj_data->obj_type_size = obj_type_size;
> > +
> > trigger_data = kzalloc(sizeof(*trigger_data), GFP_KERNEL);
> > - if (!trigger_data)
> > + if (!trigger_data) {
> > + kfree(obj_data);
> > goto out;
> > + }
> >
> > trigger_data->count = -1;
> > trigger_data->ops = trigger_ops;
> > trigger_data->cmd_ops = cmd_ops;
> > - trigger_data->private_data = field;
> > + trigger_data->private_data = obj_data;
> > INIT_LIST_HEAD(&trigger_data->list);
> > INIT_LIST_HEAD(&trigger_data->named_list);
> >
> > if (glob[0] == '!') {
> > cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file);
> > + kfree(obj_data);
> > kfree(trigger_data);
> > ret = 0;
> > goto out;
> > @@ -390,6 +523,7 @@ event_object_trigger_callback(struct event_command *cmd_ops,
> > out_free:
> > if (cmd_ops->set_filter)
> > cmd_ops->set_filter(NULL, trigger_data, NULL);
> > + kfree(obj_data);
> > kfree(trigger_data);
> > goto out;
> > }
> > diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
> > index d747aed27104..12a971927d8c 100644
> > --- a/kernel/trace/trace_output.c
> > +++ b/kernel/trace/trace_output.c
> > @@ -1553,6 +1553,7 @@ static enum print_line_t trace_object_print(struct trace_iterator *iter, int fla
> > trace_assign_type(field, iter->ent);
> > print_fn_trace(s, field->ip, field->parent_ip, flags);
> > trace_seq_printf(s, " object:0x%lx", field->object);
> > + trace_seq_printf(s, " value:0x%lx", field->value);
> > trace_seq_putc(s, '\n');
> >
> > return trace_handle_return(s);
> > @@ -1565,9 +1566,8 @@ static enum print_line_t trace_object_raw(struct trace_iterator *iter, int flags
> >
> > trace_assign_type(field, iter->ent);
> >
> > - trace_seq_printf(&iter->seq, "%lx %lx\n",
> > - field->ip,
> > - field->parent_ip);
> > + trace_seq_printf(&iter->seq, "%lx %lx %lx %lx\n", field->ip,
> > + field->parent_ip, field->object, field->value);
> >
> > return trace_handle_return(&iter->seq);
> > }
> > --
> > 2.25.1
> >
>
>
> --
> Masami Hiramatsu <mhiramat@xxxxxxxxxx>
---
JeffXie