Re: [PATCH] trace_uprobe: support reference count in fd-based uprobe
From: Ravi Bangoria
Date: Wed Aug 22 2018 - 06:28:40 EST
On 08/22/2018 03:53 AM, Song Liu wrote:
> This patch applies on top of Ravi Bangoria's work that enables reference
> count for uprobe:
>
> https://lkml.org/lkml/2018/8/20/37
>
> After Ravi's work, the effort to enable it in fd-based uprobe is
> straightforward. The highest 40 bits of perf_event_attr.config are used to
> store the offset of the reference count (semaphore).
>
> Format information in /sys/bus/event_source/devices/uprobe/format/ is
> updated to reflect this new feature.
LGTM
Reviewed-and-tested-by: Ravi Bangoria <ravi.bangoria@xxxxxxxxxxxxx>
>
> Signed-off-by: Song Liu <songliubraving@xxxxxx>
> Cc: Ravi Bangoria <ravi.bangoria@xxxxxxxxxxxxx>
> Cc: Masami Hiramatsu <mhiramat@xxxxxxxxxx>
> Cc: Oleg Nesterov <oleg@xxxxxxxxxx>
> Cc: Srikar Dronamraju <srikar@xxxxxxxxxxxxxxxxxx>
> Cc: Naveen N. Rao <naveen.n.rao@xxxxxxxxxxxxxxxxxx>
> Cc: Steven Rostedt (VMware) <rostedt@xxxxxxxxxxx>
> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
> Cc: Ingo Molnar <mingo@xxxxxxxxxx>
> Cc: Teng Qin <qinteng@xxxxxx>
> ---
> include/linux/trace_events.h | 3 +-
> kernel/events/core.c | 49 ++++++++++++++++++++++++++-------
> kernel/trace/trace_event_perf.c | 7 +++--
> kernel/trace/trace_probe.h | 3 +-
> kernel/trace/trace_uprobe.c | 4 ++-
> 5 files changed, 50 insertions(+), 16 deletions(-)
>
> diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
> index 78a010e19ed4..4130a5497d40 100644
> --- a/include/linux/trace_events.h
> +++ b/include/linux/trace_events.h
> @@ -575,7 +575,8 @@ extern int bpf_get_kprobe_info(const struct perf_event *event,
> bool perf_type_tracepoint);
> #endif
> #ifdef CONFIG_UPROBE_EVENTS
> -extern int perf_uprobe_init(struct perf_event *event, bool is_retprobe);
> +extern int perf_uprobe_init(struct perf_event *event,
> + unsigned long ref_ctr_offset, bool is_retprobe);
> extern void perf_uprobe_destroy(struct perf_event *event);
> extern int bpf_get_uprobe_info(const struct perf_event *event,
> u32 *fd_type, const char **filename,
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 8f0434a9951a..75a0219be420 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -8363,30 +8363,39 @@ static struct pmu perf_tracepoint = {
> *
> * PERF_PROBE_CONFIG_IS_RETPROBE if set, create kretprobe/uretprobe
> * if not set, create kprobe/uprobe
> + *
> + * The following values specify a reference counter (or semaphore in the
> + * terminology of tools like dtrace, systemtap, etc.) for Userspace
> + * Statically Defined Tracepoints (USDT). Currently, we use 40 bits for
> + * the offset.
> + *
> + * PERF_UPROBE_REF_CTR_OFFSET_BITS # of bits in config as the offset
> + * PERF_UPROBE_REF_CTR_OFFSET_SHIFT # of bits to shift left
> */
> enum perf_probe_config {
> PERF_PROBE_CONFIG_IS_RETPROBE = 1U << 0, /* [k,u]retprobe */
> + PERF_UPROBE_REF_CTR_OFFSET_BITS = 40,
> + PERF_UPROBE_REF_CTR_OFFSET_SHIFT = 64 - PERF_UPROBE_REF_CTR_OFFSET_BITS,
> };
>
> PMU_FORMAT_ATTR(retprobe, "config:0");
> +#endif
>
> -static struct attribute *probe_attrs[] = {
> +#ifdef CONFIG_KPROBE_EVENTS
> +static struct attribute *kprobe_attrs[] = {
> &format_attr_retprobe.attr,
> NULL,
> };
>
> -static struct attribute_group probe_format_group = {
> +static struct attribute_group kprobe_format_group = {
> .name = "format",
> - .attrs = probe_attrs,
> + .attrs = kprobe_attrs,
> };
>
> -static const struct attribute_group *probe_attr_groups[] = {
> - &probe_format_group,
> +static const struct attribute_group *kprobe_attr_groups[] = {
> + &kprobe_format_group,
> NULL,
> };
> -#endif
>
> -#ifdef CONFIG_KPROBE_EVENTS
> static int perf_kprobe_event_init(struct perf_event *event);
> static struct pmu perf_kprobe = {
> .task_ctx_nr = perf_sw_context,
> @@ -8396,7 +8405,7 @@ static struct pmu perf_kprobe = {
> .start = perf_swevent_start,
> .stop = perf_swevent_stop,
> .read = perf_swevent_read,
> - .attr_groups = probe_attr_groups,
> + .attr_groups = kprobe_attr_groups,
> };
>
> static int perf_kprobe_event_init(struct perf_event *event)
> @@ -8428,6 +8437,24 @@ static int perf_kprobe_event_init(struct perf_event *event)
> #endif /* CONFIG_KPROBE_EVENTS */
>
> #ifdef CONFIG_UPROBE_EVENTS
> +PMU_FORMAT_ATTR(ref_ctr_offset, "config:63-24");
> +
> +static struct attribute *uprobe_attrs[] = {
> + &format_attr_retprobe.attr,
> + &format_attr_ref_ctr_offset.attr,
> + NULL,
> +};
> +
> +static struct attribute_group uprobe_format_group = {
> + .name = "format",
> + .attrs = uprobe_attrs,
> +};
> +
> +static const struct attribute_group *uprobe_attr_groups[] = {
> + &uprobe_format_group,
> + NULL,
> +};
> +
> static int perf_uprobe_event_init(struct perf_event *event);
> static struct pmu perf_uprobe = {
> .task_ctx_nr = perf_sw_context,
> @@ -8437,12 +8464,13 @@ static struct pmu perf_uprobe = {
> .start = perf_swevent_start,
> .stop = perf_swevent_stop,
> .read = perf_swevent_read,
> - .attr_groups = probe_attr_groups,
> + .attr_groups = uprobe_attr_groups,
> };
>
> static int perf_uprobe_event_init(struct perf_event *event)
> {
> int err;
> + unsigned long ref_ctr_offset;
> bool is_retprobe;
>
> if (event->attr.type != perf_uprobe.type)
> @@ -8458,7 +8486,8 @@ static int perf_uprobe_event_init(struct perf_event *event)
> return -EOPNOTSUPP;
>
> is_retprobe = event->attr.config & PERF_PROBE_CONFIG_IS_RETPROBE;
> - err = perf_uprobe_init(event, is_retprobe);
> + ref_ctr_offset = event->attr.config >> PERF_UPROBE_REF_CTR_OFFSET_SHIFT;
> + err = perf_uprobe_init(event, ref_ctr_offset, is_retprobe);
> if (err)
> return err;
>
> diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
> index 69a3fe926e8c..76217bbef815 100644
> --- a/kernel/trace/trace_event_perf.c
> +++ b/kernel/trace/trace_event_perf.c
> @@ -290,7 +290,8 @@ void perf_kprobe_destroy(struct perf_event *p_event)
> #endif /* CONFIG_KPROBE_EVENTS */
>
> #ifdef CONFIG_UPROBE_EVENTS
> -int perf_uprobe_init(struct perf_event *p_event, bool is_retprobe)
> +int perf_uprobe_init(struct perf_event *p_event,
> + unsigned long ref_ctr_offset, bool is_retprobe)
> {
> int ret;
> char *path = NULL;
> @@ -312,8 +313,8 @@ int perf_uprobe_init(struct perf_event *p_event, bool is_retprobe)
> goto out;
> }
>
> - tp_event = create_local_trace_uprobe(
> - path, p_event->attr.probe_offset, is_retprobe);
> + tp_event = create_local_trace_uprobe(path, p_event->attr.probe_offset,
> + ref_ctr_offset, is_retprobe);
> if (IS_ERR(tp_event)) {
> ret = PTR_ERR(tp_event);
> goto out;
> diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
> index 5f52668e165d..03b10f3201a5 100644
> --- a/kernel/trace/trace_probe.h
> +++ b/kernel/trace/trace_probe.h
> @@ -412,6 +412,7 @@ create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
> extern void destroy_local_trace_kprobe(struct trace_event_call *event_call);
>
> extern struct trace_event_call *
> -create_local_trace_uprobe(char *name, unsigned long offs, bool is_return);
> +create_local_trace_uprobe(char *name, unsigned long offs,
> + unsigned long ref_ctr_offset, bool is_return);
> extern void destroy_local_trace_uprobe(struct trace_event_call *event_call);
> #endif
> diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
> index 3a7c73c40007..d09638706fe0 100644
> --- a/kernel/trace/trace_uprobe.c
> +++ b/kernel/trace/trace_uprobe.c
> @@ -1405,7 +1405,8 @@ static int unregister_uprobe_event(struct trace_uprobe *tu)
>
> #ifdef CONFIG_PERF_EVENTS
> struct trace_event_call *
> -create_local_trace_uprobe(char *name, unsigned long offs, bool is_return)
> +create_local_trace_uprobe(char *name, unsigned long offs,
> + unsigned long ref_ctr_offset, bool is_return)
> {
> struct trace_uprobe *tu;
> struct path path;
> @@ -1437,6 +1438,7 @@ create_local_trace_uprobe(char *name, unsigned long offs, bool is_return)
>
> tu->offset = offs;
> tu->path = path;
> + tu->ref_ctr_offset = ref_ctr_offset;
> tu->filename = kstrdup(name, GFP_KERNEL);
> init_trace_event_call(tu, &tu->tp.call);
>
>