Re: Using perf_event_open() to sample multiple events of a process

From: Peter Zijlstra
Date: Mon Nov 08 2021 - 10:24:41 EST


On Sat, Nov 06, 2021 at 01:57:23AM +0100, Peter Zijlstra wrote:

> The problem seems to be that we call perf_event_set_output() before we
> set event->ctx, which is a bit of a problem.
>
> Now, afaict it's been broken since c3f00c70276d ("perf: Separate
> find_get_context() from event initialization"), which is ages ago :/
>
> It's waaay too late to try and fix it; I'll be likely to make an even
> bigger mess if I tried. Perhaps tomorrow.
>
> Clearly FD_OUTPUT isn't much used :-(

The below seems to fix, it's a bit of a hack, but I couldn't really come
up with anything saner.

---
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f2253ea729a2..dbe766663733 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5595,6 +5595,7 @@ static inline int perf_fget_light(int fd, struct fd *p)
}

static int perf_event_set_output(struct perf_event *event,
+ struct perf_event_context *ctx,
struct perf_event *output_event);
static int perf_event_set_filter(struct perf_event *event, void __user *arg);
static int perf_copy_attr(struct perf_event_attr __user *uattr,
@@ -5647,10 +5648,10 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon
if (ret)
return ret;
output_event = output.file->private_data;
- ret = perf_event_set_output(event, output_event);
+ ret = perf_event_set_output(event, event->ctx, output_event);
fdput(output);
} else {
- ret = perf_event_set_output(event, NULL);
+ ret = perf_event_set_output(event, event->ctx, NULL);
}
return ret;
}
@@ -11830,7 +11831,9 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
}

static int
-perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
+perf_event_set_output(struct perf_event *event,
+ struct perf_event_context *event_ctx,
+ struct perf_event *output_event)
{
struct perf_buffer *rb = NULL;
int ret = -EINVAL;
@@ -11851,7 +11854,7 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
/*
* If its not a per-cpu rb, it must be the same task.
*/
- if (output_event->cpu == -1 && output_event->ctx != event->ctx)
+ if (output_event->cpu == -1 && output_event->ctx != event_ctx)
goto out;

/*
@@ -12232,7 +12235,7 @@ SYSCALL_DEFINE5(perf_event_open,
}

if (output_event) {
- err = perf_event_set_output(event, output_event);
+ err = perf_event_set_output(event, ctx, output_event);
if (err)
goto err_context;
}