Re: [PATCH 4/7] perf: Free aux pages in unmap path

From: Alexander Shishkin
Date: Thu Dec 10 2015 - 06:20:55 EST


Peter Zijlstra <peterz@xxxxxxxxxxxxx> writes:

>> How about something like this to stop the writers:
>>
>> static int __ring_buffer_output_stop(void *info)
>> {
>> struct ring_buffer *rb = info;
>> struct perf_event *event;
>>
>> spin_lock(&rb->event_lock);
>> list_for_each_entry_rcu(event, &rb->event_list, rb_entry) {
>> if (event->state != PERF_EVENT_STATE_ACTIVE)
>> continue;
>>
>> event->pmu->stop(event, PERF_EF_UPDATE);
>> }
>> spin_unlock(&rb->event_lock);
>>
>> return 0;
>> }
>>
>> static void perf_event_output_stop(struct perf_event *event)
>> {
>> struct ring_buffer *rb = event->rb;
>>
>> lockdep_assert_held(&event->mmap_mutex);
>>
>> if (event->cpu == -1)
>> perf_event_stop(event);
>>
>> cpu_function_call(event->cpu, __ring_buffer_output_stop, rb);
>
> I'm not sure about the different semantics between event->cpu == -1 and
> not, but yes, something along those likes.

So this also doesn't quite cut it, since we also have children (which
aren't explicitly ring_buffer_attach()ed to the ring buffer, but will
still write there) and in particular children of those events that are
on rb->event_list, which triples the fun of synchronizing and iterating
these.

So I tried a different approach: iterating through pmu's contexts'
instead. Consider the following.

+static void __perf_event_output_stop(struct perf_event *event, void *data)
+{
+ struct perf_event *parent = event->parent;
+ struct ring_buffer *rb = data;
+
+ if (rcu_dereference(event->rb) == rb)
+ __perf_event_stop(event);
+ if (parent && rcu_dereference(parent->rb) == rb)
+ __perf_event_stop(event);
+}
+
+static int __perf_pmu_output_stop(void *info)
+{
+ struct perf_event *event = info;
+ struct pmu *pmu = event->pmu;
+ struct perf_cpu_context *cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
+
+ rcu_read_lock();
+ perf_event_aux_ctx(&cpuctx->ctx, __perf_event_output_stop, event->rb, true);
+ if (cpuctx->task_ctx)
+ perf_event_aux_ctx(cpuctx->task_ctx, __perf_event_output_stop,
+ event->rb, true);
+ rcu_read_unlock();
+
+ return 0;
+}
+
+static void perf_pmu_output_stop(struct perf_event *event)
+{
+ int cpu;
+
+ get_online_cpus();
+ for_each_online_cpu(cpu) {
+ cpu_function_call(cpu, __perf_pmu_output_stop, event);
+ }
+ put_online_cpus();
+}

And then we just call this perf_pmu_output_stop() from perf_mmap_close()
under mmap_mutex, before rb_free_aux(). Likely going through online cpus
is not necessary, but I'm gonna grab a lunch first.

I also hacked perf_event_aux_ctx() to make the above work, like so:

@@ -5696,15 +5696,18 @@ typedef void (perf_event_aux_output_cb)(struct perf_event *event, void *data);
static void
perf_event_aux_ctx(struct perf_event_context *ctx,
perf_event_aux_output_cb output,
- void *data)
+ void *data, bool all)
{
struct perf_event *event;

list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
- if (event->state < PERF_EVENT_STATE_INACTIVE)
- continue;
- if (!event_filter_match(event))
- continue;
+ if (!all) {
+ if (event->state < PERF_EVENT_STATE_INACTIVE)
+ continue;
+ if (!event_filter_match(event))
+ continue;
+ }
+
output(event, data);
}
}

How does this look to you?

Regards,
--
Alex
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/