Re: [RFC PATCH V3 2/7] perf: attach/detach PMU specific data

From: Andi Kleen
Date: Wed Jan 08 2020 - 11:50:12 EST


> +static int
> +attach_system_wide_ctx_data(size_t ctx_size)
> +{
> + int i, num_thread, pos, nr_failed_alloc;
> + unsigned long flags = GFP_ATOMIC;
> + struct perf_ctx_data *tsk_data;
> + struct perf_ctx_data **data;
> + struct task_struct *g, *p;
> + bool re_alloc = true;
> +
> + /* Retrieve total number of threads */
> + num_thread = nr_threads;
> +
> + data = kcalloc(num_thread, sizeof(*data), GFP_KERNEL);

This probably needs kvcalloc for reliability and avoiding stalls.

> + if (!data) {
> + printk_once(KERN_DEBUG
> + "Failed to allocate space for LBR callstack. "
> + "The LBR callstack for all tasks may be cutoff.\n");
> + return -ENOMEM;
> + }
> +
> + atomic_inc(&nr_task_data_sys_wide_events);
> +
> +repeat:
> + /*
> + * Allocate perf_ctx_data for all existing threads.
> + * The perf_ctx_data for new threads will be allocated in
> + * perf_event_fork().
> + * Do a quick allocation in first round with GFP_ATOMIC.
> + */
> + for (i = 0; i < num_thread; i++) {
> + if (alloc_perf_ctx_data(ctx_size, flags, &data[i]))
> + break;
> + }
> + num_thread = i;
> + nr_failed_alloc = 0;
> + pos = 0;
> +

> + rcu_read_lock();
> + for_each_process_thread(g, p) {
> + raw_spin_lock(&p->perf_ctx_data_lock);
> + tsk_data = p->perf_ctx_data;
> + if (tsk_data) {

That will be a lot of locks even for tasks that don't use perf, but I guess we
really need it and it's bounded by the number of tasks.

> + }
> +
> + if (pos < num_thread) {
> + refcount_set(&data[pos]->refcount, TASK_DATA_SYS_WIDE);
> + rcu_assign_pointer(p->perf_ctx_data, data[pos++]);
> + } else {
> + /*
> + * The quick allocation in first round may be failed.
> + * Track the number in nr_failed_alloc.
> + */
> + nr_failed_alloc++;
> + }
> + raw_spin_unlock(&p->perf_ctx_data_lock);
> + }
> + rcu_read_unlock();


-Andi