Re: [PATCH v2 5/5] perf/x86/amd/power: Add AMD accumulated power reporting mechanism
From: Peter Zijlstra
Date: Tue Jan 19 2016 - 07:14:05 EST
On Thu, Jan 14, 2016 at 10:50:08AM +0800, Huang Rui wrote:
> +struct power_pmu {
> + spinlock_t lock;
This should be a raw_spinlock_t, as it'll be nested under other
raw_spinlock_t's.
> + struct list_head active_list;
> + struct pmu *pmu; /* pointer to power_pmu_class */
> + local64_t cpu_sw_pwr_ptsc;
> +};
> +static void pmu_event_start(struct perf_event *event, int mode)
> +{
> + struct power_pmu *pmu = __this_cpu_read(amd_power_pmu);
> + unsigned long flags;
> +
> + spin_lock_irqsave(&pmu->lock, flags);
IRQs will be disabled here.
> + __pmu_event_start(pmu, event);
> + spin_unlock_irqrestore(&pmu->lock, flags);
> +}
> +
> +static void pmu_event_stop(struct perf_event *event, int mode)
> +{
> + struct power_pmu *pmu = __this_cpu_read(amd_power_pmu);
> + struct hw_perf_event *hwc = &event->hw;
> + unsigned long flags;
> +
> + spin_lock_irqsave(&pmu->lock, flags);
idem
> +
> + /* mark event as deactivated and stopped */
> + if (!(hwc->state & PERF_HES_STOPPED)) {
> + list_del(&event->active_entry);
> + hwc->state |= PERF_HES_STOPPED;
> + }
> +
> + /* check if update of sw counter is necessary */
> + if ((mode & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
> + /*
> + * Drain the remaining delta count out of a event
> + * that we are disabling:
> + */
> + event_update(event, pmu);
> + hwc->state |= PERF_HES_UPTODATE;
> + }
> +
> + spin_unlock_irqrestore(&pmu->lock, flags);
> +}
> +
> +static int pmu_event_add(struct perf_event *event, int mode)
> +{
> + struct power_pmu *pmu = __this_cpu_read(amd_power_pmu);
> + struct hw_perf_event *hwc = &event->hw;
> + unsigned long flags;
> +
> + spin_lock_irqsave(&pmu->lock, flags);
> +
idem
> + hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
> +
> + if (mode & PERF_EF_START)
> + __pmu_event_start(pmu, event);
> +
> + spin_unlock_irqrestore(&pmu->lock, flags);
> +
> + return 0;
> +}
> +static int power_cpu_init(int cpu)
> +{
> + int i, cu, ret = 0;
> + cpumask_var_t mask, dummy_mask;
> +
> + cu = cpu / cores_per_cu;
> +
> + if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
> + return -ENOMEM;
> +
> + if (!zalloc_cpumask_var(&dummy_mask, GFP_KERNEL)) {
> + ret = -ENOMEM;
> + goto out;
> + }
> +
> + for (i = 0; i < cores_per_cu; i++)
> + cpumask_set_cpu(i, mask);
> +
> + cpumask_shift_left(mask, mask, cu * cores_per_cu);
> +
> + if (!cpumask_and(dummy_mask, mask, &cpu_mask))
> + cpumask_set_cpu(cpu, &cpu_mask);
> +
> + free_cpumask_var(dummy_mask);
> +out:
> + free_cpumask_var(mask);
> +
> + return ret;
> +}
> +static int power_cpu_notifier(struct notifier_block *self,
> + unsigned long action, void *hcpu)
> +{
> + unsigned int cpu = (long)hcpu;
> +
> + switch (action & ~CPU_TASKS_FROZEN) {
> + case CPU_UP_PREPARE:
> + if (power_cpu_prepare(cpu))
> + return NOTIFY_BAD;
> + break;
> + case CPU_STARTING:
> + if (power_cpu_init(cpu))
> + return NOTIFY_BAD;
this is called with IRQs disabled, which makes those GFP_KERNEL allocs
above a pretty bad idea.
Also, note that -rt cannot actually do _any_ allocations/frees from
STARTING.
Please move the allocs/frees to PREPARE/ONLINE.
> + break;
> + case CPU_ONLINE:
> + case CPU_DEAD:
> + power_cpu_kfree(cpu);
> + break;
> + case CPU_DOWN_PREPARE:
> + if (power_cpu_exit(cpu))
> + return NOTIFY_BAD;
> + break;
> + default:
> + break;
> + }
> +
> + return NOTIFY_OK;
> +}