Re: [PATCH V7 5/7] cpufreq: Register notifiers with the PM QoS framework

From: Dmitry Osipenko
Date: Sun Sep 22 2019 - 16:12:46 EST


08.07.2019 13:57, Viresh Kumar ÐÐÑÐÑ:
> This registers the notifiers for min/max frequency constraints with the
> PM QoS framework. The constraints are also taken into consideration in
> cpufreq_set_policy().
>
> This also relocates cpufreq_policy_put_kobj() as it is required to be
> called from cpufreq_policy_alloc() now.
>
> refresh_frequency_limits() is updated to avoid calling
> cpufreq_set_policy() for inactive policies and handle_update() is
> updated to have proper locking in place.
>
> No constraints are added until now though.
>
> Reviewed-by: Matthias Kaehlcke <mka@xxxxxxxxxxxx>
> Reviewed-by: Ulf Hansson <ulf.hansson@xxxxxxxxxx>
> Signed-off-by: Viresh Kumar <viresh.kumar@xxxxxxxxxx>
> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@xxxxxxxxx>
> ---
> V6->V7:
> - All callers of refresh_frequency_limits(), except handle_update(),
> take the policy->rwsem and result in deadlock as
> refresh_frequency_limits() also takes the same lock again. Fix that
> by taking the rwsem from handle_update() instead.
>
> @Rafael: Sending it before Pavel has verified it as I would be offline
> later, in case you want to apply this today itself.
>
> drivers/cpufreq/cpufreq.c | 135 +++++++++++++++++++++++++++++---------
> include/linux/cpufreq.h | 3 +
> 2 files changed, 108 insertions(+), 30 deletions(-)
>
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index ceb57af15ca0..b96ef6db1bfe 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -26,6 +26,7 @@
> #include <linux/kernel_stat.h>
> #include <linux/module.h>
> #include <linux/mutex.h>
> +#include <linux/pm_qos.h>
> #include <linux/slab.h>
> #include <linux/suspend.h>
> #include <linux/syscore_ops.h>
> @@ -999,7 +1000,7 @@ static void add_cpu_dev_symlink(struct cpufreq_policy *policy, unsigned int cpu)
> {
> struct device *dev = get_cpu_device(cpu);
>
> - if (!dev)
> + if (unlikely(!dev))
> return;
>
> if (cpumask_test_and_set_cpu(cpu, policy->real_cpus))
> @@ -1117,14 +1118,16 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cp
>
> static void refresh_frequency_limits(struct cpufreq_policy *policy)
> {
> - struct cpufreq_policy new_policy = *policy;
> -
> - pr_debug("updating policy for CPU %u\n", policy->cpu);
> + struct cpufreq_policy new_policy;
>
> - new_policy.min = policy->user_policy.min;
> - new_policy.max = policy->user_policy.max;
> + if (!policy_is_inactive(policy)) {
> + new_policy = *policy;
> + pr_debug("updating policy for CPU %u\n", policy->cpu);
>
> - cpufreq_set_policy(policy, &new_policy);
> + new_policy.min = policy->user_policy.min;
> + new_policy.max = policy->user_policy.max;
> + cpufreq_set_policy(policy, &new_policy);
> + }
> }
>
> static void handle_update(struct work_struct *work)
> @@ -1133,14 +1136,60 @@ static void handle_update(struct work_struct *work)
> container_of(work, struct cpufreq_policy, update);
>
> pr_debug("handle_update for cpu %u called\n", policy->cpu);
> + down_write(&policy->rwsem);
> refresh_frequency_limits(policy);
> + up_write(&policy->rwsem);
> +}
> +
> +static int cpufreq_notifier_min(struct notifier_block *nb, unsigned long freq,
> + void *data)
> +{
> + struct cpufreq_policy *policy = container_of(nb, struct cpufreq_policy, nb_min);
> +
> + schedule_work(&policy->update);
> + return 0;
> +}
> +
> +static int cpufreq_notifier_max(struct notifier_block *nb, unsigned long freq,
> + void *data)
> +{
> + struct cpufreq_policy *policy = container_of(nb, struct cpufreq_policy, nb_max);
> +
> + schedule_work(&policy->update);
> + return 0;
> +}
> +
> +static void cpufreq_policy_put_kobj(struct cpufreq_policy *policy)
> +{
> + struct kobject *kobj;
> + struct completion *cmp;
> +
> + down_write(&policy->rwsem);
> + cpufreq_stats_free_table(policy);
> + kobj = &policy->kobj;
> + cmp = &policy->kobj_unregister;
> + up_write(&policy->rwsem);
> + kobject_put(kobj);
> +
> + /*
> + * We need to make sure that the underlying kobj is
> + * actually not referenced anymore by anybody before we
> + * proceed with unloading.
> + */
> + pr_debug("waiting for dropping of refcount\n");
> + wait_for_completion(cmp);
> + pr_debug("wait complete\n");
> }
>
> static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu)
> {
> struct cpufreq_policy *policy;
> + struct device *dev = get_cpu_device(cpu);
> int ret;
>
> + if (!dev)
> + return NULL;
> +
> policy = kzalloc(sizeof(*policy), GFP_KERNEL);
> if (!policy)
> return NULL;
> @@ -1157,7 +1206,7 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu)
> ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq,
> cpufreq_global_kobject, "policy%u", cpu);
> if (ret) {
> - pr_err("%s: failed to init policy->kobj: %d\n", __func__, ret);
> + dev_err(dev, "%s: failed to init policy->kobj: %d\n", __func__, ret);
> /*
> * The entire policy object will be freed below, but the extra
> * memory allocated for the kobject name needs to be freed by
> @@ -1167,6 +1216,25 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu)
> goto err_free_real_cpus;
> }
>
> + policy->nb_min.notifier_call = cpufreq_notifier_min;
> + policy->nb_max.notifier_call = cpufreq_notifier_max;
> +
> + ret = dev_pm_qos_add_notifier(dev, &policy->nb_min,
> + DEV_PM_QOS_MIN_FREQUENCY);
> + if (ret) {
> + dev_err(dev, "Failed to register MIN QoS notifier: %d (%*pbl)\n",
> + ret, cpumask_pr_args(policy->cpus));
> + goto err_kobj_remove;
> + }
> +
> + ret = dev_pm_qos_add_notifier(dev, &policy->nb_max,
> + DEV_PM_QOS_MAX_FREQUENCY);
> + if (ret) {
> + dev_err(dev, "Failed to register MAX QoS notifier: %d (%*pbl)\n",
> + ret, cpumask_pr_args(policy->cpus));
> + goto err_min_qos_notifier;
> + }
> +
> INIT_LIST_HEAD(&policy->policy_list);
> init_rwsem(&policy->rwsem);
> spin_lock_init(&policy->transition_lock);
> @@ -1177,6 +1245,11 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu)
> policy->cpu = cpu;
> return policy;
>
> +err_min_qos_notifier:
> + dev_pm_qos_remove_notifier(dev, &policy->nb_min,
> + DEV_PM_QOS_MIN_FREQUENCY);
> +err_kobj_remove:
> + cpufreq_policy_put_kobj(policy);
> err_free_real_cpus:
> free_cpumask_var(policy->real_cpus);
> err_free_rcpumask:
> @@ -1189,30 +1262,9 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu)
> return NULL;
> }
>
> -static void cpufreq_policy_put_kobj(struct cpufreq_policy *policy)
> -{
> - struct kobject *kobj;
> - struct completion *cmp;
> -
> - down_write(&policy->rwsem);
> - cpufreq_stats_free_table(policy);
> - kobj = &policy->kobj;
> - cmp = &policy->kobj_unregister;
> - up_write(&policy->rwsem);
> - kobject_put(kobj);
> -
> - /*
> - * We need to make sure that the underlying kobj is
> - * actually not referenced anymore by anybody before we
> - * proceed with unloading.
> - */
> - pr_debug("waiting for dropping of refcount\n");
> - wait_for_completion(cmp);
> - pr_debug("wait complete\n");
> -}
> -
> static void cpufreq_policy_free(struct cpufreq_policy *policy)
> {
> + struct device *dev = get_cpu_device(policy->cpu);
> unsigned long flags;
> int cpu;
>
> @@ -1224,6 +1276,11 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy)
> per_cpu(cpufreq_cpu_data, cpu) = NULL;
> write_unlock_irqrestore(&cpufreq_driver_lock, flags);
>
> + dev_pm_qos_remove_notifier(dev, &policy->nb_max,
> + DEV_PM_QOS_MAX_FREQUENCY);
> + dev_pm_qos_remove_notifier(dev, &policy->nb_min,
> + DEV_PM_QOS_MIN_FREQUENCY);
> +
> cpufreq_policy_put_kobj(policy);
> free_cpumask_var(policy->real_cpus);
> free_cpumask_var(policy->related_cpus);
> @@ -2283,6 +2340,8 @@ int cpufreq_set_policy(struct cpufreq_policy *policy,
> struct cpufreq_policy *new_policy)
> {
> struct cpufreq_governor *old_gov;
> + struct device *cpu_dev = get_cpu_device(policy->cpu);
> + unsigned long min, max;
> int ret;
>
> pr_debug("setting new policy for CPU %u: %u - %u kHz\n",
> @@ -2297,11 +2356,27 @@ int cpufreq_set_policy(struct cpufreq_policy *policy,
> if (new_policy->min > new_policy->max)
> return -EINVAL;
>
> + /*
> + * PM QoS framework collects all the requests from users and provide us
> + * the final aggregated value here.
> + */
> + min = dev_pm_qos_read_value(cpu_dev, DEV_PM_QOS_MIN_FREQUENCY);
> + max = dev_pm_qos_read_value(cpu_dev, DEV_PM_QOS_MAX_FREQUENCY);
> +
> + if (min > new_policy->min)
> + new_policy->min = min;
> + if (max < new_policy->max)
> + new_policy->max = max;
> +
> /* verify the cpu speed can be set within this limit */
> ret = cpufreq_driver->verify(new_policy);
> if (ret)
> return ret;
>
> + /*
> + * The notifier-chain shall be removed once all the users of
> + * CPUFREQ_ADJUST are moved to use the QoS framework.
> + */
> /* adjust if necessary - all reasons */
> blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
> CPUFREQ_ADJUST, new_policy);
> diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
> index a1467aa7f58b..95425941f46d 100644
> --- a/include/linux/cpufreq.h
> +++ b/include/linux/cpufreq.h
> @@ -147,6 +147,9 @@ struct cpufreq_policy {
>
> /* Pointer to the cooling device if used for thermal mitigation */
> struct thermal_cooling_device *cdev;
> +
> + struct notifier_block nb_min;
> + struct notifier_block nb_max;
> };
>
> struct cpufreq_freqs {
>

Hello Viresh,

This patch causes use-after-free on a cpufreq driver module reload. Please take a look, thanks in advance.


[ 87.952369] ==================================================================
[ 87.953259] BUG: KASAN: use-after-free in notifier_chain_register+0x4f/0x9c
[ 87.954031] Read of size 4 at addr e6abbd0c by task modprobe/243

[ 87.954901] CPU: 1 PID: 243 Comm: modprobe Tainted: G W
5.3.0-next-20190920-00185-gf61698eab956-dirty #2408
[ 87.956077] Hardware name: NVIDIA Tegra SoC (Flattened Device Tree)
[ 87.956807] [<c0110aad>] (unwind_backtrace) from [<c010bb71>] (show_stack+0x11/0x14)
[ 87.957709] [<c010bb71>] (show_stack) from [<c0d37b25>] (dump_stack+0x89/0x98)
[ 87.958616] [<c0d37b25>] (dump_stack) from [<c02937e1>]
(print_address_description.constprop.0+0x3d/0x340)
[ 87.959785] [<c02937e1>] (print_address_description.constprop.0) from [<c0293c6b>]
(__kasan_report+0xe3/0x12c)
[ 87.960907] [<c0293c6b>] (__kasan_report) from [<c014988f>] (notifier_chain_register+0x4f/0x9c)
[ 87.962001] [<c014988f>] (notifier_chain_register) from [<c01499b5>]
(blocking_notifier_chain_register+0x29/0x3c)
[ 87.963180] [<c01499b5>] (blocking_notifier_chain_register) from [<c06f7ee9>]
(dev_pm_qos_add_notifier+0x79/0xf8)
[ 87.964339] [<c06f7ee9>] (dev_pm_qos_add_notifier) from [<c092927d>] (cpufreq_online+0x5e1/0x8a4)
[ 87.965351] [<c092927d>] (cpufreq_online) from [<c09295c9>] (cpufreq_add_dev+0x79/0x80)
[ 87.966247] [<c09295c9>] (cpufreq_add_dev) from [<c06eb9d3>] (subsys_interface_register+0xc3/0x100)
[ 87.967297] [<c06eb9d3>] (subsys_interface_register) from [<c0926e53>]
(cpufreq_register_driver+0x13b/0x1ec)
[ 87.968476] [<c0926e53>] (cpufreq_register_driver) from [<bf800435>]
(tegra20_cpufreq_probe+0x165/0x1a8 [tegra20_cpufreq])
[ 87.969711] [<bf800435>] (tegra20_cpufreq_probe [tegra20_cpufreq]) from [<c06ef7a5>]
(platform_drv_probe+0x49/0x88)
[ 87.970854] [<c06ef7a5>] (platform_drv_probe) from [<c06ed299>] (really_probe+0x109/0x374)
[ 87.971771] [<c06ed299>] (really_probe) from [<c06ed64f>] (driver_probe_device+0x57/0x158)
[ 87.972685] [<c06ed64f>] (driver_probe_device) from [<c06ed8fd>] (device_driver_attach+0x61/0x64)
[ 87.973658] [<c06ed8fd>] (device_driver_attach) from [<c06ed949>] (__driver_attach+0x49/0xa0)
[ 87.974599] [<c06ed949>] (__driver_attach) from [<c06eb62d>] (bus_for_each_dev+0x69/0x94)
[ 87.975510] [<c06eb62d>] (bus_for_each_dev) from [<c06ec731>] (bus_add_driver+0x179/0x1e8)
[ 87.976427] [<c06ec731>] (bus_add_driver) from [<c06ee4ab>] (driver_register+0x8f/0x130)
[ 87.977345] [<c06ee4ab>] (driver_register) from [<bf805017>]
(tegra20_cpufreq_driver_init+0x17/0x1000 [tegra20_cpufreq])
[ 87.978553] [<bf805017>] (tegra20_cpufreq_driver_init [tegra20_cpufreq]) from [<c0102f35>]
(do_one_initcall+0x4d/0x280)
[ 87.979729] [<c0102f35>] (do_one_initcall) from [<c01c8675>] (do_init_module+0xb9/0x288)
[ 87.980618] [<c01c8675>] (do_init_module) from [<c01cb105>] (load_module+0x2829/0x2b90)
[ 87.981496] [<c01cb105>] (load_module) from [<c01cb62b>] (sys_finit_module+0x7b/0x8c)
[ 87.982358] [<c01cb62b>] (sys_finit_module) from [<c0101001>] (ret_fast_syscall+0x1/0x26)
[ 87.983240] Exception stack(0xe4babfa8 to 0xe4babff0)
[ 87.983809] bfa0: 0003f210 00000001 00000003 0002b744 00000000 b6611e64
[ 87.984703] bfc0: 0003f210 00000001 29a92d00 0000017b 0003f2a8 00000000 0003f210 00040008
[ 87.985588] bfe0: b6611de8 b6611dd8 00022534 aec402e0

[ 87.986340] Allocated by task 182:
[ 87.986756] cpufreq_online+0x55f/0x8a4
[ 87.987204] cpufreq_add_dev+0x79/0x80
[ 87.987641] subsys_interface_register+0xc3/0x100
[ 87.988178] cpufreq_register_driver+0x13b/0x1ec
[ 87.988719] tegra20_cpufreq_probe+0x165/0x1a8 [tegra20_cpufreq]
[ 87.989389] platform_drv_probe+0x49/0x88
[ 87.989849] really_probe+0x109/0x374
[ 88.021245] driver_probe_device+0x57/0x158
[ 88.052890] device_driver_attach+0x61/0x64
[ 88.084012] __driver_attach+0x49/0xa0
[ 88.114533] bus_for_each_dev+0x69/0x94
[ 88.144898] bus_add_driver+0x179/0x1e8
[ 88.174840] driver_register+0x8f/0x130
[ 88.204398] tegra20_cpufreq_driver_init+0x17/0x1000 [tegra20_cpufreq]
[ 88.234165] do_one_initcall+0x4d/0x280
[ 88.263754] do_init_module+0xb9/0x288
[ 88.292773] load_module+0x2829/0x2b90
[ 88.321106] sys_finit_module+0x7b/0x8c
[ 88.349098] ret_fast_syscall+0x1/0x26
[ 88.376732] 0xb6802c60

[ 88.430292] Freed by task 239:
[ 88.456279] __kasan_slab_free+0x9f/0xc4
[ 88.481802] kfree+0x71/0x1f4
[ 88.506805] subsys_interface_unregister+0xad/0xf0
[ 88.531524] cpufreq_unregister_driver+0x2f/0x7c
[ 88.555909] tegra20_cpufreq_remove+0x19/0x48 [tegra20_cpufreq]
[ 88.580273] platform_drv_remove+0x27/0x34
[ 88.604156] device_release_driver_internal+0xdf/0x1a4
[ 88.627659] driver_detach+0x85/0xf8
[ 88.650458] bus_remove_driver+0x53/0xb0
[ 88.672706] tegra20_cpufreq_driver_exit+0x9/0xb88 [tegra20_cpufreq]
[ 88.695100] sys_delete_module+0x14d/0x1a0
[ 88.717147] ret_fast_syscall+0x1/0x26
[ 88.738799] 0xb6d35ff4

[ 88.780543] The buggy address belongs to the object at e6abbc00
which belongs to the cache kmalloc-512 of size 512
[ 88.821845] The buggy address is located 268 bytes inside of
512-byte region [e6abbc00, e6abbe00)
[ 88.861188] The buggy address belongs to the page:
[ 88.880380] page:edcbc700 refcount:1 mapcount:0 mapping:e7001a00 index:0x0 compound_mapcount: 0
[ 88.900167] flags: 0x10200(slab|head)
[ 88.919599] raw: 00010200 edcafb00 00030003 e7001a00 00000000 00100010 ffffffff 00000001
[ 88.939423] page dumped because: kasan: bad access detected

[ 88.978080] Memory state around the buggy address:
[ 88.997237] e6abbc00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[ 89.016710] e6abbc80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[ 89.036051] >e6abbd00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[ 89.055192] ^
[ 89.074260] e6abbd80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[ 89.093832] e6abbe00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[ 89.113300] ==================================================================