Re: [PATCH 6/6] hwmon: (pwm-fan) Use devm_thermal_of_cooling_device_register

From: Marek Szyprowski
Date: Mon May 20 2019 - 11:23:55 EST


Dear All,

On 2019-04-18 21:58, Guenter Roeck wrote:
> Use devm_thermal_of_cooling_device_register() to register the cooling
> device. Also use devm_add_action_or_reset() to stop the fan on device
> removal, and to disable the pwm. Introduce a local 'dev' variable in
> the probe function to make the code easier to read.
>
> As a side effect, this fixes a bug seen if pwm_fan_of_get_cooling_data()
> returned an error. In that situation, the pwm was not disabled, and
> the fan was not stopped. Using devm functions also ensures that the
> pwm is disabled and that the fan is stopped only after the hwmon device
> has been unregistered.
>
> Cc: Lukasz Majewski <l.majewski@xxxxxxxxxxx>
> Signed-off-by: Guenter Roeck <linux@xxxxxxxxxxxx>


I've noticed the following lockdep warning after this commit during CPU
hotplug tests on TM2e board (ARM64 Exynos5433). It looks like a false
positive, but it would be nice to annotate it somehow in the code to
make lockdep happy:

--->8---

IRQ 8: no longer affine to CPU5
CPU5: shutdown
IRQ 9: no longer affine to CPU6
CPU6: shutdown

======================================================
WARNING: possible circular locking dependency detected
5.2.0-rc1+ #6093 Not tainted
------------------------------------------------------
cpuhp/7/43 is trying to acquire lock:
00000000d1a60be3 (thermal_list_lock){+.+.}, at:
thermal_cooling_device_unregister+0x34/0x1c0

but task is already holding lock:
00000000a6a56c92 (&policy->rwsem){++++}, at: cpufreq_offline+0x68/0x228

which lock already depends on the new lock.


the existing dependency chain (in reverse order) is:

-> #3 (&policy->rwsem){++++}:
ÂÂÂÂÂÂ down_write+0x48/0x98
ÂÂÂÂÂÂ cpufreq_cpu_acquire+0x20/0x58
ÂÂÂÂÂÂ cpufreq_update_policy+0x28/0xc0
ÂÂÂÂÂÂ cpufreq_set_cur_state+0x44/0x70
ÂÂÂÂÂÂ thermal_cdev_update+0x7c/0x240
ÂÂÂÂÂÂ step_wise_throttle+0x4c/0x88
ÂÂÂÂÂÂ handle_thermal_trip+0xc0/0x348
ÂÂÂÂÂÂ thermal_zone_device_update.part.7+0x6c/0x250
ÂÂÂÂÂÂ thermal_zone_device_update+0x28/0x38
ÂÂÂÂÂÂ exynos_tmu_work+0x28/0x70
ÂÂÂÂÂÂ process_one_work+0x298/0x6c0
ÂÂÂÂÂÂ worker_thread+0x48/0x430
ÂÂÂÂÂÂ kthread+0x100/0x130
ÂÂÂÂÂÂ ret_from_fork+0x10/0x18

-> #2 (&cdev->lock){+.+.}:
ÂÂÂÂÂÂ __mutex_lock+0x90/0x890
ÂÂÂÂÂÂ mutex_lock_nested+0x1c/0x28
ÂÂÂÂÂÂ thermal_zone_bind_cooling_device+0x2cc/0x3e0
ÂÂÂÂÂÂ of_thermal_bind+0x9c/0xf8
ÂÂÂÂÂÂ __thermal_cooling_device_register+0x1a4/0x388
ÂÂÂÂÂÂ thermal_of_cooling_device_register+0xc/0x18
ÂÂÂÂÂÂ __cpufreq_cooling_register+0x360/0x410
ÂÂÂÂÂÂ of_cpufreq_cooling_register+0x84/0xf8
ÂÂÂÂÂÂ cpufreq_online+0x414/0x720
ÂÂÂÂÂÂ cpufreq_add_dev+0x78/0x88
ÂÂÂÂÂÂ subsys_interface_register+0xa4/0xf8
ÂÂÂÂÂÂ cpufreq_register_driver+0x140/0x1e0
ÂÂÂÂÂÂ dt_cpufreq_probe+0xb0/0x130
ÂÂÂÂÂÂ platform_drv_probe+0x50/0xa8
ÂÂÂÂÂÂ really_probe+0x1b0/0x2a0
ÂÂÂÂÂÂ driver_probe_device+0x58/0x100
ÂÂÂÂÂÂ __device_attach_driver+0x90/0xc0
ÂÂÂÂÂÂ bus_for_each_drv+0x70/0xc8
ÂÂÂÂÂÂ __device_attach+0xdc/0x140
ÂÂÂÂÂÂ device_initial_probe+0x10/0x18
ÂÂÂÂÂÂ bus_probe_device+0x94/0xa0
ÂÂÂÂÂÂ device_add+0x39c/0x5c8
ÂÂÂÂÂÂ platform_device_add+0x110/0x248
ÂÂÂÂÂÂ platform_device_register_full+0x134/0x178
ÂÂÂÂÂÂ cpufreq_dt_platdev_init+0x114/0x14c
ÂÂÂÂÂÂ do_one_initcall+0x84/0x430
ÂÂÂÂÂÂ kernel_init_freeable+0x440/0x534
ÂÂÂÂÂÂ kernel_init+0x10/0x108
ÂÂÂÂÂÂ ret_from_fork+0x10/0x18

-> #1 (&tz->lock){+.+.}:
ÂÂÂÂÂÂ __mutex_lock+0x90/0x890
ÂÂÂÂÂÂ mutex_lock_nested+0x1c/0x28
ÂÂÂÂÂÂ thermal_zone_bind_cooling_device+0x2b8/0x3e0
ÂÂÂÂÂÂ of_thermal_bind+0x9c/0xf8
ÂÂÂÂÂÂ __thermal_cooling_device_register+0x1a4/0x388
ÂÂÂÂÂÂ thermal_of_cooling_device_register+0xc/0x18
ÂÂÂÂÂÂ __cpufreq_cooling_register+0x360/0x410
ÂÂÂÂÂÂ of_cpufreq_cooling_register+0x84/0xf8
ÂÂÂÂÂÂ cpufreq_online+0x414/0x720
ÂÂÂÂÂÂ cpufreq_add_dev+0x78/0x88
ÂÂÂÂÂÂ subsys_interface_register+0xa4/0xf8
ÂÂÂÂÂÂ cpufreq_register_driver+0x140/0x1e0
ÂÂÂÂÂÂ dt_cpufreq_probe+0xb0/0x130
ÂÂÂÂÂÂ platform_drv_probe+0x50/0xa8
ÂÂÂÂÂÂ really_probe+0x1b0/0x2a0
ÂÂÂÂÂÂ driver_probe_device+0x58/0x100
ÂÂÂÂÂÂ __device_attach_driver+0x90/0xc0
ÂÂÂÂÂÂ bus_for_each_drv+0x70/0xc8
ÂÂÂÂÂÂ __device_attach+0xdc/0x140
ÂÂÂÂÂÂ device_initial_probe+0x10/0x18
ÂÂÂÂÂÂ bus_probe_device+0x94/0xa0
ÂÂÂÂÂÂ device_add+0x39c/0x5c8
ÂÂÂÂÂÂ platform_device_add+0x110/0x248
ÂÂÂÂÂÂ platform_device_register_full+0x134/0x178
ÂÂÂÂÂÂ cpufreq_dt_platdev_init+0x114/0x14c
ÂÂÂÂÂÂ do_one_initcall+0x84/0x430
ÂÂÂÂÂÂ kernel_init_freeable+0x440/0x534
ÂÂÂÂÂÂ kernel_init+0x10/0x108
ÂÂÂÂÂÂ ret_from_fork+0x10/0x18

-> #0 (thermal_list_lock){+.+.}:
ÂÂÂÂÂÂ lock_acquire+0xdc/0x260
ÂÂÂÂÂÂ __mutex_lock+0x90/0x890
ÂÂÂÂÂÂ mutex_lock_nested+0x1c/0x28
ÂÂÂÂÂÂ thermal_cooling_device_unregister+0x34/0x1c0
ÂÂÂÂÂÂ cpufreq_cooling_unregister+0x78/0xd0
ÂÂÂÂÂÂ cpufreq_offline+0x200/0x228
ÂÂÂÂÂÂ cpuhp_cpufreq_offline+0xc/0x18
ÂÂÂÂÂÂ cpuhp_invoke_callback+0xd0/0xcb0
ÂÂÂÂÂÂ cpuhp_thread_fun+0x1e8/0x258
ÂÂÂÂÂÂ smpboot_thread_fn+0x1b4/0x2d0
ÂÂÂÂÂÂ kthread+0x100/0x130
ÂÂÂÂÂÂ ret_from_fork+0x10/0x18

other info that might help us debug this:

Chain exists of:
 thermal_list_lock --> &cdev->lock --> &policy->rwsem

ÂPossible unsafe locking scenario:

ÂÂÂÂÂÂ CPU0ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ CPU1
ÂÂÂÂÂÂ ----ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ ----
 lock(&policy->rwsem);
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ lock(&cdev->lock);
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ lock(&policy->rwsem);
 lock(thermal_list_lock);

Â*** DEADLOCK ***

3 locks held by cpuhp/7/43:
Â#0: 00000000ae30cc2b (cpu_hotplug_lock.rw_sem){++++}, at:
cpuhp_thread_fun+0x34/0x258
Â#1: 00000000a0e2460a (cpuhp_state-down){+.+.}, at:
cpuhp_thread_fun+0x178/0x258
Â#2: 00000000a6a56c92 (&policy->rwsem){++++}, at:
cpufreq_offline+0x68/0x228

stack backtrace:
CPU: 7 PID: 43 Comm: cpuhp/7 Not tainted 5.2.0-rc1+ #6093
Hardware name: Samsung TM2E board (DT)
Call trace:
Âdump_backtrace+0x0/0x158
Âshow_stack+0x14/0x20
Âdump_stack+0xc8/0x114
Âprint_circular_bug+0x1cc/0x2d8
Â__lock_acquire+0x18f4/0x20f8
Âlock_acquire+0xdc/0x260
Â__mutex_lock+0x90/0x890
Âmutex_lock_nested+0x1c/0x28
Âthermal_cooling_device_unregister+0x34/0x1c0
Âcpufreq_cooling_unregister+0x78/0xd0
Âcpufreq_offline+0x200/0x228
Âcpuhp_cpufreq_offline+0xc/0x18
Âcpuhp_invoke_callback+0xd0/0xcb0
Âcpuhp_thread_fun+0x1e8/0x258
Âsmpboot_thread_fn+0x1b4/0x2d0
Âkthread+0x100/0x130
Âret_from_fork+0x10/0x18
IRQ 10: no longer affine to CPU7

--->8---

> ---
> drivers/hwmon/pwm-fan.c | 73 ++++++++++++++++++++-----------------------------
> 1 file changed, 29 insertions(+), 44 deletions(-)
>
> diff --git a/drivers/hwmon/pwm-fan.c b/drivers/hwmon/pwm-fan.c
> index 167221c7628a..0243ba70107e 100644
> --- a/drivers/hwmon/pwm-fan.c
> +++ b/drivers/hwmon/pwm-fan.c
> @@ -207,33 +207,44 @@ static int pwm_fan_of_get_cooling_data(struct device *dev,
> return 0;
> }
>
> +static void pwm_fan_regulator_disable(void *data)
> +{
> + regulator_disable(data);
> +}
> +
> +static void pwm_fan_pwm_disable(void *data)
> +{
> + pwm_disable(data);
> +}
> +
> static int pwm_fan_probe(struct platform_device *pdev)
> {
> struct thermal_cooling_device *cdev;
> + struct device *dev = &pdev->dev;
> struct pwm_fan_ctx *ctx;
> struct device *hwmon;
> int ret;
> struct pwm_state state = { };
>
> - ctx = devm_kzalloc(&pdev->dev, sizeof(*ctx), GFP_KERNEL);
> + ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
> if (!ctx)
> return -ENOMEM;
>
> mutex_init(&ctx->lock);
>
> - ctx->pwm = devm_of_pwm_get(&pdev->dev, pdev->dev.of_node, NULL);
> + ctx->pwm = devm_of_pwm_get(dev, dev->of_node, NULL);
> if (IS_ERR(ctx->pwm)) {
> ret = PTR_ERR(ctx->pwm);
>
> if (ret != -EPROBE_DEFER)
> - dev_err(&pdev->dev, "Could not get PWM: %d\n", ret);
> + dev_err(dev, "Could not get PWM: %d\n", ret);
>
> return ret;
> }
>
> platform_set_drvdata(pdev, ctx);
>
> - ctx->reg_en = devm_regulator_get_optional(&pdev->dev, "fan");
> + ctx->reg_en = devm_regulator_get_optional(dev, "fan");
> if (IS_ERR(ctx->reg_en)) {
> if (PTR_ERR(ctx->reg_en) != -ENODEV)
> return PTR_ERR(ctx->reg_en);
> @@ -242,10 +253,11 @@ static int pwm_fan_probe(struct platform_device *pdev)
> } else {
> ret = regulator_enable(ctx->reg_en);
> if (ret) {
> - dev_err(&pdev->dev,
> - "Failed to enable fan supply: %d\n", ret);
> + dev_err(dev, "Failed to enable fan supply: %d\n", ret);
> return ret;
> }
> + devm_add_action_or_reset(dev, pwm_fan_regulator_disable,
> + ctx->reg_en);
> }
>
> ctx->pwm_value = MAX_PWM;
> @@ -257,62 +269,36 @@ static int pwm_fan_probe(struct platform_device *pdev)
>
> ret = pwm_apply_state(ctx->pwm, &state);
> if (ret) {
> - dev_err(&pdev->dev, "Failed to configure PWM\n");
> - goto err_reg_disable;
> + dev_err(dev, "Failed to configure PWM\n");
> + return ret;
> }
> + devm_add_action_or_reset(dev, pwm_fan_pwm_disable, ctx->pwm);
>
> - hwmon = devm_hwmon_device_register_with_groups(&pdev->dev, "pwmfan",
> + hwmon = devm_hwmon_device_register_with_groups(dev, "pwmfan",
> ctx, pwm_fan_groups);
> if (IS_ERR(hwmon)) {
> - dev_err(&pdev->dev, "Failed to register hwmon device\n");
> - ret = PTR_ERR(hwmon);
> - goto err_pwm_disable;
> + dev_err(dev, "Failed to register hwmon device\n");
> + return PTR_ERR(hwmon);
> }
>
> - ret = pwm_fan_of_get_cooling_data(&pdev->dev, ctx);
> + ret = pwm_fan_of_get_cooling_data(dev, ctx);
> if (ret)
> return ret;
>
> ctx->pwm_fan_state = ctx->pwm_fan_max_state;
> if (IS_ENABLED(CONFIG_THERMAL)) {
> - cdev = thermal_of_cooling_device_register(pdev->dev.of_node,
> - "pwm-fan", ctx,
> - &pwm_fan_cooling_ops);
> + cdev = devm_thermal_of_cooling_device_register(dev,
> + dev->of_node, "pwm-fan", ctx, &pwm_fan_cooling_ops);
> if (IS_ERR(cdev)) {
> - dev_err(&pdev->dev,
> + dev_err(dev,
> "Failed to register pwm-fan as cooling device");
> - ret = PTR_ERR(cdev);
> - goto err_pwm_disable;
> + return PTR_ERR(cdev);
> }
> ctx->cdev = cdev;
> thermal_cdev_update(cdev);
> }
>
> return 0;
> -
> -err_pwm_disable:
> - state.enabled = false;
> - pwm_apply_state(ctx->pwm, &state);
> -
> -err_reg_disable:
> - if (ctx->reg_en)
> - regulator_disable(ctx->reg_en);
> -
> - return ret;
> -}
> -
> -static int pwm_fan_remove(struct platform_device *pdev)
> -{
> - struct pwm_fan_ctx *ctx = platform_get_drvdata(pdev);
> -
> - thermal_cooling_device_unregister(ctx->cdev);
> - if (ctx->pwm_value)
> - pwm_disable(ctx->pwm);
> -
> - if (ctx->reg_en)
> - regulator_disable(ctx->reg_en);
> -
> - return 0;
> }
>
> #ifdef CONFIG_PM_SLEEP
> @@ -380,7 +366,6 @@ MODULE_DEVICE_TABLE(of, of_pwm_fan_match);
>
> static struct platform_driver pwm_fan_driver = {
> .probe = pwm_fan_probe,
> - .remove = pwm_fan_remove,
> .driver = {
> .name = "pwm-fan",
> .pm = &pwm_fan_pm,

Best regards
--
Marek Szyprowski, PhD
Samsung R&D Institute Poland