Re: [PATCH 7/8] pwm: Add more locking

From: Marek Szyprowski
Date: Thu Apr 04 2024 - 08:10:05 EST


Hi Uwe,

On 17.03.2024 11:40, Uwe Kleine-König wrote:
> This ensures that a pwm_chip that has no corresponding driver isn't used
> and that a driver doesn't go away while a callback is still running.
>
> In the presence of device links this isn't necessary yet (so this is no
> fix) but for pwm character device support this is needed.
>
> To not serialize all pwm_apply_state() calls, this introduces a per chip
> lock. An additional complication is that for atomic chips a mutex cannot
> be used (as pwm_apply_atomic() must not sleem) and a spinlock cannot be
> held while calling an operation for a sleeping chip. So depending on the
> chip being atomic or not a spinlock or a mutex is used.
>
> Signed-off-by: Uwe Kleine-König <u.kleine-koenig@xxxxxxxxxxxxxx>


This patch landed some time ago in linux-next as commit a740f7879609
("pwm: Add more locking"). Recently I've finally found that $subject
patch is responsible for the following lock dep splat observed for some
time during day-to-day linux-next testing:

======================================================
WARNING: possible circular locking dependency detected
6.9.0-rc1+ #14790 Tainted: G         C
------------------------------------------------------
kworker/u24:4/59 is trying to acquire lock:
ffff00003c65b510 (&chip->nonatomic_lock){+.+.}-{3:3}, at:
pwm_apply_might_sleep+0x90/0xd8

but task is already holding lock:
ffff80008310fa40 (prepare_lock){+.+.}-{3:3}, at: clk_prepare_lock+0x4c/0xa8

which lock already depends on the new lock.


the existing dependency chain (in reverse order) is:

-> #1 (prepare_lock){+.+.}-{3:3}:
       lock_acquire+0x68/0x84
       __mutex_lock+0xa0/0x840
       mutex_lock_nested+0x24/0x30
       clk_prepare_lock+0x4c/0xa8
       clk_round_rate+0x38/0xd0
       meson_pwm_apply+0x128/0x220 [pwm_meson]
       __pwm_apply+0x64/0x1f8
       pwm_apply_might_sleep+0x58/0xd8
       pwm_regulator_set_voltage+0xbc/0x12c
       _regulator_do_set_voltage+0x110/0x688
       regulator_set_voltage_rdev+0x64/0x25c
       regulator_do_balance_voltage+0x20c/0x47c
       regulator_balance_voltage+0x50/0x9c
       regulator_set_voltage_unlocked+0xa4/0x128
       regulator_set_voltage+0x50/0xec
       _opp_config_regulator_single+0x4c/0x130
       _set_opp+0xfc/0x544
       dev_pm_opp_set_rate+0x190/0x284
       set_target+0x34/0x40
       __cpufreq_driver_target+0x170/0x290
       cpufreq_online+0x814/0xa3c
       cpufreq_add_dev+0x80/0x98
       subsys_interface_register+0xfc/0x118
       cpufreq_register_driver+0x150/0x234
       dt_cpufreq_probe+0x150/0x480
       platform_probe+0x68/0xd8
       really_probe+0xbc/0x2a0
       __driver_probe_device+0x78/0x12c
       driver_probe_device+0xdc/0x164
       __device_attach_driver+0xb8/0x138
       bus_for_each_drv+0x84/0xe0
       __device_attach+0xa8/0x1b0
       device_initial_probe+0x14/0x20
       bus_probe_device+0xb0/0xb4
       deferred_probe_work_func+0x8c/0xc8
       process_one_work+0x220/0x634
       worker_thread+0x268/0x3a8
       kthread+0x124/0x128
       ret_from_fork+0x10/0x20

-> #0 (&chip->nonatomic_lock){+.+.}-{3:3}:
       __lock_acquire+0x1318/0x20c4
       lock_acquire.part.0+0xc8/0x20c
       lock_acquire+0x68/0x84
       __mutex_lock+0xa0/0x840
       mutex_lock_nested+0x24/0x30
       pwm_apply_might_sleep+0x90/0xd8
       clk_pwm_prepare+0x54/0x84
       clk_core_prepare+0xc8/0x2f8
       clk_prepare+0x28/0x44
       mmc_pwrseq_simple_pre_power_on+0x4c/0x8c
       mmc_pwrseq_pre_power_on+0x24/0x34
       mmc_power_up.part.0+0x20/0x16c
       mmc_start_host+0xa0/0xac
       mmc_add_host+0x84/0xf0
       meson_mmc_probe+0x318/0x3e8
       platform_probe+0x68/0xd8
       really_probe+0xbc/0x2a0
       __driver_probe_device+0x78/0x12c
       driver_probe_device+0xdc/0x164
       __device_attach_driver+0xb8/0x138
       bus_for_each_drv+0x84/0xe0
       __device_attach_async_helper+0xb0/0xd4
       async_run_entry_fn+0x34/0xe0
       process_one_work+0x220/0x634
       worker_thread+0x268/0x3a8
       kthread+0x124/0x128
       ret_from_fork+0x10/0x20

other info that might help us debug this:

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(prepare_lock);
                               lock(&chip->nonatomic_lock);
                               lock(prepare_lock);
  lock(&chip->nonatomic_lock);

 *** DEADLOCK ***

4 locks held by kworker/u24:4/59:
 #0: ffff000000220d48 ((wq_completion)async){+.+.}-{0:0}, at:
process_one_work+0x1a0/0x634
 #1: ffff80008461bde0 ((work_completion)(&entry->work)){+.+.}-{0:0},
at: process_one_work+0x1c8/0x634
 #2: ffff0000015bf0f8 (&dev->mutex){....}-{3:3}, at:
__device_attach_async_helper+0x3c/0xd4
 #3: ffff80008310fa40 (prepare_lock){+.+.}-{3:3}, at:
clk_prepare_lock+0x4c/0xa8

stack backtrace:
CPU: 1 PID: 59 Comm: kworker/u24:4 Tainted: G         C 6.9.0-rc1+ #14790
Hardware name: Khadas VIM3 (DT)
Workqueue: async async_run_entry_fn
Call trace:
 dump_backtrace+0x98/0xf0
 show_stack+0x18/0x24
 dump_stack_lvl+0x90/0xd0
 dump_stack+0x18/0x24
 print_circular_bug+0x290/0x370
 check_noncircular+0x15c/0x170
 __lock_acquire+0x1318/0x20c4
 lock_acquire.part.0+0xc8/0x20c
 lock_acquire+0x68/0x84
 __mutex_lock+0xa0/0x840
 mutex_lock_nested+0x24/0x30
 pwm_apply_might_sleep+0x90/0xd8
 clk_pwm_prepare+0x54/0x84
 clk_core_prepare+0xc8/0x2f8
 clk_prepare+0x28/0x44
 mmc_pwrseq_simple_pre_power_on+0x4c/0x8c
 mmc_pwrseq_pre_power_on+0x24/0x34
 mmc_power_up.part.0+0x20/0x16c
 mmc_start_host+0xa0/0xac
 mmc_add_host+0x84/0xf0
 meson_mmc_probe+0x318/0x3e8
 platform_probe+0x68/0xd8
 really_probe+0xbc/0x2a0
 __driver_probe_device+0x78/0x12c
 driver_probe_device+0xdc/0x164
 __device_attach_driver+0xb8/0x138
 bus_for_each_drv+0x84/0xe0
 __device_attach_async_helper+0xb0/0xd4
 async_run_entry_fn+0x34/0xe0
 process_one_work+0x220/0x634
 worker_thread+0x268/0x3a8
 kthread+0x124/0x128
 ret_from_fork+0x10/0x20


I'm not really sure if this warning shows a real problem or just some
functions are missing lock dep annotations. Quite a lots of subsystems
are involved in it (clocks, regulators and pwms) and this issue is fully
reproducible here during system boot on Khadas VIM3 ARM64-based board.


> ---
> drivers/pwm/core.c | 98 +++++++++++++++++++++++++++++++++++++++++----
> include/linux/pwm.h | 13 ++++++
> 2 files changed, 103 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
> index 09ff6ef0b634..2e08fcfbe138 100644
> --- a/drivers/pwm/core.c
> +++ b/drivers/pwm/core.c
> @@ -29,6 +29,22 @@ static DEFINE_MUTEX(pwm_lock);
>
> static DEFINE_IDR(pwm_chips);
>
> +static void pwmchip_lock(struct pwm_chip *chip)
> +{
> + if (chip->atomic)
> + spin_lock(&chip->atomic_lock);
> + else
> + mutex_lock(&chip->nonatomic_lock);
> +}
> +
> +static void pwmchip_unlock(struct pwm_chip *chip)
> +{
> + if (chip->atomic)
> + spin_unlock(&chip->atomic_lock);
> + else
> + mutex_unlock(&chip->nonatomic_lock);
> +}
> +
> static void pwm_apply_debug(struct pwm_device *pwm,
> const struct pwm_state *state)
> {
> @@ -183,6 +199,7 @@ static int __pwm_apply(struct pwm_device *pwm, const struct pwm_state *state)
> int pwm_apply_might_sleep(struct pwm_device *pwm, const struct pwm_state *state)
> {
> int err;
> + struct pwm_chip *chip = pwm->chip;
>
> /*
> * Some lowlevel driver's implementations of .apply() make use of
> @@ -193,7 +210,13 @@ int pwm_apply_might_sleep(struct pwm_device *pwm, const struct pwm_state *state)
> */
> might_sleep();
>
> - if (IS_ENABLED(CONFIG_PWM_DEBUG) && pwm->chip->atomic) {
> + pwmchip_lock(chip);
> + if (!chip->operational) {
> + pwmchip_unlock(chip);
> + return -ENODEV;
> + }
> +
> + if (IS_ENABLED(CONFIG_PWM_DEBUG) && chip->atomic) {
> /*
> * Catch any drivers that have been marked as atomic but
> * that will sleep anyway.
> @@ -205,6 +228,8 @@ int pwm_apply_might_sleep(struct pwm_device *pwm, const struct pwm_state *state)
> err = __pwm_apply(pwm, state);
> }
>
> + pwmchip_unlock(chip);
> +
> return err;
> }
> EXPORT_SYMBOL_GPL(pwm_apply_might_sleep);
> @@ -291,16 +316,26 @@ EXPORT_SYMBOL_GPL(pwm_adjust_config);
> int pwm_capture(struct pwm_device *pwm, struct pwm_capture *result,
> unsigned long timeout)
> {
> + struct pwm_chip *chip;
> int err;
>
> - if (!pwm || !pwm->chip->ops)
> + if (!pwm || !(chip = pwm->chip)->ops)
> return -EINVAL;
>
> - if (!pwm->chip->ops->capture)
> + if (!chip->ops->capture)
> return -ENOSYS;
>
> mutex_lock(&pwm_lock);
> - err = pwm->chip->ops->capture(pwm->chip, pwm, result, timeout);
> +
> + pwmchip_lock(chip);
> +
> + if (chip->operational)
> + err = chip->ops->capture(pwm->chip, pwm, result, timeout);
> + else
> + err = -ENODEV;
> +
> + pwmchip_unlock(chip);
> +
> mutex_unlock(&pwm_lock);
>
> return err;
> @@ -340,6 +375,14 @@ static int pwm_device_request(struct pwm_device *pwm, const char *label)
> if (test_bit(PWMF_REQUESTED, &pwm->flags))
> return -EBUSY;
>
> + /*
> + * This function is called while holding pwm_lock. As .operational only
> + * changes while holding this lock, checking it here without holding the
> + * chip lock is fine.
> + */
> + if (!chip->operational)
> + return -ENODEV;
> +
> if (!try_module_get(chip->owner))
> return -ENODEV;
>
> @@ -368,7 +411,12 @@ static int pwm_device_request(struct pwm_device *pwm, const char *label)
> */
> struct pwm_state state = { 0, };
>
> + pwmchip_lock(chip);
> +
> err = ops->get_state(chip, pwm, &state);
> +
> + pwmchip_unlock(chip);
> +
> trace_pwm_get(pwm, &state, err);
>
> if (!err)
> @@ -997,6 +1045,7 @@ struct pwm_chip *pwmchip_alloc(struct device *parent, unsigned int npwm, size_t
>
> chip->npwm = npwm;
> chip->uses_pwmchip_alloc = true;
> + chip->operational = false;
>
> pwmchip_dev = &chip->dev;
> device_initialize(pwmchip_dev);
> @@ -1102,6 +1151,11 @@ int __pwmchip_add(struct pwm_chip *chip, struct module *owner)
>
> chip->owner = owner;
>
> + if (chip->atomic)
> + spin_lock_init(&chip->atomic_lock);
> + else
> + mutex_init(&chip->nonatomic_lock);
> +
> mutex_lock(&pwm_lock);
>
> ret = idr_alloc(&pwm_chips, chip, 0, 0, GFP_KERNEL);
> @@ -1115,6 +1169,10 @@ int __pwmchip_add(struct pwm_chip *chip, struct module *owner)
> if (IS_ENABLED(CONFIG_OF))
> of_pwmchip_add(chip);
>
> + pwmchip_lock(chip);
> + chip->operational = true;
> + pwmchip_unlock(chip);
> +
> ret = device_add(&chip->dev);
> if (ret)
> goto err_device_add;
> @@ -1124,6 +1182,10 @@ int __pwmchip_add(struct pwm_chip *chip, struct module *owner)
> return 0;
>
> err_device_add:
> + pwmchip_lock(chip);
> + chip->operational = false;
> + pwmchip_unlock(chip);
> +
> if (IS_ENABLED(CONFIG_OF))
> of_pwmchip_remove(chip);
>
> @@ -1145,12 +1207,27 @@ EXPORT_SYMBOL_GPL(__pwmchip_add);
> void pwmchip_remove(struct pwm_chip *chip)
> {
> pwmchip_sysfs_unexport(chip);
> + unsigned int i;
> +
> + mutex_lock(&pwm_lock);
> +
> + pwmchip_lock(chip);
> + chip->operational = false;
> + pwmchip_unlock(chip);
> +
> + for (i = 0; i < chip->npwm; ++i) {
> + struct pwm_device *pwm = &chip->pwms[i];
> +
> + if (test_and_clear_bit(PWMF_REQUESTED, &pwm->flags)) {
> + dev_alert(&chip->dev, "Freeing requested PWM #%u\n", i);
> + if (pwm->chip->ops->free)
> + pwm->chip->ops->free(pwm->chip, pwm);
> + }
> + }
>
> if (IS_ENABLED(CONFIG_OF))
> of_pwmchip_remove(chip);
>
> - mutex_lock(&pwm_lock);
> -
> idr_remove(&pwm_chips, chip->id);
>
> mutex_unlock(&pwm_lock);
> @@ -1532,12 +1609,17 @@ void pwm_put(struct pwm_device *pwm)
>
> mutex_lock(&pwm_lock);
>
> - if (!test_and_clear_bit(PWMF_REQUESTED, &pwm->flags)) {
> + /*
> + * If the chip isn't operational, PWMF_REQUESTED was already cleared. So
> + * don't warn in this case. This can only happen if a consumer called
> + * pwm_put() twice.
> + */
> + if (chip->operational && !test_and_clear_bit(PWMF_REQUESTED, &pwm->flags)) {
> pr_warn("PWM device already freed\n");
> goto out;
> }
>
> - if (chip->ops->free)
> + if (chip->operational && chip->ops->free)
> pwm->chip->ops->free(pwm->chip, pwm);
>
> pwm->label = NULL;
> diff --git a/include/linux/pwm.h b/include/linux/pwm.h
> index 60b92c2c75ef..9c84e0ba81a4 100644
> --- a/include/linux/pwm.h
> +++ b/include/linux/pwm.h
> @@ -274,6 +274,9 @@ struct pwm_ops {
> * @of_xlate: request a PWM device given a device tree PWM specifier
> * @atomic: can the driver's ->apply() be called in atomic context
> * @uses_pwmchip_alloc: signals if pwmchip_allow was used to allocate this chip
> + * @operational: signals if the chip can be used (or is already deregistered)
> + * @nonatomic_lock: mutex for nonatomic chips
> + * @atomic_lock: mutex for atomic chips
> * @pwms: array of PWM devices allocated by the framework
> */
> struct pwm_chip {
> @@ -289,6 +292,16 @@ struct pwm_chip {
>
> /* only used internally by the PWM framework */
> bool uses_pwmchip_alloc;
> + bool operational;
> + union {
> + /*
> + * depending on the chip being atomic or not either the mutex or
> + * the spinlock is used. It protects .operational and
> + * synchronizes calls to the .ops->apply and .ops->get_state()
> + */
> + struct mutex nonatomic_lock;
> + struct spinlock atomic_lock;
> + };
> struct pwm_device pwms[] __counted_by(npwm);
> };
>

Best regards
--
Marek Szyprowski, PhD
Samsung R&D Institute Poland