Re: [PATCH v1] PM: sleep: Restore asynchronous device resume optimization

From: Marek Szyprowski
Date: Wed Feb 07 2024 - 05:32:00 EST


Dear All,

On 09.01.2024 17:59, Rafael J. Wysocki wrote:
> From: Rafael J. Wysocki <rafael.j.wysocki@xxxxxxxxx>
>
> Before commit 7839d0078e0d ("PM: sleep: Fix possible deadlocks in core
> system-wide PM code"), the resume of devices that were allowed to resume
> asynchronously was scheduled before starting the resume of the other
> devices, so the former did not have to wait for the latter unless
> functional dependencies were present.
>
> Commit 7839d0078e0d removed that optimization in order to address a
> correctness issue, but it can be restored with the help of a new device
> power management flag, so do that now.
>
> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@xxxxxxxxx>
> ---

This patch finally landed in linux-next some time ago as 3e999770ac1c
("PM: sleep: Restore asynchronous device resume optimization"). Recently
I found that it causes a non-trivial interaction with commit
5797b1c18919 ("workqueue: Implement system-wide nr_active enforcement
for unbound workqueues"). Since merge commit 954350a5f8db in linux-next
system suspend/resume fails (board doesn't wake up) on my old Samsung
Exynos4412-based Odroid-U3 board (ARM 32bit based), which was rock
stable for last years.

My further investigations confirmed that the mentioned commits are
responsible for this issue. Each of them separately (3e999770ac1c and
5797b1c18919) doesn't trigger any problems. Reverting any of them on top
of linux-next (with some additional commit due to code dependencies)
also fixes/hides the problem.

Let me know if You need more information or tests on the hardware. I'm
open to help debugging this issue.

> I said I'd probably do this in 6.9, but then I thought more about it
> and now I think it would be nice to have 6.8-rc1 without a suspend
> performance regression and the change is relatively straightforward,
> so here it goes.
>
> ---
> drivers/base/power/main.c | 117 +++++++++++++++++++++++++---------------------
> include/linux/pm.h | 1
> 2 files changed, 65 insertions(+), 53 deletions(-)
>
> Index: linux-pm/include/linux/pm.h
> ===================================================================
> --- linux-pm.orig/include/linux/pm.h
> +++ linux-pm/include/linux/pm.h
> @@ -681,6 +681,7 @@ struct dev_pm_info {
> bool wakeup_path:1;
> bool syscore:1;
> bool no_pm_callbacks:1; /* Owned by the PM core */
> + bool in_progress:1; /* Owned by the PM core */
> unsigned int must_resume:1; /* Owned by the PM core */
> unsigned int may_skip_resume:1; /* Set by subsystems */
> #else
> Index: linux-pm/drivers/base/power/main.c
> ===================================================================
> --- linux-pm.orig/drivers/base/power/main.c
> +++ linux-pm/drivers/base/power/main.c
> @@ -579,7 +579,7 @@ bool dev_pm_skip_resume(struct device *d
> }
>
> /**
> - * __device_resume_noirq - Execute a "noirq resume" callback for given device.
> + * device_resume_noirq - Execute a "noirq resume" callback for given device.
> * @dev: Device to handle.
> * @state: PM transition of the system being carried out.
> * @async: If true, the device is being resumed asynchronously.
> @@ -587,7 +587,7 @@ bool dev_pm_skip_resume(struct device *d
> * The driver of @dev will not receive interrupts while this function is being
> * executed.
> */
> -static void __device_resume_noirq(struct device *dev, pm_message_t state, bool async)
> +static void device_resume_noirq(struct device *dev, pm_message_t state, bool async)
> {
> pm_callback_t callback = NULL;
> const char *info = NULL;
> @@ -674,16 +674,22 @@ static bool dpm_async_fn(struct device *
> {
> reinit_completion(&dev->power.completion);
>
> - if (!is_async(dev))
> - return false;
> + if (is_async(dev)) {
> + dev->power.in_progress = true;
>
> - get_device(dev);
> -
> - if (async_schedule_dev_nocall(func, dev))
> - return true;
> + get_device(dev);
>
> - put_device(dev);
> + if (async_schedule_dev_nocall(func, dev))
> + return true;
>
> + put_device(dev);
> + }
> + /*
> + * Because async_schedule_dev_nocall() above has returned false or it
> + * has not been called at all, func() is not running and it safe to
> + * update the in_progress flag without additional synchronization.
> + */
> + dev->power.in_progress = false;
> return false;
> }
>
> @@ -691,18 +697,10 @@ static void async_resume_noirq(void *dat
> {
> struct device *dev = data;
>
> - __device_resume_noirq(dev, pm_transition, true);
> + device_resume_noirq(dev, pm_transition, true);
> put_device(dev);
> }
>
> -static void device_resume_noirq(struct device *dev)
> -{
> - if (dpm_async_fn(dev, async_resume_noirq))
> - return;
> -
> - __device_resume_noirq(dev, pm_transition, false);
> -}
> -
> static void dpm_noirq_resume_devices(pm_message_t state)
> {
> struct device *dev;
> @@ -712,18 +710,28 @@ static void dpm_noirq_resume_devices(pm_
> mutex_lock(&dpm_list_mtx);
> pm_transition = state;
>
> + /*
> + * Trigger the resume of "async" devices upfront so they don't have to
> + * wait for the "non-async" ones they don't depend on.
> + */
> + list_for_each_entry(dev, &dpm_noirq_list, power.entry)
> + dpm_async_fn(dev, async_resume_noirq);
> +
> while (!list_empty(&dpm_noirq_list)) {
> dev = to_device(dpm_noirq_list.next);
> - get_device(dev);
> list_move_tail(&dev->power.entry, &dpm_late_early_list);
>
> - mutex_unlock(&dpm_list_mtx);
> + if (!dev->power.in_progress) {
> + get_device(dev);
>
> - device_resume_noirq(dev);
> + mutex_unlock(&dpm_list_mtx);
>
> - put_device(dev);
> + device_resume_noirq(dev, state, false);
> +
> + put_device(dev);
>
> - mutex_lock(&dpm_list_mtx);
> + mutex_lock(&dpm_list_mtx);
> + }
> }
> mutex_unlock(&dpm_list_mtx);
> async_synchronize_full();
> @@ -747,14 +755,14 @@ void dpm_resume_noirq(pm_message_t state
> }
>
> /**
> - * __device_resume_early - Execute an "early resume" callback for given device.
> + * device_resume_early - Execute an "early resume" callback for given device.
> * @dev: Device to handle.
> * @state: PM transition of the system being carried out.
> * @async: If true, the device is being resumed asynchronously.
> *
> * Runtime PM is disabled for @dev while this function is being executed.
> */
> -static void __device_resume_early(struct device *dev, pm_message_t state, bool async)
> +static void device_resume_early(struct device *dev, pm_message_t state, bool async)
> {
> pm_callback_t callback = NULL;
> const char *info = NULL;
> @@ -820,18 +828,10 @@ static void async_resume_early(void *dat
> {
> struct device *dev = data;
>
> - __device_resume_early(dev, pm_transition, true);
> + device_resume_early(dev, pm_transition, true);
> put_device(dev);
> }
>
> -static void device_resume_early(struct device *dev)
> -{
> - if (dpm_async_fn(dev, async_resume_early))
> - return;
> -
> - __device_resume_early(dev, pm_transition, false);
> -}
> -
> /**
> * dpm_resume_early - Execute "early resume" callbacks for all devices.
> * @state: PM transition of the system being carried out.
> @@ -845,18 +845,28 @@ void dpm_resume_early(pm_message_t state
> mutex_lock(&dpm_list_mtx);
> pm_transition = state;
>
> + /*
> + * Trigger the resume of "async" devices upfront so they don't have to
> + * wait for the "non-async" ones they don't depend on.
> + */
> + list_for_each_entry(dev, &dpm_late_early_list, power.entry)
> + dpm_async_fn(dev, async_resume_early);
> +
> while (!list_empty(&dpm_late_early_list)) {
> dev = to_device(dpm_late_early_list.next);
> - get_device(dev);
> list_move_tail(&dev->power.entry, &dpm_suspended_list);
>
> - mutex_unlock(&dpm_list_mtx);
> + if (!dev->power.in_progress) {
> + get_device(dev);
>
> - device_resume_early(dev);
> + mutex_unlock(&dpm_list_mtx);
>
> - put_device(dev);
> + device_resume_early(dev, state, false);
> +
> + put_device(dev);
>
> - mutex_lock(&dpm_list_mtx);
> + mutex_lock(&dpm_list_mtx);
> + }
> }
> mutex_unlock(&dpm_list_mtx);
> async_synchronize_full();
> @@ -876,12 +886,12 @@ void dpm_resume_start(pm_message_t state
> EXPORT_SYMBOL_GPL(dpm_resume_start);
>
> /**
> - * __device_resume - Execute "resume" callbacks for given device.
> + * device_resume - Execute "resume" callbacks for given device.
> * @dev: Device to handle.
> * @state: PM transition of the system being carried out.
> * @async: If true, the device is being resumed asynchronously.
> */
> -static void __device_resume(struct device *dev, pm_message_t state, bool async)
> +static void device_resume(struct device *dev, pm_message_t state, bool async)
> {
> pm_callback_t callback = NULL;
> const char *info = NULL;
> @@ -975,18 +985,10 @@ static void async_resume(void *data, asy
> {
> struct device *dev = data;
>
> - __device_resume(dev, pm_transition, true);
> + device_resume(dev, pm_transition, true);
> put_device(dev);
> }
>
> -static void device_resume(struct device *dev)
> -{
> - if (dpm_async_fn(dev, async_resume))
> - return;
> -
> - __device_resume(dev, pm_transition, false);
> -}
> -
> /**
> * dpm_resume - Execute "resume" callbacks for non-sysdev devices.
> * @state: PM transition of the system being carried out.
> @@ -1006,16 +1008,25 @@ void dpm_resume(pm_message_t state)
> pm_transition = state;
> async_error = 0;
>
> + /*
> + * Trigger the resume of "async" devices upfront so they don't have to
> + * wait for the "non-async" ones they don't depend on.
> + */
> + list_for_each_entry(dev, &dpm_suspended_list, power.entry)
> + dpm_async_fn(dev, async_resume);
> +
> while (!list_empty(&dpm_suspended_list)) {
> dev = to_device(dpm_suspended_list.next);
>
> get_device(dev);
>
> - mutex_unlock(&dpm_list_mtx);
> + if (!dev->power.in_progress) {
> + mutex_unlock(&dpm_list_mtx);
>
> - device_resume(dev);
> + device_resume(dev, state, false);
>
> - mutex_lock(&dpm_list_mtx);
> + mutex_lock(&dpm_list_mtx);
> + }
>
> if (!list_empty(&dev->power.entry))
> list_move_tail(&dev->power.entry, &dpm_prepared_list);
>
>
>
Best regards
--
Marek Szyprowski, PhD
Samsung R&D Institute Poland