Re: [BUG] mutex deadlock of dpm_resume() in low memory situation

From: Rafael J. Wysocki
Date: Wed Dec 27 2023 - 14:39:34 EST


On Wednesday, December 27, 2023 5:08:40 PM CET Greg KH wrote:
> On Wed, Dec 27, 2023 at 05:42:50PM +0900, Youngmin Nam wrote:
> > Could you look into this issue?
>
> Can you submit a patch that resolves the issue for you, as you have a
> way to actually test this out? That would be the quickest way to get it
> resolved, and to help confirm that this is even an issue at all.

Something like the appended patch should be sufficient to address this AFAICS.

I haven't tested it yet (will do so shortly), so all of the usual disclaimers
apply.

I think it can be split into two patches, but for easier testing it is
posted here in one piece.
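
To illustrate the idea before the diff itself: the new helper never falls
back to running the callback synchronously from inside the async machinery;
on failure it just reports that, and the caller decides where (and under
which locks) the synchronous fallback runs.  Below is a rough userspace
model of that contract, using plain pthreads; schedule_async_nocall() and
resume_one() are made-up stand-ins for illustration, not kernel APIs.

/*
 * Userspace model only (pthreads, hypothetical names), not kernel code:
 * schedule_async_nocall() stands in for async_schedule_dev_nocall().  It
 * either hands the work off to another context or fails cleanly, without
 * ever running the callback synchronously itself.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

typedef void (*work_fn_t)(void *data);

struct work_item {
	work_fn_t fn;
	void *data;
};

static void *worker(void *arg)
{
	struct work_item *item = arg;

	item->fn(item->data);		/* runs outside the caller's locks */
	free(item);
	return NULL;
}

/* Try to run fn(data) asynchronously; return false on failure instead of
 * calling fn() directly, so the caller chooses where the fallback runs. */
static bool schedule_async_nocall(work_fn_t fn, void *data, pthread_t *tid)
{
	struct work_item *item = malloc(sizeof(*item));

	if (!item)
		return false;

	item->fn = fn;
	item->data = data;

	if (pthread_create(tid, NULL, worker, item)) {
		free(item);
		return false;
	}
	return true;
}

static void resume_one(void *data)
{
	printf("resuming %s\n", (const char *)data);
}

int main(void)
{
	pthread_t tid;
	char dev[] = "dev0";

	if (schedule_async_nocall(resume_one, dev, &tid))
		pthread_join(tid, NULL);
	else
		resume_one(dev);	/* fallback stays in the caller */

	return 0;
}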

Fixes: f2a424f6c613 ("PM / core: Introduce dpm_async_fn() helper")
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@xxxxxxxxx>
---
 drivers/base/power/main.c |   12 ++++--
 include/linux/async.h     |    2 +
 kernel/async.c            |   85 ++++++++++++++++++++++++++++++++++------------
 3 files changed, 73 insertions(+), 26 deletions(-)

Index: linux-pm/kernel/async.c
===================================================================
--- linux-pm.orig/kernel/async.c
+++ linux-pm/kernel/async.c
@@ -145,6 +145,39 @@ static void async_run_entry_fn(struct wo
 	wake_up(&async_done);
 }
 
+static async_cookie_t __async_schedule_node_domain(async_func_t func,
+						    void *data, int node,
+						    struct async_domain *domain,
+						    struct async_entry *entry)
+{
+	async_cookie_t newcookie;
+	unsigned long flags;
+
+	INIT_LIST_HEAD(&entry->domain_list);
+	INIT_LIST_HEAD(&entry->global_list);
+	INIT_WORK(&entry->work, async_run_entry_fn);
+	entry->func = func;
+	entry->data = data;
+	entry->domain = domain;
+
+	spin_lock_irqsave(&async_lock, flags);
+
+	/* allocate cookie and queue */
+	newcookie = entry->cookie = next_cookie++;
+
+	list_add_tail(&entry->domain_list, &domain->pending);
+	if (domain->registered)
+		list_add_tail(&entry->global_list, &async_global_pending);
+
+	atomic_inc(&entry_count);
+	spin_unlock_irqrestore(&async_lock, flags);
+
+	/* schedule for execution */
+	queue_work_node(node, system_unbound_wq, &entry->work);
+
+	return newcookie;
+}
+
 /**
  * async_schedule_node_domain - NUMA specific version of async_schedule_domain
  * @func: function to execute asynchronously
@@ -186,29 +219,8 @@ async_cookie_t async_schedule_node_domai
 		func(data, newcookie);
 		return newcookie;
 	}
-	INIT_LIST_HEAD(&entry->domain_list);
-	INIT_LIST_HEAD(&entry->global_list);
-	INIT_WORK(&entry->work, async_run_entry_fn);
-	entry->func = func;
-	entry->data = data;
-	entry->domain = domain;
-
-	spin_lock_irqsave(&async_lock, flags);
-
-	/* allocate cookie and queue */
-	newcookie = entry->cookie = next_cookie++;
-
-	list_add_tail(&entry->domain_list, &domain->pending);
-	if (domain->registered)
-		list_add_tail(&entry->global_list, &async_global_pending);
-
-	atomic_inc(&entry_count);
-	spin_unlock_irqrestore(&async_lock, flags);
-
-	/* schedule for execution */
-	queue_work_node(node, system_unbound_wq, &entry->work);
 
-	return newcookie;
+	return __async_schedule_node_domain(func, data, node, domain, entry);
 }
 EXPORT_SYMBOL_GPL(async_schedule_node_domain);
 
@@ -232,6 +244,35 @@ async_cookie_t async_schedule_node(async
 EXPORT_SYMBOL_GPL(async_schedule_node);
 
 /**
+ * async_schedule_dev_nocall - A simplified variant of async_schedule_dev()
+ * @func: function to execute asynchronously
+ * @dev: device argument to be passed to function
+ *
+ * @dev is used as both the argument for the function and to provide NUMA
+ * context for where to run the function.
+ *
+ * If the asynchronous execution of @func is scheduled successfully, return
+ * true. Otherwise, do nothing and return false, unlike async_schedule_dev()
+ * which would run the function synchronously in that case.
+ */
+bool async_schedule_dev_nocall(async_func_t func, struct device *dev)
+{
+	struct async_entry *entry;
+
+	entry = kzalloc(sizeof(struct async_entry), GFP_KERNEL);
+
+	/* Give up if there is no memory or too much work. */
+	if (!entry || atomic_read(&entry_count) > MAX_WORK) {
+		kfree(entry);
+		return false;
+	}
+
+	__async_schedule_node_domain(func, dev, dev_to_node(dev),
+				     &async_dfl_domain, entry);
+	return true;
+}
+
+/**
  * async_synchronize_full - synchronize all asynchronous function calls
  *
  * This function waits until all asynchronous function calls have been done.
Index: linux-pm/include/linux/async.h
===================================================================
--- linux-pm.orig/include/linux/async.h
+++ linux-pm/include/linux/async.h
@@ -90,6 +90,8 @@ async_schedule_dev(async_func_t func, st
 	return async_schedule_node(func, dev, dev_to_node(dev));
 }
 
+bool async_schedule_dev_nocall(async_func_t func, struct device *dev);
+
 /**
  * async_schedule_dev_domain - A device specific version of async_schedule_domain
  * @func: function to execute asynchronously
Index: linux-pm/drivers/base/power/main.c
===================================================================
--- linux-pm.orig/drivers/base/power/main.c
+++ linux-pm/drivers/base/power/main.c
@@ -668,11 +668,15 @@ static bool dpm_async_fn(struct device *
 {
 	reinit_completion(&dev->power.completion);
 
-	if (is_async(dev)) {
-		get_device(dev);
-		async_schedule_dev(func, dev);
+	if (!is_async(dev))
+		return false;
+
+	get_device(dev);
+
+	if (async_schedule_dev_nocall(func, dev))
 		return true;
-	}
+
+	put_device(dev);
 
 	return false;
 }