[PATCH] power: remove possible deadlock when unregistering power_supply
From: Benjamin Tissoires
Date: Mon Jun 25 2018 - 03:51:58 EST
If a device gets removed right after having registered a power_supply node,
we might enter in a deadlock between the remove call (that has a lock on
the parent device) and the deferred register work.
Allow the deferred register work to exit without taking the lock when
we are in the remove state.
Stack trace on a Ubuntu 16.04:
[16072.109121] INFO: task kworker/u16:2:1180 blocked for more than 120 seconds.
[16072.109127] Not tainted 4.13.0-41-generic #46~16.04.1-Ubuntu
[16072.109129] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[16072.109132] kworker/u16:2 D 0 1180 2 0x80000000
[16072.109142] Workqueue: events_power_efficient power_supply_deferred_register_work
[16072.109144] Call Trace:
[16072.109152] __schedule+0x3d6/0x8b0
[16072.109155] schedule+0x36/0x80
[16072.109158] schedule_preempt_disabled+0xe/0x10
[16072.109161] __mutex_lock.isra.2+0x2ab/0x4e0
[16072.109166] __mutex_lock_slowpath+0x13/0x20
[16072.109168] ? __mutex_lock_slowpath+0x13/0x20
[16072.109171] mutex_lock+0x2f/0x40
[16072.109174] power_supply_deferred_register_work+0x2b/0x50
[16072.109179] process_one_work+0x15b/0x410
[16072.109182] worker_thread+0x4b/0x460
[16072.109186] kthread+0x10c/0x140
[16072.109189] ? process_one_work+0x410/0x410
[16072.109191] ? kthread_create_on_node+0x70/0x70
[16072.109194] ret_from_fork+0x35/0x40
[16072.109199] INFO: task test:2257 blocked for more than 120 seconds.
[16072.109202] Not tainted 4.13.0-41-generic #46~16.04.1-Ubuntu
[16072.109204] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[16072.109206] test D 0 2257 2256 0x00000004
[16072.109208] Call Trace:
[16072.109211] __schedule+0x3d6/0x8b0
[16072.109215] schedule+0x36/0x80
[16072.109218] schedule_timeout+0x1f3/0x360
[16072.109221] ? check_preempt_curr+0x5a/0xa0
[16072.109224] ? ttwu_do_wakeup+0x1e/0x150
[16072.109227] wait_for_completion+0xb4/0x140
[16072.109230] ? wait_for_completion+0xb4/0x140
[16072.109233] ? wake_up_q+0x70/0x70
[16072.109236] flush_work+0x129/0x1e0
[16072.109240] ? worker_detach_from_pool+0xb0/0xb0
[16072.109243] __cancel_work_timer+0x10f/0x190
[16072.109247] ? device_del+0x264/0x310
[16072.109250] ? __wake_up+0x44/0x50
[16072.109253] cancel_delayed_work_sync+0x13/0x20
[16072.109257] power_supply_unregister+0x37/0xb0
[16072.109260] devm_power_supply_release+0x11/0x20
[16072.109263] release_nodes+0x110/0x200
[16072.109266] devres_release_group+0x7c/0xb0
[16072.109274] wacom_remove+0xc2/0x110 [wacom]
[16072.109279] hid_device_remove+0x6e/0xd0 [hid]
[16072.109284] device_release_driver_internal+0x158/0x210
[16072.109288] device_release_driver+0x12/0x20
[16072.109291] bus_remove_device+0xec/0x160
[16072.109293] device_del+0x1de/0x310
[16072.109298] hid_destroy_device+0x27/0x60 [hid]
[16072.109303] usbhid_disconnect+0x51/0x70 [usbhid]
[16072.109308] usb_unbind_interface+0x77/0x270
[16072.109311] device_release_driver_internal+0x158/0x210
[16072.109315] device_release_driver+0x12/0x20
[16072.109318] usb_driver_release_interface+0x77/0x80
[16072.109321] proc_ioctl+0x20f/0x250
[16072.109325] usbdev_do_ioctl+0x57f/0x1140
[16072.109327] ? __wake_up+0x44/0x50
[16072.109331] usbdev_ioctl+0xe/0x20
[16072.109336] do_vfs_ioctl+0xa4/0x600
[16072.109339] ? vfs_write+0x15a/0x1b0
[16072.109343] SyS_ioctl+0x79/0x90
[16072.109347] entry_SYSCALL_64_fastpath+0x24/0xab
[16072.109349] RIP: 0033:0x7f20da807f47
[16072.109351] RSP: 002b:00007ffc422ae398 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
[16072.109353] RAX: ffffffffffffffda RBX: 00000000010b8560 RCX: 00007f20da807f47
[16072.109355] RDX: 00007ffc422ae3a0 RSI: 00000000c0105512 RDI: 0000000000000009
[16072.109356] RBP: 0000000000000000 R08: 00007ffc422ae3e0 R09: 0000000000000010
[16072.109357] R10: 00000000000000a6 R11: 0000000000000246 R12: 0000000000000000
[16072.109359] R13: 00000000010b8560 R14: 00007ffc422ae2e0 R15: 0000000000000000
Reported-and-tested-by: Richard Hughes <rhughes@xxxxxxxxxx>
Tested-by: Aaron Skomra <Aaron.Skomra@xxxxxxxxx>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@xxxxxxxxxx>
---
drivers/power/supply/power_supply_core.c | 11 +++++++++--
include/linux/power_supply.h | 1 +
2 files changed, 10 insertions(+), 2 deletions(-)
diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c
index feac7b066e6c..20a8c2a12199 100644
--- a/drivers/power/supply/power_supply_core.c
+++ b/drivers/power/supply/power_supply_core.c
@@ -14,6 +14,7 @@
#include <linux/types.h>
#include <linux/init.h>
#include <linux/slab.h>
+#include <linux/delay.h>
#include <linux/device.h>
#include <linux/notifier.h>
#include <linux/err.h>
@@ -139,8 +140,13 @@ static void power_supply_deferred_register_work(struct work_struct *work)
struct power_supply *psy = container_of(work, struct power_supply,
deferred_register_work.work);
- if (psy->dev.parent)
- mutex_lock(&psy->dev.parent->mutex);
+ if (psy->dev.parent) {
+ while (!mutex_trylock(&psy->dev.parent->mutex)) {
+ if (psy->removing)
+ return;
+ msleep(10);
+ }
+ }
power_supply_changed(psy);
@@ -1071,6 +1077,7 @@ EXPORT_SYMBOL_GPL(devm_power_supply_register_no_ws);
void power_supply_unregister(struct power_supply *psy)
{
WARN_ON(atomic_dec_return(&psy->use_cnt));
+ psy->removing = true;
cancel_work_sync(&psy->changed_work);
cancel_delayed_work_sync(&psy->deferred_register_work);
sysfs_remove_link(&psy->dev.kobj, "powers");
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index f0139b460a72..0a587526a916 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -251,6 +251,7 @@ struct power_supply {
spinlock_t changed_lock;
bool changed;
bool initialized;
+ bool removing;
atomic_t use_cnt;
#ifdef CONFIG_THERMAL
struct thermal_zone_device *tzd;
--
2.14.3