Re: usb/gadget: potential deadlock in gadgetfs_suspend

From: Andrey Konovalov
Date: Tue Jun 13 2017 - 06:07:01 EST


On Mon, Jun 12, 2017 at 10:25 PM, Alan Stern <stern@xxxxxxxxxxxxxxxxxxx> wrote:
>
> As you surmised, this was caused by a race. The race was between
> dummy_udc_stop() and set_link_state(), both in dummy_hcd.c. A symptom
> of this race is that the first routine clears dum->driver while the
> second dereferences it, and neither access is protected by a lock.
>
> It turns out this problem affects at least one other UDC driver
> (net2280.c). Below is a patch that adds the missing lock (and removes
> some unneeded unlocks) from these drivers. It turns out that the
> disconnect, reset, suspend, and resume callbacks should all be invoked
> while the UDC's lock is held, because they can race with gadget driver
> unbinding.
>
> Adding locked regions can lead to the possibility of deadlock or
> lockdep violations. Please let me know if the patch below raises any
> problems.

Hi Alan,

Thanks for the patch!

I've been testing with your patch applied and the "bad spinlock magic"
crashes seem to be gone. However I got another crash (happened only
once over the night), which happens during "spin_lock_irqsave
(&dev->lock, flags)":

kasan: GPF could be caused by NULL-ptr deref or user memory access
general protection fault: 0000 [#1] SMP KASAN
Dumping ftrace buffer:
(ftrace buffer empty)
Modules linked in:
CPU: 3 PID: 900 Comm: kworker/3:1 Not tainted 4.12.0-rc4+ #36
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
Workqueue: usb_hub_wq hub_event
task: ffff88006c5aadc0 task.stack: ffff88006c620000
RIP: 0010:__lock_acquire+0xbe5/0x3690 kernel/locking/lockdep.c:3246
RSP: 0018:ffff88006c625a80 EFLAGS: 00010006
RAX: dffffc0000000000 RBX: ffff88006c5aadc0 RCX: 0000000000000000
RDX: 0000000000000003 RSI: 0000000000000000 RDI: 1ffff1000d8c4bab
RBP: ffff88006c625fc0 R08: 0000000000000001 R09: 0000000000000001
R10: ffff88006c5ab698 R11: ffffffff87dd2e80 R12: dffffc0000000000
R13: 0000000000000001 R14: 0000000000000018 R15: 0000000000000000
FS: 0000000000000000(0000) GS:ffff88006dd00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000020fdffff CR3: 000000006797e000 CR4: 00000000000006e0
Call Trace:
lock_acquire+0x22d/0x560 kernel/locking/lockdep.c:3855
__raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
_raw_spin_lock_irqsave+0xc9/0x110 kernel/locking/spinlock.c:159
gadgetfs_disconnect+0xf1/0x230 drivers/usb/gadget/legacy/inode.c:1664
usb_gadget_udc_reset+0x3b/0xb0 drivers/usb/gadget/udc/core.c:1020
set_link_state+0x648/0x9f0 drivers/usb/gadget/udc/dummy_hcd.c:446
dummy_hub_control+0x11bb/0x1fb0 drivers/usb/gadget/udc/dummy_hcd.c:2243
rh_call_control drivers/usb/core/hcd.c:689 [inline]
rh_urb_enqueue drivers/usb/core/hcd.c:846 [inline]
usb_hcd_submit_urb+0x92f/0x20b0 drivers/usb/core/hcd.c:1650
usb_submit_urb+0x8b2/0x12c0 drivers/usb/core/urb.c:542
usb_start_wait_urb+0x148/0x5b0 drivers/usb/core/message.c:56
usb_internal_control_msg drivers/usb/core/message.c:100 [inline]
usb_control_msg+0x341/0x4d0 drivers/usb/core/message.c:151
set_port_feature+0x73/0x90 drivers/usb/core/hub.c:422
hub_port_reset+0x277/0x1550 drivers/usb/core/hub.c:2772
hub_port_init+0x7dc/0x2940 drivers/usb/core/hub.c:4510
hub_port_connect drivers/usb/core/hub.c:4826 [inline]
hub_port_connect_change drivers/usb/core/hub.c:4999 [inline]
port_event drivers/usb/core/hub.c:5105 [inline]
hub_event+0x1ae1/0x3d40 drivers/usb/core/hub.c:5185
process_one_work+0xc08/0x1bd0 kernel/workqueue.c:2097
process_scheduled_works kernel/workqueue.c:2157 [inline]
worker_thread+0xb2b/0x1860 kernel/workqueue.c:2233
kthread+0x363/0x440 kernel/kthread.c:231
ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:424
Code: e9 03 f3 48 ab 48 81 c4 18 05 00 00 44 89 e0 5b 41 5c 41 5d 41
5e 41 5f 5d c3 4c 89 f2 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <80>
3c 02 00 0f 85 51 24 00 00 49 81 3e 40 ea 13 87 41 bd 00 00
RIP: __lock_acquire+0xbe5/0x3690 kernel/locking/lockdep.c:3246 RSP:
ffff88006c625a80
---[ end trace f5a7b971fc1b0546 ]---
Kernel panic - not syncing: Fatal exception
Shutting down cpus with NMI
Dumping ftrace buffer:
(ftrace buffer empty)
Kernel Offset: disabled
Rebooting in 86400 seconds..

>
> Alan Stern
>
>
>
> Index: usb-4.x/drivers/usb/gadget/legacy/inode.c
> ===================================================================
> --- usb-4.x.orig/drivers/usb/gadget/legacy/inode.c
> +++ usb-4.x/drivers/usb/gadget/legacy/inode.c
> @@ -1679,9 +1679,10 @@ static void
> gadgetfs_suspend (struct usb_gadget *gadget)
> {
> struct dev_data *dev = get_gadget_data (gadget);
> + unsigned long flags;
>
> INFO (dev, "suspended from state %d\n", dev->state);
> - spin_lock (&dev->lock);
> + spin_lock_irqsave(&dev->lock, flags);
> switch (dev->state) {
> case STATE_DEV_SETUP: // VERY odd... host died??
> case STATE_DEV_CONNECTED:
> @@ -1692,7 +1693,7 @@ gadgetfs_suspend (struct usb_gadget *gad
> default:
> break;
> }
> - spin_unlock (&dev->lock);
> + spin_unlock_irqrestore(&dev->lock, flags);
> }
>
> static struct usb_gadget_driver gadgetfs_driver = {
> Index: usb-4.x/drivers/usb/gadget/udc/dummy_hcd.c
> ===================================================================
> --- usb-4.x.orig/drivers/usb/gadget/udc/dummy_hcd.c
> +++ usb-4.x/drivers/usb/gadget/udc/dummy_hcd.c
> @@ -442,23 +442,16 @@ static void set_link_state(struct dummy_
> /* Report reset and disconnect events to the driver */
> if (dum->driver && (disconnect || reset)) {
> stop_activity(dum);
> - spin_unlock(&dum->lock);
> if (reset)
> usb_gadget_udc_reset(&dum->gadget, dum->driver);
> else
> dum->driver->disconnect(&dum->gadget);
> - spin_lock(&dum->lock);
> }
> } else if (dum_hcd->active != dum_hcd->old_active) {
> - if (dum_hcd->old_active && dum->driver->suspend) {
> - spin_unlock(&dum->lock);
> + if (dum_hcd->old_active && dum->driver->suspend)
> dum->driver->suspend(&dum->gadget);
> - spin_lock(&dum->lock);
> - } else if (!dum_hcd->old_active && dum->driver->resume) {
> - spin_unlock(&dum->lock);
> + else if (!dum_hcd->old_active && dum->driver->resume)
> dum->driver->resume(&dum->gadget);
> - spin_lock(&dum->lock);
> - }
> }
>
> dum_hcd->old_status = dum_hcd->port_status;
> @@ -983,7 +976,9 @@ static int dummy_udc_stop(struct usb_gad
> struct dummy_hcd *dum_hcd = gadget_to_dummy_hcd(g);
> struct dummy *dum = dum_hcd->dum;
>
> + spin_lock_irq(&dum->lock);
> dum->driver = NULL;
> + spin_unlock_irq(&dum->lock);
>
> return 0;
> }
> Index: usb-4.x/drivers/usb/gadget/udc/net2280.c
> ===================================================================
> --- usb-4.x.orig/drivers/usb/gadget/udc/net2280.c
> +++ usb-4.x/drivers/usb/gadget/udc/net2280.c
> @@ -2470,11 +2470,8 @@ static void stop_activity(struct net2280
> nuke(&dev->ep[i]);
>
> /* report disconnect; the driver is already quiesced */
> - if (driver) {
> - spin_unlock(&dev->lock);
> + if (driver)
> driver->disconnect(&dev->gadget);
> - spin_lock(&dev->lock);
> - }
>
> usb_reinit(dev);
> }
> @@ -3348,8 +3345,6 @@ next_endpoints:
> BIT(PCI_RETRY_ABORT_INTERRUPT))
>
> static void handle_stat1_irqs(struct net2280 *dev, u32 stat)
> -__releases(dev->lock)
> -__acquires(dev->lock)
> {
> struct net2280_ep *ep;
> u32 tmp, num, mask, scratch;
> @@ -3390,14 +3385,12 @@ __acquires(dev->lock)
> if (disconnect || reset) {
> stop_activity(dev, dev->driver);
> ep0_start(dev);
> - spin_unlock(&dev->lock);
> if (reset)
> usb_gadget_udc_reset
> (&dev->gadget, dev->driver);
> else
> (dev->driver->disconnect)
> (&dev->gadget);
> - spin_lock(&dev->lock);
> return;
> }
> }
>