Re: [Linux kernel bug] general protection fault in disable_store

From: Alan Stern
Date: Fri Apr 12 2024 - 10:40:50 EST


On Fri, Apr 12, 2024 at 09:08:12PM +0800, Sam Sun wrote:
> Sorry for the mistake I made when debugging this bug. Now I have more
> information about it. Disassembly of function disable_store() in the
> latest upstream kernel is listed below.
> ```
> Dump of assembler code for function disable_store:
> ...
> 0xffffffff86e907eb <+187>: lea -0x8(%r14),%r12
> 0xffffffff86e907ef <+191>: mov (%rbx),%rax
> 0xffffffff86e907f2 <+194>: mov %rax,0x20(%rsp)
> 0xffffffff86e907f7 <+199>: lea -0xa8(%rax),%rdi
> 0xffffffff86e907fe <+206>: mov %rdi,0x18(%rsp)
> 0xffffffff86e90803 <+211>: call 0xffffffff86e20220
> <usb_hub_to_struct_hub>
> 0xffffffff86e90808 <+216>: mov %rax,%rbx
> 0xffffffff86e9080b <+219>: shr $0x3,%rax
> 0xffffffff86e9080f <+223>: movabs $0xdffffc0000000000,%rcx
> 0xffffffff86e90819 <+233>: cmpb $0x0,(%rax,%rcx,1)
> 0xffffffff86e9081d <+237>: je 0xffffffff86e90827 <disable_store+247>
> 0xffffffff86e9081f <+239>: mov %rbx,%rdi
> 0xffffffff86e90822 <+242>: call 0xffffffff81eeb0b0
> <__asan_report_load8_noabort>
> 0xffffffff86e90827 <+247>: lea 0x60(%rsp),%rsi
> ...
> ```
> The cmpb in disable_store()<+233> is generated by KASAN to check the
> shadow memory status. If equals 0, which means the load 8 is valid,
> pass the KASAN check. However, this time rax is 0, so it first
> triggers general protection fault, since 0xdffffc0000000000 is not a
> valid address. rax contains the return address of function
> usb_hub_to_struct_hub(), in this case is a NULL.
>
> In function usb_hub_to_struct_hub(), I checked hdev and its sub
> domains, and they are not NULL. Is it possible that
> usb_deauthorized_device() set
> hdev->actconfig->interface[0]->dev.driver_data to NULL? I cannot
> confirm that since every time I try to breakpoint the code it crashes
> differently.

I suspect the usb_hub_to_struct_hub() call is racing with the
spinlock-protected region in hub_disconnect() (in hub.c).

> If there is any other thing I could help, please let me know.

Try the patch below. It should eliminate that race, which hopefully
will fix the problem.

Alan Stern



Index: usb-devel/drivers/usb/core/hub.c
===================================================================
--- usb-devel.orig/drivers/usb/core/hub.c
+++ usb-devel/drivers/usb/core/hub.c
@@ -72,6 +72,9 @@
* change to USB_STATE_NOTATTACHED even when the semaphore isn't held. */
static DEFINE_SPINLOCK(device_state_lock);

+/* Protect hdev->maxchild and hub's intfdata */
+static DEFINE_SPINLOCK(hub_state_lock);
+
/* workqueue to process hub events */
static struct workqueue_struct *hub_wq;
static void hub_event(struct work_struct *work);
@@ -152,9 +155,13 @@ static inline char *portspeed(struct usb
/* Note that hdev or one of its children must be locked! */
struct usb_hub *usb_hub_to_struct_hub(struct usb_device *hdev)
{
- if (!hdev || !hdev->actconfig || !hdev->maxchild)
- return NULL;
- return usb_get_intfdata(hdev->actconfig->interface[0]);
+ struct usb_hub *hub = NULL;
+
+ spin_lock_irq(&hub_state_lock);
+ if (hdev && hdev->actconfig && hdev->maxchild)
+ hub = usb_get_intfdata(hdev->actconfig->interface[0]);
+ spin_unlock_irq(&hub_state_lock);
+ return hub;
}

int usb_device_supports_lpm(struct usb_device *udev)
@@ -1714,7 +1721,9 @@ static int hub_configure(struct usb_hub
break;
}
}
+ spin_lock_irq(&hub_state_lock);
hdev->maxchild = i;
+ spin_unlock_irq(&hub_state_lock);
for (i = 0; i < hdev->maxchild; i++) {
struct usb_port *port_dev = hub->ports[i];

@@ -1790,9 +1799,11 @@ static void hub_disconnect(struct usb_in

/* Avoid races with recursively_mark_NOTATTACHED() */
spin_lock_irq(&device_state_lock);
+ spin_lock(&hub_state_lock);
port1 = hdev->maxchild;
hdev->maxchild = 0;
usb_set_intfdata(intf, NULL);
+ spin_unlock(&hub_state_lock);
spin_unlock_irq(&device_state_lock);

for (; port1 > 0; --port1)