Re: [REGRESSION] 2.6.24/25: random lockups when accessing externalUSB harddrive
From: Stefan Becker
Date: Thu Jun 26 2008 - 18:15:45 EST
Hi,
ext Alan Stern wrote:
Do you have any of the RT patches installed?
No.
Something else you should try is clearing your "owner" string just
before the spinlock is released. You could also add a check after the
release; if the spinlock can't be locked again immediately then
something is wrong.
Yes, the initial try was misleading. I tinkered around a little bit more
and finally figured out that it is usb_hcd_unlink_urb_from_ep() itself
that is called with interrupts enabled!
So with this code in place the error disappears:
void usb_hcd_unlink_urb_from_ep(struct usb_hcd *hcd, struct urb *urb)
{
/* clear all state linking urb to this dev (and hcd) */
unsigned int flags;
spin_lock_irqsave(&hcd_urb_list_lock, flags);
list_del_init(&urb->urb_list);
spin_unlock_irqrestore(&hcd_urb_list_lock, flags);
}
This seems to impact USB performance though. In 2.6.23 (without the
problem) I get 21MB/s with dd, but with the above "fix" only 14MB/s. But
I'll recheck once we have a real error fix in place.
After that I added the following code
if (!raw_irqs_disabled()) {
printk(KERN_CRIT "usb_hcd_unlink_urb_from_ep called with interrupts
enabled!\n");
dump_stack();
}
and collected the attached kernel messages. I checked the messages
briefly and it seems that the following code paths have the interrupts
enabled when calling usb_hcd_unlink_urb_from_ep():
[<c0574d9d>] usb_hcd_unlink_urb_from_ep+0x25/0x6b
[<de850559>] uhci_giveback_urb+0xcd/0x1e3 [uhci_hcd]
[<de850e02>] uhci_scan_schedule+0x511/0x720 [uhci_hcd]
...
[<de8529c3>] uhci_irq+0x131/0x142 [uhci_hcd]
[<c05750cb>] usb_hcd_irq+0x23/0x51
and
[<c0574d9d>] usb_hcd_unlink_urb_from_ep+0x25/0x6b
[<de839d55>] ehci_urb_done+0x73/0x92 [ehci_hcd]
[<de83a92f>] qh_completions+0x373/0x3eb [ehci_hcd]
[<de83aa43>] ehci_work+0x9c/0x6a9 [ehci_hcd]
...
[<de83ec3c>] ehci_irq+0x241/0x265 [ehci_hcd]
...
[<c05750cb>] usb_hcd_irq+0x23/0x51
Is that enough information to fix the problem?
Regards,
Stefan
---
Stefan Becker
E-Mail: Stefan.Becker@xxxxxxxxx
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 09a53e7..400433c 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -45,6 +45,14 @@
#include "hcd.h"
#include "hub.h"
+#ifdef DEBUG
+const char *_urb_list_holder = "NONE";
+#define URB_LIST_HOLDER(n) _urb_list_holder = #n;
+#define URB_LIST_RELEASE _urb_list_holder = "FREE";
+#else
+#define URB_LIST_HOLDER(n)
+#define URB_LIST_RELEASE
+#endif
/*-------------------------------------------------------------------------*/
@@ -1001,7 +1009,12 @@ int usb_hcd_link_urb_to_ep(struct usb_hcd *hcd, struct urb *urb)
{
int rc = 0;
+#ifdef DEBUG
+ if (!raw_irqs_disabled()) dump_stack();
+#endif
+
spin_lock(&hcd_urb_list_lock);
+ URB_LIST_HOLDER(usb_hcd_link_urb_to_ep)
/* Check that the URB isn't being killed */
if (unlikely(urb->reject)) {
@@ -1034,6 +1047,7 @@ int usb_hcd_link_urb_to_ep(struct usb_hcd *hcd, struct urb *urb)
goto done;
}
done:
+ URB_LIST_RELEASE
spin_unlock(&hcd_urb_list_lock);
return rc;
}
@@ -1107,9 +1121,33 @@ EXPORT_SYMBOL_GPL(usb_hcd_check_unlink_urb);
void usb_hcd_unlink_urb_from_ep(struct usb_hcd *hcd, struct urb *urb)
{
/* clear all state linking urb to this dev (and hcd) */
+#ifdef DEBUG
+#if 0
+ if (!spin_trylock(&hcd_urb_list_lock)) {
+ int i;
+ printk(KERN_CRIT "HCD URB list locked by %s!\n", _urb_list_holder);
+ dump_stack();
+ for (int i = 0; i < 100; i++) schedule();
+ }
+#endif
+ unsigned int flags;
+
+ if (!raw_irqs_disabled()) {
+ printk(KERN_CRIT "usb_hcd_unlink_urb_from_ep called with interrupts enabled!\n");
+ dump_stack();
+ }
+ spin_lock_irqsave(&hcd_urb_list_lock, flags);
+#else
spin_lock(&hcd_urb_list_lock);
+#endif
+ URB_LIST_HOLDER(usb_hcd_unlink_urb_from_ep)
list_del_init(&urb->urb_list);
+ URB_LIST_RELEASE
+#ifdef DEBUG
+ spin_unlock_irqrestore(&hcd_urb_list_lock, flags);
+#else
spin_unlock(&hcd_urb_list_lock);
+#endif
}
EXPORT_SYMBOL_GPL(usb_hcd_unlink_urb_from_ep);
@@ -1448,6 +1486,7 @@ void usb_hcd_flush_endpoint(struct usb_device *udev,
/* No more submits can occur */
spin_lock_irq(&hcd_urb_list_lock);
+ URB_LIST_HOLDER(usb_hcd_flush_endpoint_INITIAL)
rescan:
list_for_each_entry (urb, &ep->urb_list, urb_list) {
int is_in;
@@ -1456,6 +1495,7 @@ rescan:
continue;
usb_get_urb (urb);
is_in = usb_urb_dir_in(urb);
+ URB_LIST_RELEASE
spin_unlock(&hcd_urb_list_lock);
/* kick hcd */
@@ -1482,13 +1522,16 @@ rescan:
/* list contents may have changed */
spin_lock(&hcd_urb_list_lock);
+ URB_LIST_HOLDER(usb_hcd_flush_endpoint_RESCAN)
goto rescan;
}
+ URB_LIST_RELEASE
spin_unlock_irq(&hcd_urb_list_lock);
/* Wait until the endpoint queue is completely empty */
while (!list_empty (&ep->urb_list)) {
spin_lock_irq(&hcd_urb_list_lock);
+ URB_LIST_HOLDER(usb_hcd_flush_endpoint_LIST)
/* The list may have changed while we acquired the spinlock */
urb = NULL;
@@ -1497,6 +1540,7 @@ rescan:
urb_list);
usb_get_urb (urb);
}
+ URB_LIST_RELEASE
spin_unlock_irq(&hcd_urb_list_lock);
if (urb) {
Attachment:
dump_stack.txt.bz2
Description: application/bzip