[PATCH v3] usb/hcd: Send a uevent signaling that the host controller had died
From: Raul E Rangel
Date: Wed Apr 17 2019 - 15:03:36 EST
This change will send an OFFLINE event to udev with the ERROR=DEAD
environment variable set when the HC dies.
By notifying user space the appropriate policies can be applied.
i.e.,
* Collect error logs.
* Notify the user that USB is no longer functional.
* Perform a graceful reboot.
Signed-off-by: Raul E Rangel <rrangel@xxxxxxxxxxxx>
---
I wasn't able to find any good examples of other drivers sending a dead
notification.
Use an EVENT= format
https://github.com/torvalds/linux/blob/master/drivers/acpi/dock.c#L302
https://github.com/torvalds/linux/blob/master/drivers/net/wireless/ath/wil6210/interrupt.c#L497
Uses SDEV_MEDIA_CHANGE=
https://github.com/torvalds/linux/blob/master/drivers/scsi/scsi_lib.c#L2318
Uses ERROR=1.
https://chromium.googlesource.com/chromiumos/third_party/kernel/+/7f6d8aec5803aac44192f03dce5637b66cda7abf/drivers/input/touchscreen/atmel_mxt_ts.c#1581
I'm not a fan because it doesn't signal what the error was.
Changes in v3:
- Added documentation
- Removed use of lock and null check
- Changed event to OFFLINE + ERROR=DEAD
Changes in v2:
- Check that the root hub still exists before sending the uevent.
- Ensure died_work has completed before deallocating.
Documentation/ABI/testing/usb-uevent | 27 +++++++++++++++++++++++++++
drivers/usb/core/hcd.c | 25 +++++++++++++++++++++++++
include/linux/usb/hcd.h | 1 +
3 files changed, 53 insertions(+)
create mode 100644 Documentation/ABI/testing/usb-uevent
diff --git a/Documentation/ABI/testing/usb-uevent b/Documentation/ABI/testing/usb-uevent
new file mode 100644
index 000000000000..d35c3cad892c
--- /dev/null
+++ b/Documentation/ABI/testing/usb-uevent
@@ -0,0 +1,27 @@
+What: Raise a uevent when a USB Host Controller has died
+Date: 2019-04-17
+KernelVersion: 5.2
+Contact: linux-usb@xxxxxxxxxxxxxxx
+Description: When the USB Host Controller has entered a state where it is no
+ longer functional a uevent will be raised. The uevent will
+ contain ACTION=offline and ERROR=DEAD.
+
+ Here is an example taken using udevadm monitor -p:
+
+ KERNEL[130.428945] offline /devices/pci0000:00/0000:00:10.0/usb2 (usb)
+ ACTION=offline
+ BUSNUM=002
+ DEVNAME=/dev/bus/usb/002/001
+ DEVNUM=001
+ DEVPATH=/devices/pci0000:00/0000:00:10.0/usb2
+ DEVTYPE=usb_device
+ DRIVER=usb
+ ERROR=DEAD
+ MAJOR=189
+ MINOR=128
+ PRODUCT=1d6b/2/414
+ SEQNUM=2168
+ SUBSYSTEM=usb
+ TYPE=9/0/1
+
+Users: chromium-os-dev@xxxxxxxxxxxx
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 975d7c1288e3..145b058705fe 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -2343,6 +2343,20 @@ int hcd_bus_resume(struct usb_device *rhdev, pm_message_t msg)
return status;
}
+
+/* Workqueue routine for when the root-hub has died. */
+static void hcd_died_work(struct work_struct *work)
+{
+ struct usb_hcd *hcd = container_of(work, struct usb_hcd, died_work);
+ char *env[] = {
+ "ERROR=DEAD",
+ NULL
+ };
+
+ /* Notify user space that the host controller has died */
+ kobject_uevent_env(&hcd->self.root_hub->dev.kobj, KOBJ_OFFLINE, env);
+}
+
/* Workqueue routine for root-hub remote wakeup */
static void hcd_resume_work(struct work_struct *work)
{
@@ -2488,6 +2502,13 @@ void usb_hc_died (struct usb_hcd *hcd)
usb_kick_hub_wq(hcd->self.root_hub);
}
}
+
+ /* Handle the case where this function gets called with a shared HCD */
+ if (usb_hcd_is_primary_hcd(hcd))
+ schedule_work(&hcd->died_work);
+ else
+ schedule_work(&hcd->primary_hcd->died_work);
+
spin_unlock_irqrestore (&hcd_root_hub_lock, flags);
/* Make sure that the other roothub is also deallocated. */
}
@@ -2555,6 +2576,8 @@ struct usb_hcd *__usb_create_hcd(const struct hc_driver *driver,
INIT_WORK(&hcd->wakeup_work, hcd_resume_work);
#endif
+ INIT_WORK(&hcd->died_work, hcd_died_work);
+
hcd->driver = driver;
hcd->speed = driver->flags & HCD_MASK;
hcd->product_desc = (driver->product_desc) ? driver->product_desc :
@@ -2908,6 +2931,7 @@ int usb_add_hcd(struct usb_hcd *hcd,
#ifdef CONFIG_PM
cancel_work_sync(&hcd->wakeup_work);
#endif
+ cancel_work_sync(&hcd->died_work);
mutex_lock(&usb_bus_idr_lock);
usb_disconnect(&rhdev); /* Sets rhdev to NULL */
mutex_unlock(&usb_bus_idr_lock);
@@ -2968,6 +2992,7 @@ void usb_remove_hcd(struct usb_hcd *hcd)
#ifdef CONFIG_PM
cancel_work_sync(&hcd->wakeup_work);
#endif
+ cancel_work_sync(&hcd->died_work);
mutex_lock(&usb_bus_idr_lock);
usb_disconnect(&rhdev); /* Sets rhdev to NULL */
diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index 695931b03684..66a24b13e2ab 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -98,6 +98,7 @@ struct usb_hcd {
#ifdef CONFIG_PM
struct work_struct wakeup_work; /* for remote wakeup */
#endif
+ struct work_struct died_work; /* for when the device dies */
/*
* hardware info/state
--
2.21.0.392.gf8f6787159e-goog