[PATCH 3.16 038/204] USB: g_mass_storage: Fix deadlock when driver is unbound

From: Ben Hutchings
Date: Thu Dec 28 2017 - 12:33:36 EST


3.16.52-rc1 review patch. If anyone has any objections, please let me know.

------------------

From: Alan Stern <stern@xxxxxxxxxxxxxxxxxxx>

commit 1fbbb78f25d1291274f320462bf6908906f538db upstream.

As a holdover from the old g_file_storage gadget, the g_mass_storage
legacy gadget driver attempts to unregister itself when its main
operating thread terminates (if it hasn't been unregistered already).
This is not strictly necessary; it was never more than an attempt to
have the gadget fail cleanly if something went wrong and the main
thread was killed.

However, now that the UDC core manages gadget drivers independently of
UDC drivers, this scheme doesn't work any more. A simple test:

modprobe dummy-hcd
modprobe g-mass-storage file=...
rmmod dummy-hcd

ends up in a deadlock with the following backtrace:

sysrq: SysRq : Show Blocked State
task PC stack pid father
file-storage D 0 1130 2 0x00000000
Call Trace:
__schedule+0x53e/0x58c
schedule+0x6e/0x77
schedule_preempt_disabled+0xd/0xf
__mutex_lock.isra.1+0x129/0x224
? _raw_spin_unlock_irqrestore+0x12/0x14
__mutex_lock_slowpath+0x12/0x14
mutex_lock+0x28/0x2b
usb_gadget_unregister_driver+0x29/0x9b [udc_core]
usb_composite_unregister+0x10/0x12 [libcomposite]
msg_cleanup+0x1d/0x20 [g_mass_storage]
msg_thread_exits+0xd/0xdd7 [g_mass_storage]
fsg_main_thread+0x1395/0x13d6 [usb_f_mass_storage]
? __schedule+0x573/0x58c
kthread+0xd9/0xdb
? do_set_interface+0x25c/0x25c [usb_f_mass_storage]
? init_completion+0x1e/0x1e
ret_from_fork+0x19/0x24
rmmod D 0 1155 683 0x00000000
Call Trace:
__schedule+0x53e/0x58c
schedule+0x6e/0x77
schedule_timeout+0x26/0xbc
? __schedule+0x573/0x58c
do_wait_for_common+0xb3/0x128
? usleep_range+0x81/0x81
? wake_up_q+0x3f/0x3f
wait_for_common+0x2e/0x45
wait_for_completion+0x17/0x19
fsg_common_put+0x34/0x81 [usb_f_mass_storage]
fsg_free_inst+0x13/0x1e [usb_f_mass_storage]
usb_put_function_instance+0x1a/0x25 [libcomposite]
msg_unbind+0x2a/0x42 [g_mass_storage]
__composite_unbind+0x4a/0x6f [libcomposite]
composite_unbind+0x12/0x14 [libcomposite]
usb_gadget_remove_driver+0x4f/0x77 [udc_core]
usb_del_gadget_udc+0x52/0xcc [udc_core]
dummy_udc_remove+0x27/0x2c [dummy_hcd]
platform_drv_remove+0x1d/0x31
device_release_driver_internal+0xe9/0x16d
device_release_driver+0x11/0x13
bus_remove_device+0xd2/0xe2
device_del+0x19f/0x221
? selinux_capable+0x22/0x27
platform_device_del+0x21/0x63
platform_device_unregister+0x10/0x1a
cleanup+0x20/0x817 [dummy_hcd]
SyS_delete_module+0x10c/0x197
? ____fput+0xd/0xf
? task_work_run+0x55/0x62
? prepare_exit_to_usermode+0x65/0x75
do_fast_syscall_32+0x86/0xc3
entry_SYSENTER_32+0x4e/0x7c

What happens is that removing the dummy-hcd driver causes the UDC core
to unbind the gadget driver, which it does while holding the udc_lock
mutex. The unbind routine in g_mass_storage tells the main thread to
exit and waits for it to terminate.

But as mentioned above, when the main thread exits it tries to
unregister the mass-storage function driver. Via the composite
framework this ends up calling usb_gadget_unregister_driver(), which
tries to acquire the udc_lock mutex. The result is deadlock.

The simplest way to fix the problem is not to be so clever: The main
thread doesn't have to unregister the function driver. The side
effects won't be so terrible; if the gadget is still attached to a USB
host when the main thread is killed, it will appear to the host as
though the gadget's firmware has crashed -- a reasonably accurate
interpretation, and an all-too-common occurrence for USB mass-storage
devices.

In fact, the code to unregister the driver when the main thread exits
is specific to g-mass-storage; it is not used when f-mass-storage is
included as a function in a larger composite device. Therefore the
entire mechanism responsible for this (the fsg_operations structure
with its ->thread_exits method, the fsg_common_set_ops() routine, and
the msg_thread_exits() callback routine) can all be eliminated. Even
the msg_registered bitflag can be removed, because now the driver is
unregistered in only one place rather than in two places.

Signed-off-by: Alan Stern <stern@xxxxxxxxxxxxxxxxxxx>
Acked-by: Felipe Balbi <felipe.balbi@xxxxxxxxxxxxxxx>
Acked-by: Michal Nazarewicz <mina86@xxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
[bwh: Backported to 3.16:
- Preserve the old way of iterating over LUNs in fsg_main_thread() cleanup
- Adjust filename, context]
Signed-off-by: Ben Hutchings <ben@xxxxxxxxxxxxxxx>
---
--- a/drivers/usb/gadget/f_mass_storage.c
+++ b/drivers/usb/gadget/f_mass_storage.c
@@ -307,8 +307,6 @@ struct fsg_common {
struct completion thread_notifier;
struct task_struct *thread_task;

- /* Callback functions. */
- const struct fsg_operations *ops;
/* Gadget's private data. */
void *private_data;

@@ -2498,6 +2496,8 @@ static void handle_exception(struct fsg_
static int fsg_main_thread(void *common_)
{
struct fsg_common *common = common_;
+ struct fsg_lun **curlun_it;
+ unsigned i;

/*
* Allow the thread to be killed by a signal, but set the signal mask
@@ -2559,22 +2559,18 @@ static int fsg_main_thread(void *common_
common->thread_task = NULL;
spin_unlock_irq(&common->lock);

- if (!common->ops || !common->ops->thread_exits
- || common->ops->thread_exits(common) < 0) {
- struct fsg_lun **curlun_it = common->luns;
- unsigned i = common->nluns;
-
- down_write(&common->filesem);
- for (; i--; ++curlun_it) {
- struct fsg_lun *curlun = *curlun_it;
- if (!curlun || !fsg_lun_is_open(curlun))
- continue;
+ /* Eject media from all LUNs */
+ curlun_it = common->luns;
+ i = common->nluns;
+
+ down_write(&common->filesem);
+ for (; i--; ++curlun_it) {
+ struct fsg_lun *curlun = *curlun_it;

+ if (curlun && fsg_lun_is_open(curlun))
fsg_lun_close(curlun);
- curlun->unit_attention_data = SS_MEDIUM_NOT_PRESENT;
- }
- up_write(&common->filesem);
}
+ up_write(&common->filesem);

/* Let fsg_unbind() know the thread has exited */
complete_and_exit(&common->thread_notifier, 0);
@@ -2842,13 +2838,6 @@ int fsg_common_set_nluns(struct fsg_comm
}
EXPORT_SYMBOL_GPL(fsg_common_set_nluns);

-void fsg_common_set_ops(struct fsg_common *common,
- const struct fsg_operations *ops)
-{
- common->ops = ops;
-}
-EXPORT_SYMBOL_GPL(fsg_common_set_ops);
-
void fsg_common_free_buffers(struct fsg_common *common)
{
_fsg_common_free_buffers(common->buffhds, common->fsg_num_buffers);
--- a/drivers/usb/gadget/f_mass_storage.h
+++ b/drivers/usb/gadget/f_mass_storage.h
@@ -60,17 +60,6 @@ struct fsg_module_parameters {
struct fsg_common;

/* FSF callback functions */
-struct fsg_operations {
- /*
- * Callback function to call when thread exits. If no
- * callback is set or it returns value lower then zero MSF
- * will force eject all LUNs it operates on (including those
- * marked as non-removable or with prevent_medium_removal flag
- * set).
- */
- int (*thread_exits)(struct fsg_common *common);
-};
-
struct fsg_lun_opts {
struct config_group group;
struct fsg_lun *lun;
@@ -145,9 +134,6 @@ void fsg_common_free_luns(struct fsg_com

int fsg_common_set_nluns(struct fsg_common *common, int nluns);

-void fsg_common_set_ops(struct fsg_common *common,
- const struct fsg_operations *ops);
-
int fsg_common_create_lun(struct fsg_common *common, struct fsg_lun_config *cfg,
unsigned int id, const char *name,
const char **name_pfx);
--- a/drivers/usb/gadget/mass_storage.c
+++ b/drivers/usb/gadget/mass_storage.c
@@ -121,15 +121,6 @@ static unsigned int fsg_num_buffers = CO

FSG_MODULE_PARAMETERS(/* no prefix */, mod_data);

-static unsigned long msg_registered;
-static void msg_cleanup(void);
-
-static int msg_thread_exits(struct fsg_common *common)
-{
- msg_cleanup();
- return 0;
-}
-
static int __init msg_do_config(struct usb_configuration *c)
{
struct fsg_opts *opts;
@@ -172,9 +163,6 @@ static struct usb_configuration msg_conf

static int __init msg_bind(struct usb_composite_dev *cdev)
{
- static const struct fsg_operations ops = {
- .thread_exits = msg_thread_exits,
- };
struct fsg_opts *opts;
struct fsg_config config;
int status;
@@ -195,8 +183,6 @@ static int __init msg_bind(struct usb_co
if (status)
goto fail_set_nluns;

- fsg_common_set_ops(opts->common, &ops);
-
status = fsg_common_set_cdev(opts->common, cdev, config.can_stall);
if (status)
goto fail_set_cdev;
@@ -221,7 +207,6 @@ static int __init msg_bind(struct usb_co
usb_composite_overwrite_options(cdev, &coverwrite);
dev_info(&cdev->gadget->dev,
DRIVER_DESC ", version: " DRIVER_VERSION "\n");
- set_bit(0, &msg_registered);
return 0;

fail_string_ids:
@@ -268,9 +253,8 @@ static int __init msg_init(void)
}
module_init(msg_init);

-static void msg_cleanup(void)
+static void __exit msg_cleanup(void)
{
- if (test_and_clear_bit(0, &msg_registered))
- usb_composite_unregister(&msg_driver);
+ usb_composite_unregister(&msg_driver);
}
module_exit(msg_cleanup);