[PATCH v2] block: genhd: don't call probe function with major_names_lock held

From: Tetsuo Handa
Date: Fri Jun 18 2021 - 21:07:19 EST


syzbot is reporting circular locking problem at blk_request_module() [1],
for blk_request_module() is calling probe function with major_names_lock
held while major_names_lock is held during module's __init and __exit
functions.

loop_exit() {
mutex_lock(&loop_ctl_mutex);
blk_request_module() {
mutex_lock(&major_names_lock);
loop_probe() {
mutex_lock(&loop_ctl_mutex); // Blocked by loop_exit()
mutex_unlock(&loop_ctl_mutex);
}
mutex_unlock(&major_names_lock);
unregister_blkdev() {
mutex_lock(&major_names_lock); // Blocked by blk_request_module()
mutex_unlock(&major_names_lock);
}
mutex_unlock(&loop_ctl_mutex);
}
}

Based on an assumption that a probe callback passed to __register_blkdev()
belongs to a module which calls __register_blkdev(), drop major_names_lock
before calling probe function by holding a reference to that module which
contains that probe function. If there is a module where this assumption
does not hold, such module can call ____register_blkdev() directly.

blk_request_module() {
mutex_lock(&major_names_lock);
// Block loop_exit()
mutex_unlock(&major_names_lock);
loop_probe() {
mutex_lock(&loop_ctl_mutex);
mutex_unlock(&loop_ctl_mutex);
}
// Unblock loop_exit()
}
loop_exit() {
mutex_lock(&loop_ctl_mutex);
unregister_blkdev() {
mutex_lock(&major_names_lock);
mutex_unlock(&major_names_lock);
}
mutex_unlock(&loop_ctl_mutex);
}

Note that regardless of this patch, it is up to probe function to
serialize module's __init function and probe function in that module
by using e.g. a mutex. This patch simply makes sure that module's __exit
function won't be called when probe function is about to be called.

While Desmond Cheong Zhi Xi already proposed a patch for breaking ABCD
circular dependency [2], I consider that this patch is still needed for
safely breaking AB-BA dependency upon module unloading. (Note that syzbot
does not test module unloading code because the syzbot kernels are built
with almost all modules built-in. We need manual inspection.)

By doing kmalloc(GFP_KERNEL) in ____register_blkdev() before holding
major_names_lock, we could convert major_names_lock from a mutex to
a spinlock. But that is beyond the scope of this patch.

Link: https://syzkaller.appspot.com/bug?id=7bd106c28e846d1023d4ca915718b1a0905444cb [1]
Link: https://lkml.kernel.org/r/20210617160904.570111-1-desmondcheongzx@xxxxxxxxx [2]
Reported-by: syzbot <syzbot+6a8a0d93c91e8fbf2e80@xxxxxxxxxxxxxxxxxxxxxxxxx>
Signed-off-by: Tetsuo Handa <penguin-kernel@xxxxxxxxxxxxxxxxxxx>
Tested-by: syzbot <syzbot+6a8a0d93c91e8fbf2e80@xxxxxxxxxxxxxxxxxxxxxxxxx>
---
block/genhd.c | 36 +++++++++++++++++++++++++++---------
include/linux/genhd.h | 8 +++++---
2 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index 9f8cb7beaad1..9577c70a6bd3 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -169,6 +169,7 @@ static struct blk_major_name {
int major;
char name[16];
void (*probe)(dev_t devt);
+ struct module *owner;
} *major_names[BLKDEV_MAJOR_HASH_SIZE];
static DEFINE_MUTEX(major_names_lock);

@@ -197,7 +198,8 @@ void blkdev_show(struct seq_file *seqf, off_t offset)
* @major: the requested major device number [1..BLKDEV_MAJOR_MAX-1]. If
* @major = 0, try to allocate any unused major number.
* @name: the name of the new block device as a zero terminated string
- * @probe: allback that is called on access to any minor number of @major
+ * @probe: callback that is called on access to any minor number of @major
+ * @owner: the owner of @probe function (i.e. THIS_MODULE or NULL).
*
* The @name must be unique within the system.
*
@@ -214,8 +216,8 @@ void blkdev_show(struct seq_file *seqf, off_t offset)
*
* Use register_blkdev instead for any new code.
*/
-int __register_blkdev(unsigned int major, const char *name,
- void (*probe)(dev_t devt))
+int ____register_blkdev(unsigned int major, const char *name,
+ void (*probe)(dev_t devt), struct module *owner)
{
struct blk_major_name **n, *p;
int index, ret = 0;
@@ -255,6 +257,7 @@ int __register_blkdev(unsigned int major, const char *name,

p->major = major;
p->probe = probe;
+ p->owner = owner;
strlcpy(p->name, name, sizeof(p->name));
p->next = NULL;
index = major_to_index(major);
@@ -277,7 +280,7 @@ int __register_blkdev(unsigned int major, const char *name,
mutex_unlock(&major_names_lock);
return ret;
}
-EXPORT_SYMBOL(__register_blkdev);
+EXPORT_SYMBOL(____register_blkdev);

void unregister_blkdev(unsigned int major, const char *name)
{
@@ -676,14 +679,29 @@ void blk_request_module(dev_t devt)
{
unsigned int major = MAJOR(devt);
struct blk_major_name **n;
+ void (*probe_fn)(dev_t devt);

mutex_lock(&major_names_lock);
for (n = &major_names[major_to_index(major)]; *n; n = &(*n)->next) {
- if ((*n)->major == major && (*n)->probe) {
- (*n)->probe(devt);
- mutex_unlock(&major_names_lock);
- return;
- }
+ if ((*n)->major != major || !(*n)->probe)
+ continue;
+ if (!try_module_get((*n)->owner))
+ break;
+ /*
+ * Calling probe function with major_names_lock held causes
+ * circular locking dependency problem. Thus, call it after
+ * releasing major_names_lock.
+ */
+ probe_fn = (*n)->probe;
+ mutex_unlock(&major_names_lock);
+ /*
+ * Assuming that unregister_blkdev() is called from module's
+ * __exit function, a module refcount taken above allows us
+ * to safely call probe function without major_names_lock held.
+ */
+ probe_fn(devt);
+ module_put((*n)->owner);
+ return;
}
mutex_unlock(&major_names_lock);

diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 6fc26f7bdf71..070b73c043e6 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -277,10 +277,12 @@ extern void put_disk(struct gendisk *disk);

#define alloc_disk(minors) alloc_disk_node(minors, NUMA_NO_NODE)

-int __register_blkdev(unsigned int major, const char *name,
- void (*probe)(dev_t devt));
+int ____register_blkdev(unsigned int major, const char *name,
+ void (*probe)(dev_t devt), struct module *owner);
+#define __register_blkdev(major, name, probe) \
+ ____register_blkdev(major, name, probe, THIS_MODULE)
#define register_blkdev(major, name) \
- __register_blkdev(major, name, NULL)
+ ____register_blkdev(major, name, NULL, NULL)
void unregister_blkdev(unsigned int major, const char *name);

bool bdev_check_media_change(struct block_device *bdev);
--
2.18.4