[PATCH AUTOSEL 5.14 15/21] block: genhd: don't call blkdev_show() with major_names_lock held
From: Sasha Levin
Date: Thu Sep 16 2021 - 22:34:49 EST
From: Tetsuo Handa <penguin-kernel@xxxxxxxxxxxxxxxxxxx>
[ Upstream commit dfbb3409b27fa42b96f5727a80d3ceb6a8663991 ]
If CONFIG_BLK_DEV_LOOP && CONFIG_MTD (at least; there might be other
combinations), lockdep complains circular locking dependency at
__loop_clr_fd(), for major_names_lock serves as a locking dependency
aggregating hub across multiple block modules.
======================================================
WARNING: possible circular locking dependency detected
5.14.0+ #757 Tainted: G E
------------------------------------------------------
systemd-udevd/7568 is trying to acquire lock:
ffff88800f334d48 ((wq_completion)loop0){+.+.}-{0:0}, at: flush_workqueue+0x70/0x560
but task is already holding lock:
ffff888014a7d4a0 (&lo->lo_mutex){+.+.}-{3:3}, at: __loop_clr_fd+0x4d/0x400 [loop]
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #6 (&lo->lo_mutex){+.+.}-{3:3}:
lock_acquire+0xbe/0x1f0
__mutex_lock_common+0xb6/0xe10
mutex_lock_killable_nested+0x17/0x20
lo_open+0x23/0x50 [loop]
blkdev_get_by_dev+0x199/0x540
blkdev_open+0x58/0x90
do_dentry_open+0x144/0x3a0
path_openat+0xa57/0xda0
do_filp_open+0x9f/0x140
do_sys_openat2+0x71/0x150
__x64_sys_openat+0x78/0xa0
do_syscall_64+0x3d/0xb0
entry_SYSCALL_64_after_hwframe+0x44/0xae
-> #5 (&disk->open_mutex){+.+.}-{3:3}:
lock_acquire+0xbe/0x1f0
__mutex_lock_common+0xb6/0xe10
mutex_lock_nested+0x17/0x20
bd_register_pending_holders+0x20/0x100
device_add_disk+0x1ae/0x390
loop_add+0x29c/0x2d0 [loop]
blk_request_module+0x5a/0xb0
blkdev_get_no_open+0x27/0xa0
blkdev_get_by_dev+0x5f/0x540
blkdev_open+0x58/0x90
do_dentry_open+0x144/0x3a0
path_openat+0xa57/0xda0
do_filp_open+0x9f/0x140
do_sys_openat2+0x71/0x150
__x64_sys_openat+0x78/0xa0
do_syscall_64+0x3d/0xb0
entry_SYSCALL_64_after_hwframe+0x44/0xae
-> #4 (major_names_lock){+.+.}-{3:3}:
lock_acquire+0xbe/0x1f0
__mutex_lock_common+0xb6/0xe10
mutex_lock_nested+0x17/0x20
blkdev_show+0x19/0x80
devinfo_show+0x52/0x60
seq_read_iter+0x2d5/0x3e0
proc_reg_read_iter+0x41/0x80
vfs_read+0x2ac/0x330
ksys_read+0x6b/0xd0
do_syscall_64+0x3d/0xb0
entry_SYSCALL_64_after_hwframe+0x44/0xae
-> #3 (&p->lock){+.+.}-{3:3}:
lock_acquire+0xbe/0x1f0
__mutex_lock_common+0xb6/0xe10
mutex_lock_nested+0x17/0x20
seq_read_iter+0x37/0x3e0
generic_file_splice_read+0xf3/0x170
splice_direct_to_actor+0x14e/0x350
do_splice_direct+0x84/0xd0
do_sendfile+0x263/0x430
__se_sys_sendfile64+0x96/0xc0
do_syscall_64+0x3d/0xb0
entry_SYSCALL_64_after_hwframe+0x44/0xae
-> #2 (sb_writers#3){.+.+}-{0:0}:
lock_acquire+0xbe/0x1f0
lo_write_bvec+0x96/0x280 [loop]
loop_process_work+0xa68/0xc10 [loop]
process_one_work+0x293/0x480
worker_thread+0x23d/0x4b0
kthread+0x163/0x180
ret_from_fork+0x1f/0x30
-> #1 ((work_completion)(&lo->rootcg_work)){+.+.}-{0:0}:
lock_acquire+0xbe/0x1f0
process_one_work+0x280/0x480
worker_thread+0x23d/0x4b0
kthread+0x163/0x180
ret_from_fork+0x1f/0x30
-> #0 ((wq_completion)loop0){+.+.}-{0:0}:
validate_chain+0x1f0d/0x33e0
__lock_acquire+0x92d/0x1030
lock_acquire+0xbe/0x1f0
flush_workqueue+0x8c/0x560
drain_workqueue+0x80/0x140
destroy_workqueue+0x47/0x4f0
__loop_clr_fd+0xb4/0x400 [loop]
blkdev_put+0x14a/0x1d0
blkdev_close+0x1c/0x20
__fput+0xfd/0x220
task_work_run+0x69/0xc0
exit_to_user_mode_prepare+0x1ce/0x1f0
syscall_exit_to_user_mode+0x26/0x60
do_syscall_64+0x4c/0xb0
entry_SYSCALL_64_after_hwframe+0x44/0xae
other info that might help us debug this:
Chain exists of:
(wq_completion)loop0 --> &disk->open_mutex --> &lo->lo_mutex
Possible unsafe locking scenario:
CPU0 CPU1
---- ----
lock(&lo->lo_mutex);
lock(&disk->open_mutex);
lock(&lo->lo_mutex);
lock((wq_completion)loop0);
*** DEADLOCK ***
2 locks held by systemd-udevd/7568:
#0: ffff888012554128 (&disk->open_mutex){+.+.}-{3:3}, at: blkdev_put+0x4c/0x1d0
#1: ffff888014a7d4a0 (&lo->lo_mutex){+.+.}-{3:3}, at: __loop_clr_fd+0x4d/0x400 [loop]
stack backtrace:
CPU: 0 PID: 7568 Comm: systemd-udevd Tainted: G E 5.14.0+ #757
Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 02/27/2020
Call Trace:
dump_stack_lvl+0x79/0xbf
print_circular_bug+0x5d6/0x5e0
? stack_trace_save+0x42/0x60
? save_trace+0x3d/0x2d0
check_noncircular+0x10b/0x120
validate_chain+0x1f0d/0x33e0
? __lock_acquire+0x953/0x1030
? __lock_acquire+0x953/0x1030
__lock_acquire+0x92d/0x1030
? flush_workqueue+0x70/0x560
lock_acquire+0xbe/0x1f0
? flush_workqueue+0x70/0x560
flush_workqueue+0x8c/0x560
? flush_workqueue+0x70/0x560
? sched_clock_cpu+0xe/0x1a0
? drain_workqueue+0x41/0x140
drain_workqueue+0x80/0x140
destroy_workqueue+0x47/0x4f0
? blk_mq_freeze_queue_wait+0xac/0xd0
__loop_clr_fd+0xb4/0x400 [loop]
? __mutex_unlock_slowpath+0x35/0x230
blkdev_put+0x14a/0x1d0
blkdev_close+0x1c/0x20
__fput+0xfd/0x220
task_work_run+0x69/0xc0
exit_to_user_mode_prepare+0x1ce/0x1f0
syscall_exit_to_user_mode+0x26/0x60
do_syscall_64+0x4c/0xb0
entry_SYSCALL_64_after_hwframe+0x44/0xae
RIP: 0033:0x7f0fd4c661f7
Code: 00 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 41 c3 48 83 ec 18 89 7c 24 0c e8 13 fc ff ff
RSP: 002b:00007ffd1c9e9fd8 EFLAGS: 00000246 ORIG_RAX: 0000000000000003
RAX: 0000000000000000 RBX: 00007f0fd46be6c8 RCX: 00007f0fd4c661f7
RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000006
RBP: 0000000000000006 R08: 000055fff1eaf400 R09: 0000000000000000
R10: 00007f0fd46be6c8 R11: 0000000000000246 R12: 0000000000000000
R13: 0000000000000000 R14: 0000000000002f08 R15: 00007ffd1c9ea050
Commit 1c500ad706383f1a ("loop: reduce the loop_ctl_mutex scope") is for
breaking "loop_ctl_mutex => &lo->lo_mutex" dependency chain. But enabling
a different block module results in forming circular locking dependency
due to shared major_names_lock mutex.
The simplest fix is to call probe function without holding
major_names_lock [1], but Christoph Hellwig does not like such idea.
Therefore, instead of holding major_names_lock in blkdev_show(),
introduce a different lock for blkdev_show() in order to break
"sb_writers#$N => &p->lock => major_names_lock" dependency chain.
Link: https://lkml.kernel.org/r/b2af8a5b-3c1b-204e-7f56-bea0b15848d6@xxxxxxxxxxxxxxxxxxx [1]
Signed-off-by: Tetsuo Handa <penguin-kernel@xxxxxxxxxxxxxxxxxxx>
Link: https://lore.kernel.org/r/18a02da2-0bf3-550e-b071-2b4ab13c49f0@xxxxxxxxxxxxxxxxxxx
Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx>
---
block/genhd.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/block/genhd.c b/block/genhd.c
index 298ee78c1bda..9aba65404416 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -164,6 +164,7 @@ static struct blk_major_name {
void (*probe)(dev_t devt);
} *major_names[BLKDEV_MAJOR_HASH_SIZE];
static DEFINE_MUTEX(major_names_lock);
+static DEFINE_SPINLOCK(major_names_spinlock);
/* index in the above - for now: assume no multimajor ranges */
static inline int major_to_index(unsigned major)
@@ -176,11 +177,11 @@ void blkdev_show(struct seq_file *seqf, off_t offset)
{
struct blk_major_name *dp;
- mutex_lock(&major_names_lock);
+ spin_lock(&major_names_spinlock);
for (dp = major_names[major_to_index(offset)]; dp; dp = dp->next)
if (dp->major == offset)
seq_printf(seqf, "%3d %s\n", dp->major, dp->name);
- mutex_unlock(&major_names_lock);
+ spin_unlock(&major_names_spinlock);
}
#endif /* CONFIG_PROC_FS */
@@ -252,6 +253,7 @@ int __register_blkdev(unsigned int major, const char *name,
p->next = NULL;
index = major_to_index(major);
+ spin_lock(&major_names_spinlock);
for (n = &major_names[index]; *n; n = &(*n)->next) {
if ((*n)->major == major)
break;
@@ -260,6 +262,7 @@ int __register_blkdev(unsigned int major, const char *name,
*n = p;
else
ret = -EBUSY;
+ spin_unlock(&major_names_spinlock);
if (ret < 0) {
printk("register_blkdev: cannot get major %u for %s\n",
@@ -279,6 +282,7 @@ void unregister_blkdev(unsigned int major, const char *name)
int index = major_to_index(major);
mutex_lock(&major_names_lock);
+ spin_lock(&major_names_spinlock);
for (n = &major_names[index]; *n; n = &(*n)->next)
if ((*n)->major == major)
break;
@@ -288,6 +292,7 @@ void unregister_blkdev(unsigned int major, const char *name)
p = *n;
*n = p->next;
}
+ spin_unlock(&major_names_spinlock);
mutex_unlock(&major_names_lock);
kfree(p);
}
--
2.30.2