[PATCH] sched/core: defer printk() while rq lock is held
From: Tetsuo Handa
Date: Thu Jun 27 2024 - 07:10:30 EST
syzbot is reporting circular locking dependency inside __bpf_prog_run()
when trace_sched_switch() hook is called from __schedule(), for fault
injection calls printk() despite rq lock is already held.
Since any debugging functionality might call printk(), guard the whole
section between raw_spin_rq_{lock,lock_nested,trylock}() and
raw_spin_rq_unlock() using printk_deferred_{enter,exit}().
======================================================
WARNING: possible circular locking dependency detected
6.10.0-rc4-syzkaller-00874-g84562f9953ec #0 Not tainted
------------------------------------------------------
syz-executor.1/25480 is trying to acquire lock:
ffffffff8e328140 (console_owner){..-.}-{0:0}, at: rcu_try_lock_acquire include/linux/rcupdate.h:334 [inline]
ffffffff8e328140 (console_owner){..-.}-{0:0}, at: srcu_read_lock_nmisafe include/linux/srcu.h:232 [inline]
ffffffff8e328140 (console_owner){..-.}-{0:0}, at: console_srcu_read_lock kernel/printk/printk.c:286 [inline]
ffffffff8e328140 (console_owner){..-.}-{0:0}, at: console_flush_all+0x152/0xfd0 kernel/printk/printk.c:2986
but task is already holding lock:
ffff8880b943e798 (&rq->__lock){-.-.}-{2:2}, at: raw_spin_rq_lock_nested+0x2a/0x140 kernel/sched/core.c:559
which lock already depends on the new lock.
(...snipped...)
Chain exists of:
console_owner --> &p->pi_lock --> &rq->__lock
Possible unsafe locking scenario:
CPU0 CPU1
---- ----
lock(&rq->__lock);
lock(&p->pi_lock);
lock(&rq->__lock);
lock(console_owner);
*** DEADLOCK ***
6 locks held by syz-executor.1/25480:
#0: ffffffff8f5e6f48 (rtnl_mutex){+.+.}-{3:3}, at: dev_ioctl+0x706/0x1340 net/core/dev_ioctl.c:785
#1: ffffffff8f67dd68 (flowtable_lock){+.+.}-{3:3}, at: nf_flow_table_cleanup+0x23/0xb0 net/netfilter/nf_flow_table_core.c:593
#2: ffff8880b943e798 (&rq->__lock){-.-.}-{2:2}, at: raw_spin_rq_lock_nested+0x2a/0x140 kernel/sched/core.c:559
#3: ffffffff8e333fa0 (rcu_read_lock){....}-{1:2}, at: rcu_lock_acquire include/linux/rcupdate.h:329 [inline]
#3: ffffffff8e333fa0 (rcu_read_lock){....}-{1:2}, at: rcu_read_lock include/linux/rcupdate.h:781 [inline]
#3: ffffffff8e333fa0 (rcu_read_lock){....}-{1:2}, at: __bpf_trace_run kernel/trace/bpf_trace.c:2402 [inline]
#3: ffffffff8e333fa0 (rcu_read_lock){....}-{1:2}, at: bpf_trace_run4+0x244/0x590 kernel/trace/bpf_trace.c:2446
#4: ffffffff8e20fa60 (console_lock){+.+.}-{0:0}, at: _printk+0xd5/0x120 kernel/printk/printk.c:2370
#5: ffffffff8e20f690 (console_srcu){....}-{0:0}, at: rcu_try_lock_acquire include/linux/rcupdate.h:334 [inline]
#5: ffffffff8e20f690 (console_srcu){....}-{0:0}, at: srcu_read_lock_nmisafe include/linux/srcu.h:232 [inline]
#5: ffffffff8e20f690 (console_srcu){....}-{0:0}, at: console_srcu_read_lock kernel/printk/printk.c:286 [inline]
#5: ffffffff8e20f690 (console_srcu){....}-{0:0}, at: console_flush_all+0x152/0xfd0 kernel/printk/printk.c:2986
stack backtrace:
CPU: 0 PID: 25480 Comm: syz-executor.1 Not tainted 6.10.0-rc4-syzkaller-00874-g84562f9953ec #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 06/07/2024
Call Trace:
<TASK>
__dump_stack lib/dump_stack.c:88 [inline]
dump_stack_lvl+0x241/0x360 lib/dump_stack.c:114
check_noncircular+0x36a/0x4a0 kernel/locking/lockdep.c:2187
check_prev_add kernel/locking/lockdep.c:3134 [inline]
check_prevs_add kernel/locking/lockdep.c:3253 [inline]
validate_chain+0x18e0/0x5900 kernel/locking/lockdep.c:3869
__lock_acquire+0x1346/0x1fd0 kernel/locking/lockdep.c:5137
lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5754
console_lock_spinning_enable kernel/printk/printk.c:1870 [inline]
console_emit_next_record kernel/printk/printk.c:2922 [inline]
console_flush_all+0x810/0xfd0 kernel/printk/printk.c:2994
console_unlock+0x13b/0x4d0 kernel/printk/printk.c:3063
vprintk_emit+0x5a6/0x770 kernel/printk/printk.c:2345
_printk+0xd5/0x120 kernel/printk/printk.c:2370
fail_dump lib/fault-inject.c:45 [inline]
should_fail_ex+0x391/0x4e0 lib/fault-inject.c:153
__copy_to_user_inatomic include/linux/uaccess.h:123 [inline]
copy_to_user_nofault+0x86/0x140 mm/maccess.c:149
bpf_prog_b0a3dac844962ed2+0x47/0x4d
bpf_dispatcher_nop_func include/linux/bpf.h:1243 [inline]
__bpf_prog_run include/linux/filter.h:691 [inline]
bpf_prog_run include/linux/filter.h:698 [inline]
__bpf_trace_run kernel/trace/bpf_trace.c:2403 [inline]
bpf_trace_run4+0x334/0x590 kernel/trace/bpf_trace.c:2446
__traceiter_sched_switch+0x98/0xd0 include/trace/events/sched.h:222
trace_sched_switch include/trace/events/sched.h:222 [inline]
__schedule+0x2587/0x4a20 kernel/sched/core.c:6742
preempt_schedule_notrace+0x100/0x140 kernel/sched/core.c:7017
preempt_schedule_notrace_thunk+0x1a/0x30 arch/x86/entry/thunk.S:13
rcu_is_watching+0x7e/0xb0 kernel/rcu/tree.c:725
trace_lock_acquire include/trace/events/lock.h:24 [inline]
lock_acquire+0xe3/0x550 kernel/locking/lockdep.c:5725
rcu_lock_acquire include/linux/rcupdate.h:329 [inline]
rcu_read_lock include/linux/rcupdate.h:781 [inline]
start_flush_work kernel/workqueue.c:4122 [inline]
__flush_work+0x107/0xd00 kernel/workqueue.c:4181
flush_work kernel/workqueue.c:4232 [inline]
flush_delayed_work+0x169/0x1c0 kernel/workqueue.c:4254
nf_flow_table_gc_cleanup net/netfilter/nf_flow_table_core.c:585 [inline]
nf_flow_table_cleanup+0x62/0xb0 net/netfilter/nf_flow_table_core.c:595
flow_offload_netdev_event+0x51/0x70 net/netfilter/nft_flow_offload.c:492
notifier_call_chain+0x19f/0x3e0 kernel/notifier.c:93
__dev_notify_flags+0x207/0x400
dev_change_flags+0xf0/0x1a0 net/core/dev.c:8858
dev_ifsioc+0x7c8/0xe70 net/core/dev_ioctl.c:529
dev_ioctl+0x719/0x1340 net/core/dev_ioctl.c:786
sock_do_ioctl+0x240/0x460 net/socket.c:1236
sock_ioctl+0x629/0x8e0 net/socket.c:1341
vfs_ioctl fs/ioctl.c:51 [inline]
__do_sys_ioctl fs/ioctl.c:907 [inline]
__se_sys_ioctl+0xfc/0x170 fs/ioctl.c:893
do_syscall_x64 arch/x86/entry/common.c:52 [inline]
do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83
entry_SYSCALL_64_after_hwframe+0x77/0x7f
Reported-by: syzbot <syzbot+f78380e4eae53c64125c@xxxxxxxxxxxxxxxxxxxxxxxxx>
Closes: https://syzkaller.appspot.com/bug?extid=f78380e4eae53c64125c
Signed-off-by: Tetsuo Handa <penguin-kernel@xxxxxxxxxxxxxxxxxxx>
---
kernel/sched/core.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index bcf2c4cc0522..134f5196b9c4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -559,6 +559,7 @@ void raw_spin_rq_lock_nested(struct rq *rq, int subclass)
raw_spin_lock_nested(&rq->__lock, subclass);
/* preempt_count *MUST* be > 1 */
preempt_enable_no_resched();
+ printk_deferred_enter();
return;
}
@@ -568,6 +569,7 @@ void raw_spin_rq_lock_nested(struct rq *rq, int subclass)
if (likely(lock == __rq_lockp(rq))) {
/* preempt_count *MUST* be > 1 */
preempt_enable_no_resched();
+ printk_deferred_enter();
return;
}
raw_spin_unlock(lock);
@@ -584,6 +586,8 @@ bool raw_spin_rq_trylock(struct rq *rq)
if (sched_core_disabled()) {
ret = raw_spin_trylock(&rq->__lock);
preempt_enable();
+ if (ret)
+ printk_deferred_enter();
return ret;
}
@@ -592,6 +596,8 @@ bool raw_spin_rq_trylock(struct rq *rq)
ret = raw_spin_trylock(lock);
if (!ret || (likely(lock == __rq_lockp(rq)))) {
preempt_enable();
+ if (ret)
+ printk_deferred_enter();
return ret;
}
raw_spin_unlock(lock);
@@ -600,6 +606,7 @@ bool raw_spin_rq_trylock(struct rq *rq)
void raw_spin_rq_unlock(struct rq *rq)
{
+ printk_deferred_exit();
raw_spin_unlock(rq_lockp(rq));
}
--
2.43.0