BUG: spinlock recursion in sched/fair.c | struct rq::lock
From: Vegard Nossum
Date: Sat Aug 20 2016 - 06:53:39 EST
Hi,
Got this:
BUG: spinlock recursion on CPU#0, trinity-c2/27519
lock: 0xffff88011ab20180, .magic: dead4ead, .owner: trinity-c2/27519,
.owner_cpu: 2
CPU: 0 PID: 27519 Comm: trinity-c2 Not tainted 4.8.0-rc2+ #143
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
rel-1.9.3-0-ge2fc41e-prebuilt.qemu-project.org 04/01/2014
ffff880105870000 ffff88011289ef30 ffffffff81f08d11 ffff88011ab20180
ffff880105870000 ffff88011289ef68 ffffffff8134289d ffffffff81312cde
0000000000000000 ffff88011ab20180 0000000000000082 ffff88011ab20190
Call Trace:
[<ffffffff81f08d11>] dump_stack+0x65/0x84
[<ffffffff8134289d>] spin_dump+0x14d/0x270
[<ffffffff81312cde>] ? load_balance+0x62e/0x24c0
[<ffffffff81342e8f>] do_raw_spin_lock+0x1ff/0x2b0
[<ffffffff83ae34d1>] _raw_spin_lock_irqsave+0x51/0x60
[<ffffffff81312cde>] ? load_balance+0x62e/0x24c0
[<ffffffff81312cde>] load_balance+0x62e/0x24c0
[<ffffffff813126b0>] ? find_busiest_group+0x2450/0x2450
[<ffffffff811b3979>] ? sched_clock+0x9/0x10
[<ffffffff812e7922>] ? sched_clock_cpu+0x152/0x1e0
[<ffffffff81315453>] pick_next_task_fair+0x8e3/0x1290
[<ffffffff813151f2>] ? pick_next_task_fair+0x682/0x1290
[<ffffffff81314b70>] ? load_balance+0x24c0/0x24c0
[<ffffffff812cf3ec>] ? deactivate_task+0x11c/0x310
[<ffffffff83ad499d>] __schedule+0x23d/0x19a0
[<ffffffff83ad629a>] schedule+0x9a/0x1c0
[<ffffffff83904486>] p9_client_rpc+0x336/0xde0
[<ffffffff83904150>] ? p9_client_flush+0x1c0/0x1c0
[<ffffffff8171c2a1>] ? posix_lock_inode+0x661/0x1b80
[<ffffffff8171c2fe>] ? posix_lock_inode+0x6be/0x1b80
[<ffffffff8171bc40>] ? locks_copy_lock+0x1a0/0x1a0
[<ffffffff8132c4b0>] ? wake_atomic_t_function+0x210/0x210
[<ffffffff812e5ab0>] ? __might_sleep+0x90/0x1a0
[<ffffffff81721a77>] ? locks_lock_inode_wait+0x207/0x310
[<ffffffff8390c98f>] p9_client_lock_dotl+0x16f/0x4a0
[<ffffffff81ab2c52>] v9fs_file_do_lock+0x2b2/0x4f0
[<ffffffff81ab29a0>] ? v9fs_mmap_file_mmap+0x2d0/0x2d0
[<ffffffff814a86fa>] ? perf_iterate_sb+0x7a/0x720
[<ffffffff81ab32f9>] v9fs_file_lock_dotl+0x1e9/0x850
[<ffffffff814a8680>] ? perf_event_mmap_output+0x800/0x800
[<ffffffff81ab3110>] ? v9fs_file_flock_dotl+0x280/0x280
[<ffffffff81f68193>] ? __this_cpu_preempt_check+0x13/0x20
[<ffffffff814b2ec8>] ? __perf_event_task_sched_in+0x128/0x690
[<ffffffff8172236c>] vfs_lock_file+0x6c/0xf0
[<ffffffff817226f3>] locks_remove_posix.part.35+0x103/0x2e0
[<ffffffff812c6aa0>] ? finish_task_switch+0x180/0x660
[<ffffffff817225f0>] ? do_lock_file_wait+0x200/0x200
[<ffffffff816f419e>] ? fsnotify_find_inode_mark+0x1e/0x50
[<ffffffff816f41b8>] ? fsnotify_find_inode_mark+0x38/0x50
[<ffffffff83ae366c>] ? _raw_spin_unlock+0x2c/0x50
[<ffffffff816f41b8>] ? fsnotify_find_inode_mark+0x38/0x50
[<ffffffff81723416>] locks_remove_posix+0x86/0xd0
[<ffffffff816276dc>] filp_close+0xdc/0x130
[<ffffffff81684454>] put_files_struct+0x124/0x230
[<ffffffff81684679>] exit_files+0x79/0xa0
[<ffffffff8125f850>] do_exit+0x730/0x2b70
[<ffffffff814c26c4>] ? ___perf_sw_event+0x284/0x330
[<ffffffff814c2534>] ? ___perf_sw_event+0xf4/0x330
[<ffffffff8149c680>] ? perf_log_itrace_start+0x3a0/0x3a0
[<ffffffff8125f120>] ? mm_update_next_owner+0x720/0x720
[<ffffffff812818c2>] ? get_signal+0x1a2/0x1120
[<ffffffff81261dc4>] do_group_exit+0xf4/0x2f0
[<ffffffff81281c5d>] get_signal+0x53d/0x1120
[<ffffffff81339812>] ? __lock_acquire.isra.32+0xc2/0x1a30
[<ffffffff8119e993>] do_signal+0x83/0x1e20
[<ffffffff81f68177>] ? debug_smp_processor_id+0x17/0x20
[<ffffffff8119e910>] ? setup_sigcontext+0x7d0/0x7d0
[<ffffffff8121eb0b>] ? __do_page_fault+0x53b/0x8f0
[<ffffffff814a8717>] ? perf_iterate_sb+0x97/0x720
[<ffffffff8121f0fd>] ? trace_do_page_fault+0x18d/0x310
[<ffffffff812125d7>] ? do_async_page_fault+0x27/0xa0
[<ffffffff83ae4f98>] ? async_page_fault+0x28/0x30
[<ffffffff810027af>] exit_to_usermode_loop+0xaf/0x140
[<ffffffff810051e4>] syscall_return_slowpath+0x144/0x170
[<ffffffff83ae406f>] ret_from_fork+0x2f/0x40
$ addr2line -e vmlinux -i ffffffff81342e8f ffffffff81312cde
ffffffff81315453 ffffffff83ad499d ffffffff83ad629a ffffffff83904486
kernel/locking/spinlock_debug.c:75
kernel/locking/spinlock_debug.c:84
kernel/locking/spinlock_debug.c:135
kernel/sched/fair.c:7502 // load_balance()
kernel/sched/fair.c:7770 // idle_balance()
kernel/sched/fair.c:5789 // pick_next_task_fair()
kernel/sched/core.c:3234 // pick_next_task()
kernel/sched/core.c:3365 // __schedule()
kernel/sched/core.c:3406 (discriminator 1) // schedule()
net/9p/client.c:752 (discriminator 13)
It's this bit in load_balance():
more_balance:
raw_spin_lock_irqsave(&busiest->lock, flags);
This is the bit in pick_next_task_fair() and it looks relevant to me
(although I have no scheduler chops):
idle:
/*
* This is OK, because current is on_cpu, which avoids it being
picked
* for load-balance and preemption/IRQs are still disabled avoiding
* further scheduler activity on it and we're being very careful to
* re-start the picking loop.
*/
lockdep_unpin_lock(&rq->lock, cookie);
new_tasks = idle_balance(rq);
lockdep_repin_lock(&rq->lock, cookie);
I have:
$ grep SCHED .config
CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y
CONFIG_CGROUP_SCHED=y
CONFIG_FAIR_GROUP_SCHED=y
CONFIG_RT_GROUP_SCHED=y
# CONFIG_SCHED_AUTOGROUP is not set
CONFIG_IOSCHED_NOOP=y
CONFIG_IOSCHED_DEADLINE=y
CONFIG_IOSCHED_CFQ=y
CONFIG_CFQ_GROUP_IOSCHED=y
CONFIG_DEFAULT_IOSCHED="cfq"
CONFIG_SCHED_OMIT_FRAME_POINTER=y
CONFIG_SCHED_SMT=y
CONFIG_SCHED_MC=y
CONFIG_SCHED_HRTICK=y
# CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL is not set
# CONFIG_CPU_FREQ_GOV_SCHEDUTIL is not set
CONFIG_NET_SCHED=y
CONFIG_USB_EHCI_TT_NEWSCHED=y
# CONFIG_SCHED_DEBUG is not set
CONFIG_SCHED_INFO=y
# CONFIG_SCHEDSTATS is not set
# CONFIG_SCHED_STACK_END_CHECK is not set
# CONFIG_SCHED_TRACER is not set
Vegard