Re: cgroup: WARNING in cgroup_kill_sb
From: Dmitry Vyukov
Date: Tue Mar 07 2017 - 04:12:45 EST
On Mon, Mar 6, 2017 at 10:55 PM, Tejun Heo <tj@xxxxxxxxxx> wrote:
> Hello, Dmitry.
>
> Can you please see whether the following patch resolves the issue?
> I'm a bit nervous about it ending up in circular dependency, but I
> *think* it should be okay.
>
> Thanks.
>
> diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
> index 0125589..9c40421 100644
> --- a/kernel/cgroup/cgroup.c
> +++ b/kernel/cgroup/cgroup.c
> @@ -1820,6 +1820,8 @@ static void cgroup_kill_sb(struct super_block *sb)
> struct kernfs_root *kf_root = kernfs_root_from_sb(sb);
> struct cgroup_root *root = cgroup_root_from_kf(kf_root);
>
> + mutex_lock(&cgroup_mutex);
> +
> /*
> * If @root doesn't have any mounts or children, start killing it.
> * This prevents new mounts by disabling percpu_ref_tryget_live().
> @@ -1834,6 +1836,8 @@ static void cgroup_kill_sb(struct super_block *sb)
> percpu_ref_kill(&root->cgrp.self.refcnt);
>
> kernfs_kill_sb(sb);
> +
> + mutex_unlock(&cgroup_mutex);
> }
>
> struct file_system_type cgroup_fs_type = {
No, still happens. Please run the repro.
[ 367.607496] ------------[ cut here ]------------
[ 367.607496] WARNING: CPU: 1 PID: 16161 at lib/percpu-refcount.c:317 percpu_ref_kill_and_confirm+0x3ff/0x500
[ 367.608019] percpu_ref_kill_and_confirm called more than once on css_release!
[ 367.608019] Kernel panic - not syncing: panic_on_warn set ...
[ 367.608019]
[ 367.608019] CPU: 1 PID: 16161 Comm: a.out Not tainted 4.11.0-rc1+ #311
[ 367.608019] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
[ 367.608019] Call Trace:
[ 367.608019] dump_stack+0x2fb/0x3fd
[ 367.608019] ? arch_local_irq_restore+0x53/0x53
[ 367.608019] ? vprintk_emit+0x566/0x770
[ 367.608019] ? console_unlock+0xf50/0xf50
[ 367.608019] ? kasan_check_write+0x14/0x20
[ 367.608019] ? sched_clock_local+0xe2/0x150
[ 367.608019] ? do_raw_spin_trylock+0x1a0/0x1a0
[ 367.608019] ? sched_clock_cpu+0x12e/0x170
[ 367.608019] ? memcpy+0x45/0x50
[ 367.608019] ? vprintk_emit+0x566/0x770
[ 367.608019] ? console_unlock+0xf50/0xf50
[ 367.608019] ? percpu_ref_kill_and_confirm+0xeb/0x500
[ 367.608019] ? check_noncircular+0x20/0x20
[ 367.608019] ? vprintk_default+0x28/0x30
[ 367.608019] ? vprintk_func+0x47/0x90
[ 367.608019] ? printk+0xc8/0xf9
[ 367.608019] ? load_image_and_restore+0x134/0x134
[ 367.608019] ? pointer+0xac0/0xac0
[ 367.608019] panic+0x20f/0x426
[ 367.608019] ? copy_mm+0x1219/0x1219
[ 367.608019] ? percpu_ref_kill_and_confirm+0x3ff/0x500
[ 367.608019] ? vprintk_default+0x28/0x30
[ 367.608019] ? percpu_ref_kill_and_confirm+0x3ff/0x500
[ 367.608019] __warn+0x1c4/0x1e0
[ 367.608019] warn_slowpath_fmt+0xc5/0x100
[ 367.608019] ? __warn+0x1e0/0x1e0
[ 367.608019] ? depot_save_stack+0x12c/0x480
[ 367.608019] ? css_free_rcu_fn+0x1d0/0x1d0
[ 367.608019] percpu_ref_kill_and_confirm+0x3ff/0x500
[ 367.608019] ? __percpu_ref_switch_mode+0x850/0x850
[ 367.608019] ? deactivate_super+0x173/0x1b0
[ 367.608019] ? cleanup_mnt+0xb2/0x160
[ 367.608019] ? __cleanup_mnt+0x16/0x20
[ 367.608019] ? task_work_run+0x197/0x260
[ 367.608019] ? exit_to_usermode_loop+0x23b/0x2a0
[ 367.608019] ? mark_held_locks+0xaf/0x100
[ 367.608019] ? kfree+0xed/0x250
[ 367.608019] ? trace_hardirqs_on_caller+0x545/0x6f0
[ 367.608019] ? mark_held_locks+0x100/0x100
[ 367.608019] ? lock_set_class+0xc00/0xc00
[ 367.608019] ? check_same_owner+0x320/0x320
[ 367.608019] cgroup_kill_sb+0x196/0x550
[ 367.608019] ? cgroup_migrate_add_task+0xc60/0xc60
[ 367.608019] ? unregister_shrinker+0x1c1/0x2c0
[ 367.608019] ? perf_trace_mm_vmscan_writepage+0x7a0/0x7a0
[ 367.608019] ? down_write+0x8c/0x120
[ 367.608019] ? down_read+0x150/0x150
[ 367.608019] deactivate_locked_super+0x99/0xe0
[ 367.608019] deactivate_super+0x173/0x1b0
[ 367.608019] ? mount_ns+0x190/0x190
[ 367.608019] ? dput.part.25+0x2a/0x7c0
[ 367.608019] ? dput.part.25+0x176/0x7c0
[ 367.608019] ? dput.part.25+0x2a/0x7c0
[ 367.608019] cleanup_mnt+0xb2/0x160
[ 367.608019] __cleanup_mnt+0x16/0x20
[ 367.608019] task_work_run+0x197/0x260
[ 367.608019] ? task_work_cancel+0x2f0/0x2f0
[ 367.608019] ? __unwind_start+0x380/0x380
[ 367.608019] ? entry_SYSCALL_64_fastpath+0x1f/0xc2
[ 367.608019] exit_to_usermode_loop+0x23b/0x2a0
[ 367.608019] ? trace_event_raw_event_sys_exit+0x270/0x270
[ 367.608019] ? __save_stack_trace+0x7e/0xd0
[ 367.608019] syscall_return_slowpath+0x4d3/0x570
[ 367.608019] ? prepare_exit_to_usermode+0x2e0/0x2e0
[ 367.608019] ? save_stack_trace+0x16/0x20
[ 367.608019] ? save_stack+0x43/0xd0
[ 367.608019] ? kasan_slab_free+0x6f/0xb0
[ 367.608019] ? kfree+0xd3/0x250
[ 367.608019] ? SyS_mount+0xcf/0x120
[ 367.608019] ? entry_SYSCALL_64_fastpath+0x1f/0xc2
[ 367.608019] ? mntput+0x66/0x90
[ 367.608019] ? check_noncircular+0x20/0x20
[ 367.608019] ? kfree+0xed/0x250
[ 367.608019] ? entry_SYSCALL_64_fastpath+0x93/0xc2
[ 367.608019] ? trace_hardirqs_on_caller+0x545/0x6f0
[ 367.608019] ? mark_held_locks+0x100/0x100
[ 367.608019] ? check_stack_object+0x140/0x140
[ 367.608019] ? check_stack_object+0x140/0x140
[ 367.608019] ? rcu_read_lock_sched_held+0x108/0x120
[ 367.608019] ? __kmalloc_track_caller+0x40a/0x6f0
[ 367.608019] ? SyS_mount+0xcf/0x120
[ 367.608019] ? trace_hardirqs_off+0xd/0x10
[ 367.608019] ? quarantine_put+0xea/0x190
[ 367.608019] ? SyS_mount+0xcf/0x120
[ 367.608019] ? trace_hardirqs_on_thunk+0x1a/0x1c
[ 367.608019] entry_SYSCALL_64_fastpath+0xc0/0xc2
[ 367.608019] RIP: 0033:0x440b39
[ 367.608019] RSP: 002b:00007f86c630adb8 EFLAGS: 00000202 ORIG_RAX: 00000000000000a5
[ 367.608019] RAX: ffffffffffffffec RBX: 0000000000000000 RCX: 0000000000440b39
[ 367.608019] RDX: 00000000004a0f3b RSI: 00000000004a0f34 RDI: 00000000004a0f34
[ 367.608019] RBP: 00007f86c630add0 R08: 0000000000000000 R09: 0000000000000000
[ 367.608019] R10: 0000000000000005 R11: 0000000000000202 R12: 0000000000000000
[ 367.608019] R13: 0000000000000000 R14: 00007f86c630b9c0 R15: 00007f86c630b700
[ 367.608019] Kernel Offset: disabled
[ 367.608019] Rebooting in 86400 seconds..