Re: [PATCH 10/24] sched/uclamp: Handle delayed dequeue

From: Mike Galbraith
Date: Thu Sep 12 2024 - 10:01:29 EST


On Wed, 2024-09-11 at 11:27 +0200, Mike Galbraith wrote:
> On Wed, 2024-09-11 at 11:13 +0200, Peter Zijlstra wrote:
> > On Wed, Sep 11, 2024 at 11:10:26AM +0200, Mike Galbraith wrote:
> > >
> > > Hm, would be interesting to know how the heck he's triggering that.
> > >
> > > My x86_64 box refuses to produce any such artifacts with anything I've
> > > tossed at it, including full LTP with enterprise RT and !RT configs,
> > > both in master and my local SLE15-SP7 branch.  Hohum.
> >
> > Yeah, my hackbench runs also didn't show that. Perhaps something funny
> > with cgroups. I didn't test cgroup bandwidth for example.
>
> That's all on in enterprise configs tested with LTP, so hypothetically
> got some testing.  I also turned on AUTOGROUP in !RT configs so cgroups
> would get some exercise no matter what I'm mucking about with.

Oho, I just hit a bug in tip where pick_eevdf() returns NULL in
pick_next_entity() and we dereference it, which I recall someone else
mentioning having hit. LTP was chugging away doing lord knows what when
evolution apparently decided to check accounts, which didn't go well.

state=TASK_WAKING(?), on_rq=0, on_cpu=1, cfs_rq.nr_running=0

crash> bt -sx
PID: 29024 TASK: ffff9118b7583300 CPU: 1 COMMAND: "pool-evolution"
#0 [ffffa939dfd0f930] machine_kexec+0x1a0 at ffffffffab886cc0
#1 [ffffa939dfd0f990] __crash_kexec+0x6a at ffffffffab99496a
#2 [ffffa939dfd0fa50] crash_kexec+0x23 at ffffffffab994e33
#3 [ffffa939dfd0fa60] oops_end+0xbe at ffffffffab844b4e
#4 [ffffa939dfd0fa80] page_fault_oops+0x151 at ffffffffab898fc1
#5 [ffffa939dfd0fb08] exc_page_fault+0x6b at ffffffffac3a410b
#6 [ffffa939dfd0fb30] asm_exc_page_fault+0x22 at ffffffffac400ac2
[exception RIP: pick_task_fair+113]
RIP: ffffffffab8fb471 RSP: ffffa939dfd0fbe0 RFLAGS: 00010046
RAX: 0000000000000000 RBX: ffff91180735ee00 RCX: 000b709eab0437d5
RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffff91180735ee00
RBP: ffff91180735f400 R8: 00000000000001d9 R9: 0000000000000000
R10: ffff911a8ecb9380 R11: 0000000000000000 R12: ffff911a8eab89c0
R13: ffff911a8eab8a40 R14: ffffffffacafc373 R15: ffff9118b7583300
ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
#7 [ffffa939dfd0fc08] pick_next_task_fair+0x48 at ffffffffab9013b8
#8 [ffffa939dfd0fc48] __schedule+0x1d9 at ffffffffac3aab39
#9 [ffffa939dfd0fcf8] schedule+0x24 at ffffffffac3ac084
#10 [ffffa939dfd0fd10] futex_wait_queue+0x63 at ffffffffab98e353
#11 [ffffa939dfd0fd38] __futex_wait+0x139 at ffffffffab98e989
#12 [ffffa939dfd0fdf0] futex_wait+0x6a at ffffffffab98ea5a
#13 [ffffa939dfd0fe80] do_futex+0x88 at ffffffffab98a9f8
#14 [ffffa939dfd0fe90] __x64_sys_futex+0x5e at ffffffffab98ab0e
#15 [ffffa939dfd0ff00] do_syscall_64+0x74 at ffffffffac39ce44
#16 [ffffa939dfd0ff40] entry_SYSCALL_64_after_hwframe+0x4b at ffffffffac4000ac
RIP: 00007fd6b991a849 RSP: 00007fd6813ff6e8 RFLAGS: 00000246
RAX: ffffffffffffffda RBX: 0000000000000a6c RCX: 00007fd6b991a849
RDX: 0000000000000a6c RSI: 0000000000000080 RDI: 00005631abf620c0
RBP: 00005631abf620b8 R8: 00007fd6bad0a080 R9: 00000000000015fe
R10: 00007fd6813ff700 R11: 0000000000000246 R12: 00005631abf620b0
R13: 00005631abf620b0 R14: 00005631abf620b8 R15: 0000000000000000
ORIG_RAX: 00000000000000ca CS: 0033 SS: 002b
crash> dis pick_task_fair+113
0xffffffffab8fb471 <pick_task_fair+113>: cmpb $0x0,0x51(%rax)
crash> gdb list *pick_task_fair+113
0xffffffffab8fb471 is in pick_task_fair (kernel/sched/fair.c:5639).
5634 SCHED_WARN_ON(cfs_rq->next->sched_delayed);
5635 return cfs_rq->next;
5636 }
5637
5638 struct sched_entity *se = pick_eevdf(cfs_rq);
5639 if (se->sched_delayed) {
5640 dequeue_entities(rq, se, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
5641 SCHED_WARN_ON(se->sched_delayed);
5642 SCHED_WARN_ON(se->on_rq);
5643 return NULL;
crash> task_struct -x 0xffff9118b7583300 | grep "__state ="
__state = 0x200,
crash> task_struct -x 0xffff9118b7583300 | grep rq
on_rq = 0x0,
on_rq = 0x0,
cfs_rq = 0xffff9117e81a3e00,
on_rq = 0x0,
rq = 0x0,
crash> task_struct -xo | grep sched_entity
[0x80] struct sched_entity se
crash> sched_entity 0xffff9118b7583380
struct sched_entity {
load = {
weight = 1048576,
inv_weight = 4194304
},
run_node = {
__rb_parent_color = 1,
rb_right = 0x0,
rb_left = 0x0
},
deadline = 5788784166,
min_vruntime = 5785784166,
min_slice = 3000000,
group_node = {
next = 0xffff9118b75833c0,
prev = 0xffff9118b75833c0
},
on_rq = 0 '\000',
sched_delayed = 0 '\000',
rel_deadline = 0 '\000',
custom_slice = 0 '\000',
exec_start = 5630407844294,
sum_exec_runtime = 5031478,
prev_sum_exec_runtime = 5004139,
vruntime = 5785811505,
vlag = 0,
slice = 3000000,
nr_migrations = 0,
depth = 1,
parent = 0xffff9117e81a0600,
cfs_rq = 0xffff9117e81a3e00,
my_q = 0x0,
runnable_weight = 0,
avg = {
last_update_time = 5630386353152,
load_sum = 2555,
runnable_sum = 2617274,
util_sum = 83342,
period_contrib = 877,
load_avg = 39,
runnable_avg = 39,
util_avg = 1,
util_est = 2147483760
}
}
crash> cfs_rq 0xffff9117e81a3e00
struct cfs_rq {
load = {
weight = 0,
inv_weight = 0
},
nr_running = 0,
h_nr_running = 0,
idle_nr_running = 0,
idle_h_nr_running = 0,
h_nr_delayed = 0,
avg_vruntime = 0,
avg_load = 0,
min_vruntime = 5785811505,
forceidle_seq = 0,
min_vruntime_fi = 0,
tasks_timeline = {
rb_root = {
rb_node = 0x0
},
rb_leftmost = 0x0
},
curr = 0xffff9118b7583380,
next = 0x0,
avg = {
last_update_time = 5630386353152,
load_sum = 2617381,
runnable_sum = 2617379,
util_sum = 83417,
period_contrib = 877,
load_avg = 39,
runnable_avg = 39,
util_avg = 1,
util_est = 0
},
removed = {
lock = {
raw_lock = {
{
val = {
counter = 0
},
{
locked = 0 '\000',
pending = 0 '\000'
},
{
locked_pending = 0,
tail = 0
}
}
}
},
nr = 0,
load_avg = 0,
util_avg = 0,
runnable_avg = 0
},
last_update_tg_load_avg = 5630407057919,
tg_load_avg_contrib = 39,
propagate = 0,
prop_runnable_sum = 0,
h_load = 0,
last_h_load_update = 4296299815,
h_load_next = 0x0,
rq = 0xffff911a8eab89c0,
on_list = 1,
leaf_cfs_rq_list = {
next = 0xffff911794a2d348,
prev = 0xffff9119ebe62148
},
tg = 0xffff91178434a080,
idle = 0,
runtime_enabled = 0,
runtime_remaining = 0,
throttled_pelt_idle = 0,
throttled_clock = 0,
throttled_clock_pelt = 0,
throttled_clock_pelt_time = 0,
throttled_clock_self = 0,
throttled_clock_self_time = 0,
throttled = 0,
throttle_count = 0,
throttled_list = {
next = 0xffff9117e81a3fa8,
prev = 0xffff9117e81a3fa8
},
throttled_csd_list = {
next = 0xffff9117e81a3fb8,
prev = 0xffff9117e81a3fb8
}
}
crash>