Re: [PATCH] sched/fair: do not announce throttled next buddy in dequeue_task_fair

From: Konstantin Khlebnikov
Date: Tue Jun 21 2016 - 09:44:43 EST


On 16.06.2016 15:57, Konstantin Khlebnikov wrote:
Hierarchy could be already throttled at this point. Throttled next
buddy could trigger null pointer dereference in pick_next_task_fair().

trivial debug in set_next_buddy

@@ -4755,8 +4758,11 @@ static void set_next_buddy(struct sched_entity *se)
if (entity_is_task(se) && unlikely(task_of(se)->policy == SCHED_IDLE))
return;

- for_each_sched_entity(se)
+ for_each_sched_entity(se) {
+ if (WARN_ON_ONCE(!se->on_rq))
+ return;
cfs_rq_of(se)->next = se;
+ }
}

catched this

<4>[32815.274642] ------------[ cut here ]------------
<4>[32815.274651] WARNING: CPU: 6 PID: 92082 at kernel/sched/fair.c:4819 set_next_buddy+0x61/0x70()
<4>[32815.274652] Modules linked in: macvlan overlay ip6t_REJECT nf_reject_ipv6 ip6table_filter xt_multiport ip6_tables x_tables tcp_diag inet_diag bridge cls_cgroup sch_htb netconsole configfs 8021q mrp garp stp llc x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm crc32_pclmul aesni_intel ablk_helper cryptd lrw gf128mul ast ttm drm_kms_helper drm glue_helper aes_x86_64 sb_edac edac_core microcode sysimgblt sysfillrect syscopyarea lpc_ich mlx4_en mlx4_core vxlan udp_tunnel ip6_udp_tunnel tcp_htcp igb i2c_algo_bit ixgbe i2c_core dca ahci ptp libahci pps_core mdio raid10 raid456 async_pq async_xor xor async_memcpy async_raid6_recov raid6_pq async_tx raid1 raid0 multipath linear [last unloaded: ipmi_msghandler]
<4>[32815.274691] CPU: 6 PID: 92082 Comm: mlauncher.pl Not tainted 3.18.25-28.debug.7 #1
<4>[32815.274692] Hardware name: GIGABYTE T17I-234/GA-7PTSH, BIOS R22 07/21/2015
<4>[32815.274694] 00000000000012d3 ffff8827a6623b08 ffffffff816d0ec3 0000000000000007
<4>[32815.274695] 0000000000000000 ffff8827a6623b48 ffffffff8106bb2c ffff8827a6623b78
<4>[32815.274697] ffff8800795eca80 ffff880079622480 ffff8800795eca80 ffff88326fb9dc20
<4>[32815.274699] Call Trace:
<4>[32815.274704] [<ffffffff816d0ec3>] dump_stack+0x4e/0x68
<4>[32815.274707] [<ffffffff8106bb2c>] warn_slowpath_common+0x8c/0xc0
<4>[32815.274709] [<ffffffff8106bb7a>] warn_slowpath_null+0x1a/0x20
<4>[32815.274710] [<ffffffff8109af81>] set_next_buddy+0x61/0x70
<4>[32815.274712] [<ffffffff8109ea98>] check_preempt_wakeup+0x208/0x280
<4>[32815.274717] [<ffffffff81092abf>] check_preempt_curr+0x8f/0xa0
<4>[32815.274718] [<ffffffff8109b9e1>] attach_task+0x51/0x60
<4>[32815.274721] [<ffffffff810a1c55>] load_balance+0x605/0x970
<4>[32815.274723] [<ffffffff810a247a>] pick_next_task_fair+0x4ba/0x730
<4>[32815.274725] [<ffffffff8109e625>] ? dequeue_task_fair+0x315/0x580
<4>[32815.274729] [<ffffffff816d4143>] __schedule+0x103/0x800
<4>[32815.274731] [<ffffffff816d4919>] schedule+0x29/0x70
<4>[32815.274733] [<ffffffff816d7a5c>] do_nanosleep+0xac/0x130
<4>[32815.274736] [<ffffffff810c358d>] hrtimer_nanosleep+0xad/0x160
<4>[32815.274738] [<ffffffff810c1f70>] ? update_rmtp+0x70/0x70
<4>[32815.274740] [<ffffffff810c36b6>] SyS_nanosleep+0x76/0x90
<4>[32815.274741] [<ffffffff816d8f09>] system_call_fastpath+0x12/0x17
<4>[32815.274743] ---[ end trace 656c1001c069cc10 ]---


Signed-off-by: Konstantin Khlebnikov <khlebnikov@xxxxxxxxxxxxxx>
Cc: Stable <stable@xxxxxxxxxxxxxxx> # v3.2+
---
kernel/sched/fair.c | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index fe809fe169d2..3c6b038cb734 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4519,15 +4519,14 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)

/* Don't dequeue parent if it has other entities besides us */
if (cfs_rq->load.weight) {
+ /* avoid re-evaluating load for this entity */
+ se = parent_entity(se);
/*
* Bias pick_next to pick a task from this cfs_rq, as
* p is sleeping when it is within its sched_slice.
*/
- if (task_sleep && parent_entity(se))
- set_next_buddy(parent_entity(se));
-
- /* avoid re-evaluating load for this entity */
- se = parent_entity(se);
+ if (task_sleep && se && !throttled_hierarchy(cfs_rq))
+ set_next_buddy(se);
break;
}
flags |= DEQUEUE_SLEEP;



--
Konstantin