[PATCH] Fix CPU hotplug causing crashes in task selection logic

From: Joel Fernandes
Date: Tue Jun 30 2020 - 19:39:45 EST


Signed-off-by: Joel Fernandes <joelaf@xxxxxxxxxx>
---
kernel/sched/core.c | 34 ++++++++++++++++++++++++++++------
1 file changed, 28 insertions(+), 6 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0362102fa3d2..47a21013ba0d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4464,7 +4464,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
{
struct task_struct *next, *max = NULL;
const struct sched_class *class;
- const struct cpumask *smt_mask;
+ struct cpumask select_mask;
int i, j, cpu, occ = 0;
bool need_sync;

@@ -4499,7 +4499,13 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
finish_prev_task(rq, prev, rf);

cpu = cpu_of(rq);
- smt_mask = cpu_smt_mask(cpu);
+ cpumask_copy(&select_mask, cpu_smt_mask(cpu));
+
+ /*
+ * Always make sure the current CPU is set in select_mask so that the
+ * selection logic below runs on it.
+ */
+ cpumask_set_cpu(cpu, &select_mask);

/*
* core->core_task_seq, core->core_pick_seq, rq->core_sched_seq
@@ -4516,7 +4522,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)

/* reset state */
rq->core->core_cookie = 0UL;
- for_each_cpu(i, smt_mask) {
+ for_each_cpu(i, &select_mask) {
struct rq *rq_i = cpu_rq(i);

rq_i->core_pick = NULL;
@@ -4536,7 +4542,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
*/
for_each_class(class) {
again:
- for_each_cpu_wrap(i, smt_mask, cpu) {
+ for_each_cpu_wrap(i, &select_mask, cpu) {
struct rq *rq_i = cpu_rq(i);
struct task_struct *p;

@@ -4600,7 +4608,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
trace_printk("max: %s/%d %lx\n", max->comm, max->pid, max->core_cookie);

if (old_max) {
- for_each_cpu(j, smt_mask) {
+ for_each_cpu(j, &select_mask) {
if (j == i)
continue;

@@ -4625,6 +4633,10 @@ next_class:;

rq->core->core_pick_seq = rq->core->core_task_seq;
next = rq->core_pick;
+
+ /* Something should have been selected for the current CPU */
+ WARN_ON_ONCE(!next);
+
rq->core_sched_seq = rq->core->core_pick_seq;
trace_printk("picked: %s/%d %lx\n", next->comm, next->pid, next->core_cookie);

@@ -4636,7 +4648,7 @@ next_class:;
* their task. This ensures there is no inter-sibling overlap between
* non-matching user state.
*/
- for_each_cpu(i, smt_mask) {
+ for_each_cpu(i, &select_mask) {
struct rq *rq_i = cpu_rq(i);

WARN_ON_ONCE(!rq_i->core_pick);
--
2.28.0.402.g5ffc5be6b7-goog