[PATCH] sched,nohz: Change rq->nr_running to always use wrappers

From: Kirill Tkhai
Date: Thu May 08 2014 - 19:00:24 EST


Sometimes rq->nr_running may cross the boundary of 2 without the IPI
being sent to the rq's CPU, so the tick is not re-enabled there.
This looks like a possible cause of rare unexpected effects when
nohz_full is enabled.

This patch replaces all direct modifications of rq->nr_running with
the add_nr_running()/sub_nr_running() wrappers and makes
add_nr_running() detect the boundary crossing for any step size.
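
For example, throttle_cfs_rq()/unthrottle_cfs_rq() update
rq->nr_running in bulk ("rq->nr_running += task_delta"), bypassing
the wrapper and its check entirely; and even through the wrapper,
an "== 2" test misses any step bigger than one. A standalone
userspace sketch of the two conditions (not kernel code, the
1 -> 3 step is only illustrative):

	/* Demo: why "nr_running == 2" misses bulk updates. */
	#include <stdio.h>

	static unsigned int nr_running;

	/* Old condition: fires only when the result is exactly 2. */
	static int old_kick(unsigned int count)
	{
		nr_running += count;
		return nr_running == 2;
	}

	/* New condition: fires whenever the boundary of 2 is crossed. */
	static int new_kick(unsigned int count)
	{
		unsigned int prev_nr = nr_running;

		nr_running = prev_nr + count;
		return prev_nr < 2 && nr_running >= 2;
	}

	int main(void)
	{
		nr_running = 1;
		printf("old, 1 -> 3: kick=%d\n", old_kick(2));	/* 0: missed */
		nr_running = 1;
		printf("new, 1 -> 3: kick=%d\n", new_kick(2));	/* 1: caught */
		return 0;
	}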

Signed-off-by: Kirill Tkhai <tkhai@xxxxxxxxx>
CC: Frederic Weisbecker <fweisbec@xxxxxxxxx>
CC: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
CC: Ingo Molnar <mingo@xxxxxxxxxx>
---
 kernel/sched/deadline.c  |  4 ++--
 kernel/sched/fair.c      |  8 ++++----
 kernel/sched/rt.c        |  4 ++--
 kernel/sched/sched.h     | 12 +++++++-----
 kernel/sched/stop_task.c |  4 ++--
 5 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 800e99b..e0a04ae 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -741,7 +741,7 @@ void inc_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)

WARN_ON(!dl_prio(prio));
dl_rq->dl_nr_running++;
- inc_nr_running(rq_of_dl_rq(dl_rq));
+ add_nr_running(rq_of_dl_rq(dl_rq), 1);

inc_dl_deadline(dl_rq, deadline);
inc_dl_migration(dl_se, dl_rq);
@@ -755,7 +755,7 @@ void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
WARN_ON(!dl_prio(prio));
WARN_ON(!dl_rq->dl_nr_running);
dl_rq->dl_nr_running--;
- dec_nr_running(rq_of_dl_rq(dl_rq));
+ sub_nr_running(rq_of_dl_rq(dl_rq), 1);

dec_dl_deadline(dl_rq, dl_se->deadline);
dec_dl_migration(dl_se, dl_rq);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 28ccf50..5bf705d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3325,7 +3325,7 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq)
}

if (!se)
- rq->nr_running -= task_delta;
+ sub_nr_running(rq, task_delta);

cfs_rq->throttled = 1;
cfs_rq->throttled_clock = rq_clock(rq);
@@ -3376,7 +3376,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
}

if (!se)
- rq->nr_running += task_delta;
+ add_nr_running(rq, task_delta);

/* determine whether we need to wake up potentially idle cpu */
if (rq->curr == rq->idle && rq->cfs.nr_running)
@@ -3908,7 +3908,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)

if (!se) {
update_rq_runnable_avg(rq, rq->nr_running);
- inc_nr_running(rq);
+ add_nr_running(rq, 1);
}
hrtick_update(rq);
}
@@ -3968,7 +3968,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
}

if (!se) {
- dec_nr_running(rq);
+ sub_nr_running(rq, 1);
update_rq_runnable_avg(rq, 1);
}
hrtick_update(rq);
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 7795e29..0ebfd7a 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -973,7 +973,7 @@ dequeue_top_rt_rq(struct rt_rq *rt_rq)

BUG_ON(!rq->nr_running);

- rq->nr_running -= rt_rq->rt_nr_running;
+ sub_nr_running(rq, rt_rq->rt_nr_running);
rt_rq->rt_queued = 0;
}

@@ -989,7 +989,7 @@ enqueue_top_rt_rq(struct rt_rq *rt_rq)
if (rt_rq_throttled(rt_rq) || !rt_rq->rt_nr_running)
return;

- rq->nr_running += rt_rq->rt_nr_running;
+ add_nr_running(rq, rt_rq->rt_nr_running);
rt_rq->rt_queued = 1;
}

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index b2cbe81..600e229 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1206,12 +1206,14 @@ extern void update_idle_cpu_load(struct rq *this_rq);

extern void init_task_runnable_average(struct task_struct *p);

-static inline void inc_nr_running(struct rq *rq)
+static inline void add_nr_running(struct rq *rq, unsigned count)
{
- rq->nr_running++;
+ unsigned prev_nr = rq->nr_running;
+
+ rq->nr_running = prev_nr + count;

#ifdef CONFIG_NO_HZ_FULL
- if (rq->nr_running == 2) {
+ if (prev_nr < 2 && rq->nr_running >= 2) {
if (tick_nohz_full_cpu(rq->cpu)) {
/* Order rq->nr_running write against the IPI */
smp_wmb();
@@ -1221,9 +1223,9 @@ static inline void inc_nr_running(struct rq *rq)
#endif
}

-static inline void dec_nr_running(struct rq *rq)
+static inline void sub_nr_running(struct rq *rq, unsigned count)
{
- rq->nr_running--;
+ rq->nr_running -= count;
}

static inline void rq_last_tick_reset(struct rq *rq)
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index d6ce65d..bfe0eda 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -41,13 +41,13 @@ pick_next_task_stop(struct rq *rq, struct task_struct *prev)
static void
enqueue_task_stop(struct rq *rq, struct task_struct *p, int flags)
{
- inc_nr_running(rq);
+ add_nr_running(rq, 1);
}

static void
dequeue_task_stop(struct rq *rq, struct task_struct *p, int flags)
{
- dec_nr_running(rq);
+ sub_nr_running(rq, 1);
}

static void yield_task_stop(struct rq *rq)

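With the wrappers in place, even a bulk enqueue crosses the boundary
visibly, e.g.:

	add_nr_running(rq, 2);	/* 0 -> 2: prev_nr < 2 && nr_running >= 2, CPU is kicked */

sub_nr_running() stays a plain decrement; presumably no check is
needed on the way down, since stopping the tick is driven from the
tick path itself (cf. rq_last_tick_reset() right below the wrappers).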