[PATCH] sched: Make dl_task_time() use task_rq_lock()

From: Peter Zijlstra
Date: Tue Feb 17 2015 - 07:31:49 EST

Next message: Michael S. Tsirkin: "Re: [PATCH] vhost: support upto 509 memory regions"
Previous message: Yun Wu (Abel): "Re: [PATCH v2 5/6] irqchip: gicv3-its: add support for power down"
In reply to: Kirill Tkhai: "Re: [PATCH 1/2] sched: Move __task_rq_{, un}lock() to kernel/sched/sched.h"
Next in thread: Peter Zijlstra: "Re: [PATCH] sched: Make dl_task_time() use task_rq_lock()"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]

On Tue, Feb 17, 2015 at 02:33:58PM +0300, Kirill Tkhai wrote:

> So, we move task_rq_lock() to sched.h, and dl_task_timer() uses it?

Yep, like this. I've also modified your earlier patch to dl_task_time()
back to its original form.

---
Subject: sched: Make dl_task_time() use task_rq_lock()
From: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Date: Tue Feb 17 13:22:25 CET 2015

Kirill reported that a dl task can be throttled and dequeued at the
same time. This happens, when it becomes throttled in schedule(),
which is called to go to sleep:

current->state = TASK_INTERRUPTIBLE;
schedule()
deactivate_task()
dequeue_task_dl()
update_curr_dl()
start_dl_timer()
__dequeue_task_dl()
prev->on_rq = 0;

This invalidates the assumption from commit 0f397f2c90ce ("sched/dl:
Fix race in dl_task_timer()"):

"The only reason we don't strictly need ->pi_lock now is because
we're guaranteed to have p->state == TASK_RUNNING here and are
thus free of ttwu races".

And therefore we have to use the full task_rq_lock() here.

This further amends the fact that we forgot to update the rq lock loop
for TASK_ON_RQ_MIGRATE, from commit cca26e8009d1 ("sched: Teach
scheduler to understand TASK_ON_RQ_MIGRATING state").

Reported-by: Kirill Tkhai <ktkhai@xxxxxxxxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
kernel/sched/core.c | 76 ------------------------------------------------
kernel/sched/deadline.c | 12 +------
kernel/sched/sched.h | 76 ++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 79 insertions(+), 85 deletions(-)

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -307,82 +307,6 @@ __read_mostly int scheduler_running;
int sysctl_sched_rt_runtime = 950000;

/*
- * __task_rq_lock - lock the rq @p resides on.
- */
-static inline struct rq *__task_rq_lock(struct task_struct *p)
- __acquires(rq->lock)
-{
- struct rq *rq;
-
- lockdep_assert_held(&p->pi_lock);
-
- for (;;) {
- rq = task_rq(p);
- raw_spin_lock(&rq->lock);
- if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
- return rq;
- raw_spin_unlock(&rq->lock);
-
- while (unlikely(task_on_rq_migrating(p)))
- cpu_relax();
- }
-}
-
-/*
- * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
- */
-static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
- __acquires(p->pi_lock)
- __acquires(rq->lock)
-{
- struct rq *rq;
-
- for (;;) {
- raw_spin_lock_irqsave(&p->pi_lock, *flags);
- rq = task_rq(p);
- raw_spin_lock(&rq->lock);
- /*
- * move_queued_task() task_rq_lock()
- *
- * ACQUIRE (rq->lock)
- * [S] ->on_rq = MIGRATING [L] rq = task_rq()
- * WMB (__set_task_cpu()) ACQUIRE (rq->lock);
- * [S] ->cpu = new_cpu [L] task_rq()
- * [L] ->on_rq
- * RELEASE (rq->lock)
- *
- * If we observe the old cpu in task_rq_lock, the acquire of
- * the old rq->lock will fully serialize against the stores.
- *
- * If we observe the new cpu in task_rq_lock, the acquire will
- * pair with the WMB to ensure we must then also see migrating.
- */
- if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
- return rq;
- raw_spin_unlock(&rq->lock);
- raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
-
- while (unlikely(task_on_rq_migrating(p)))
- cpu_relax();
- }
-}
-
-static void __task_rq_unlock(struct rq *rq)
- __releases(rq->lock)
-{
- raw_spin_unlock(&rq->lock);
-}
-
-static inline void
-task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
- __releases(rq->lock)
- __releases(p->pi_lock)
-{
- raw_spin_unlock(&rq->lock);
- raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
-}
-
-/*
* this_rq_lock - lock this runqueue and disable interrupts.
*/
static struct rq *this_rq_lock(void)
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -511,16 +511,10 @@ static enum hrtimer_restart dl_task_time
struct sched_dl_entity,
dl_timer);
struct task_struct *p = dl_task_of(dl_se);
+ unsigned long flags;
struct rq *rq;
-again:
- rq = task_rq(p);
- raw_spin_lock(&rq->lock);

- if (rq != task_rq(p)) {
- /* Task was moved, retrying. */
- raw_spin_unlock(&rq->lock);
- goto again;
- }
+ rq = task_rq_lock(current, &flags);

/*
* We need to take care of several possible races here:
@@ -555,7 +549,7 @@ static enum hrtimer_restart dl_task_time
push_dl_task(rq);
#endif
unlock:
- raw_spin_unlock(&rq->lock);
+ task_rq_unlock(rq, current, &flags);

return HRTIMER_NORESTART;
}
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1484,6 +1484,82 @@ static inline void double_raw_lock(raw_s
}

/*
+ * __task_rq_lock - lock the rq @p resides on.
+ */
+static inline struct rq *__task_rq_lock(struct task_struct *p)
+ __acquires(rq->lock)
+{
+ struct rq *rq;
+
+ lockdep_assert_held(&p->pi_lock);
+
+ for (;;) {
+ rq = task_rq(p);
+ raw_spin_lock(&rq->lock);
+ if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
+ return rq;
+ raw_spin_unlock(&rq->lock);
+
+ while (unlikely(task_on_rq_migrating(p)))
+ cpu_relax();
+ }
+}
+
+/*
+ * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
+ */
+static inline struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
+ __acquires(p->pi_lock)
+ __acquires(rq->lock)
+{
+ struct rq *rq;
+
+ for (;;) {
+ raw_spin_lock_irqsave(&p->pi_lock, *flags);
+ rq = task_rq(p);
+ raw_spin_lock(&rq->lock);
+ /*
+ * move_queued_task() task_rq_lock()
+ *
+ * ACQUIRE (rq->lock)
+ * [S] ->on_rq = MIGRATING [L] rq = task_rq()
+ * WMB (__set_task_cpu()) ACQUIRE (rq->lock);
+ * [S] ->cpu = new_cpu [L] task_rq()
+ * [L] ->on_rq
+ * RELEASE (rq->lock)
+ *
+ * If we observe the old cpu in task_rq_lock, the acquire of
+ * the old rq->lock will fully serialize against the stores.
+ *
+ * If we observe the new cpu in task_rq_lock, the acquire will
+ * pair with the WMB to ensure we must then also see migrating.
+ */
+ if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
+ return rq;
+ raw_spin_unlock(&rq->lock);
+ raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
+
+ while (unlikely(task_on_rq_migrating(p)))
+ cpu_relax();
+ }
+}
+
+static inline void __task_rq_unlock(struct rq *rq)
+ __releases(rq->lock)
+{
+ raw_spin_unlock(&rq->lock);
+}
+
+static inline void
+task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
+ __releases(rq->lock)
+ __releases(p->pi_lock)
+{
+ raw_spin_unlock(&rq->lock);
+ raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
+}
+
+/*
* double_rq_lock - safely lock two runqueues
*
* Note this does not disable interrupts like task_rq_lock,
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

Next message: Michael S. Tsirkin: "Re: [PATCH] vhost: support upto 509 memory regions"
Previous message: Yun Wu (Abel): "Re: [PATCH v2 5/6] irqchip: gicv3-its: add support for power down"
In reply to: Kirill Tkhai: "Re: [PATCH 1/2] sched: Move __task_rq_{, un}lock() to kernel/sched/sched.h"
Next in thread: Peter Zijlstra: "Re: [PATCH] sched: Make dl_task_time() use task_rq_lock()"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]