[PATCH] sched/deadline/rtmutex: Fix a PI crash for deadline tasks
From: Xunlei Pang
Date: Fri Apr 01 2016 - 07:01:24 EST
I found a kernel crash while playing with deadline PI rtmutex.
BUG: unable to handle kernel NULL pointer dereference at 0000000000000018
IP: [<ffffffff810eeb8f>] rt_mutex_get_top_task+0x1f/0x30
PGD 232a75067 PUD 230947067 PMD 0
Oops: 0000 [#1] SMP
CPU: 1 PID: 10994 Comm: a.out Not tainted
Call Trace:
[<ffffffff810cf8aa>] ? enqueue_task_dl+0x2a/0x320
[<ffffffff810b658c>] enqueue_task+0x2c/0x80
[<ffffffff810ba763>] activate_task+0x23/0x30
[<ffffffff810d0ab5>] pull_dl_task+0x1d5/0x260
[<ffffffff810d0be6>] pre_schedule_dl+0x16/0x20
[<ffffffff8164e783>] __schedule+0xd3/0x900
[<ffffffff8164efd9>] schedule+0x29/0x70
[<ffffffff8165035b>] __rt_mutex_slowlock+0x4b/0xc0
[<ffffffff81650501>] rt_mutex_slowlock+0xd1/0x190
[<ffffffff810eeb33>] rt_mutex_timed_lock+0x53/0x60
[<ffffffff810ecbfc>] futex_lock_pi.isra.18+0x28c/0x390
[<ffffffff810cfa15>] ? enqueue_task_dl+0x195/0x320
[<ffffffff810d0bac>] ? prio_changed_dl+0x6c/0x90
[<ffffffff810ed8b0>] do_futex+0x190/0x5b0
[<ffffffff810edd50>] SyS_futex+0x80/0x180
[<ffffffff8165a089>] system_call_fastpath+0x16/0x1b
RIP [<ffffffff810eeb8f>] rt_mutex_get_top_task+0x1f/0x30
This is because rt_mutex_enqueue_pi() and rt_mutex_dequeue_pi()
are only protected by pi_lock when operating pi waiters, while
rt_mutex_get_top_task() will access them with rq lock held but
not holding pi_lock.
It's hard for rt_mutex_get_top_task() to hold pi_lock, so the
patch ensures rt_mutex_enqueue_pi() and rt_mutex_dequeue_pi()
lock rq when operating "pi_waiters" and "pi_waiters_leftmost".
We need this iff lock owner has the deadline priority.
Signed-off-by: Xunlei Pang <xlpang@xxxxxxxxxx>
---
include/linux/sched/deadline.h | 3 +++
kernel/locking/rtmutex.c | 18 ++++++++++++++++++
kernel/sched/deadline.c | 17 +++++++++++++++++
3 files changed, 38 insertions(+)
diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h
index 9089a2a..3083f6b 100644
--- a/include/linux/sched/deadline.h
+++ b/include/linux/sched/deadline.h
@@ -26,4 +26,7 @@ static inline bool dl_time_before(u64 a, u64 b)
return (s64)(a - b) < 0;
}
+extern void *dl_pi_waiters_lock(struct task_struct *p);
+extern void dl_pi_waiters_unlock(void *lockdata);
+
#endif /* _SCHED_DEADLINE_H */
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 3e74660..0fb247a 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -224,6 +224,7 @@ rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
struct rb_node *parent = NULL;
struct rt_mutex_waiter *entry;
int leftmost = 1;
+ void *dl_lockdata = NULL;
while (*link) {
parent = *link;
@@ -236,24 +237,38 @@ rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
}
}
+ if (dl_task(task))
+ dl_lockdata = dl_pi_waiters_lock(task);
+
if (leftmost)
task->pi_waiters_leftmost = &waiter->pi_tree_entry;
rb_link_node(&waiter->pi_tree_entry, parent, link);
rb_insert_color(&waiter->pi_tree_entry, &task->pi_waiters);
+
+ if (dl_lockdata)
+ dl_pi_waiters_unlock(dl_lockdata);
}
static void
rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
+ void *dl_lockdata = NULL;
+
if (RB_EMPTY_NODE(&waiter->pi_tree_entry))
return;
+ if (dl_task(task))
+ dl_lockdata = dl_pi_waiters_lock(task);
+
if (task->pi_waiters_leftmost == &waiter->pi_tree_entry)
task->pi_waiters_leftmost = rb_next(&waiter->pi_tree_entry);
rb_erase(&waiter->pi_tree_entry, &task->pi_waiters);
RB_CLEAR_NODE(&waiter->pi_tree_entry);
+
+ if (dl_lockdata)
+ dl_pi_waiters_unlock(dl_lockdata);
}
/*
@@ -271,6 +286,9 @@ int rt_mutex_getprio(struct task_struct *task)
task->normal_prio);
}
+/*
+ * rq->lock of @task must be held.
+ */
struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
{
if (likely(!task_has_pi_waiters(task)))
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index a3048fa..7b8aa93 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -926,6 +926,23 @@ static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
__dequeue_dl_entity(dl_se);
}
+/*
+ * dl_pi_waiters_lock()/dl_pi_waiters_unlock() are needed by
+ * rt_mutex_enqueue_pi() and rt_mutex_dequeue_pi() to protect
+ * PI waiters accessed by rt_mutex_get_top_task().
+ */
+void *dl_pi_waiters_lock(struct task_struct *p)
+{
+ lockdep_assert_held(&p->pi_lock);
+
+ return __task_rq_lock(p);
+}
+
+void dl_pi_waiters_unlock(void *lockdata)
+{
+ __task_rq_unlock((struct rq *)lockdata);
+}
+
static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
{
struct task_struct *pi_task = rt_mutex_get_top_task(p);
--
1.8.3.1