[RFC PATCH 6/7] locking/rtqspinlock: Voluntarily yield CPU when need_resched()

From: Waiman Long
Date: Tue Jan 03 2017 - 13:01:49 EST


Ideally we want the CPU to be preemptible even when inside or waiting
for a lock. We cannot make it preemptible when inside a lock critical
section, but we can try to make the task voluntarily yield the CPU
when waiting for a lock.

This patch checks the need_resched() flag and yields the CPU when the
preemption count is 1, i.e. when the spin_lock() call is not made from
a region where preemption has already been disabled. Otherwise, it will
just perform RT spinning with a minimum priority of 1.

Signed-off-by: Waiman Long <longman@xxxxxxxxxx>
---
kernel/locking/qspinlock_rt.h | 68 +++++++++++++++++++++++++++++++++++++++++--
1 file changed, 65 insertions(+), 3 deletions(-)

diff --git a/kernel/locking/qspinlock_rt.h b/kernel/locking/qspinlock_rt.h
index 0c4d051..18ec1f8 100644
--- a/kernel/locking/qspinlock_rt.h
+++ b/kernel/locking/qspinlock_rt.h
@@ -43,6 +43,16 @@
* it will have to break out of the MCS wait queue just like what is done
* in the OSQ lock. Then it has to retry RT spinning if it has been boosted
* to RT priority.
+ *
+ * Another RT requirement is that the CPU needs to be preemptible even when
+ * waiting for a spinlock. If the task has already acquired the lock, we
+ * will let it run to completion to release the lock and reenable preemption.
+ * For a non-nested spinlock, a spinlock waiter will periodically check the
+ * need_resched flag to see if it should break out of the waiting loop and
+ * yield the CPU as long as the preemption count indicates just one
+ * preempt_disable(). For nested spinlock with outer lock acquired, it will
+ * boost its priority to the highest RT priority level to try to acquire the
+ * inner lock, finish up its work, release the locks and reenable preemption.
*/
#include <linux/sched.h>

@@ -51,6 +61,15 @@
#endif

/*
+ * The lock waiter should reschedule only when it is in task context, the
+ * PREEMPT_NEED_RESCHED flag is set and the preemption count is one.
+ * If only the TIF_NEED_RESCHED flag is set, the waiter will be moved to RT
+ * spinning with a minimum priority of 1.
+ */
+#define rt_should_resched() (preempt_count() == \
+ (PREEMPT_OFFSET | PREEMPT_NEED_RESCHED))
+
+/*
* For proper unqueuing from the MCS wait queue, we need to store the encoded
* tail code as well the previous node pointer into the extra MCS node. Since
* CPUs in interrupt context won't use the per-CPU MCS nodes anymore. So only
@@ -133,9 +152,12 @@ static bool __rt_spin_trylock(struct qspinlock *lock,
if (!task)
min_prio = in_nmi() ? MAX_RT_PRIO + 1
: in_irq() ? MAX_RT_PRIO : 1;
+ else if (need_resched() && !min_prio)
+ min_prio = 1;
if (!(prio = rt_task_priority(task, min_prio)))
return false;

+
/*
* Spin on the lock and try to set its priority into the pending byte.
*/
@@ -189,6 +211,33 @@ static bool __rt_spin_trylock(struct qspinlock *lock,
prio = MAX(ol ? ol->pending : 0,
rt_task_priority(task, min_prio));

+ /*
+ * If another task needs this CPU, we will yield it if in
+ * the process context and it is not a nested spinlock call.
+ * Otherwise, we will raise our RT priority to try to get
+ * the lock ASAP.
+ */
+ if (!task || !rt_should_resched())
+ continue;
+
+ if (outerlock) {
+ if (min_prio < MAX_RT_PRIO)
+ min_prio = MAX_RT_PRIO;
+ continue;
+ }
+
+ /*
+ * In the unlikely event that we need to relinquish the CPU,
+ * we must not stay recorded as the highest priority waiter:
+ * clear the pending priority byte if it still holds ours.
+ */
+ if (mypdprio) {
+ lockpend = READ_ONCE(l->locked_pending);
+ pdprio = (u8)(lockpend >> _Q_PENDING_OFFSET);
+ if (pdprio == mypdprio)
+ cmpxchg_relaxed(&l->pending, pdprio, 0);
+ }
+ schedule_preempt_disabled();
}
return true;
}
@@ -293,7 +342,7 @@ static bool rt_wait_node_or_unqueue(struct qspinlock *lock,
rt_write_prev(node, prev); /* Save previous node pointer */

while (!READ_ONCE(node->locked)) {
- if (rt_task_priority(current, 0))
+ if (rt_task_priority(current, 0) || need_resched())
goto unqueue;
cpu_relax();
}
@@ -354,6 +403,12 @@ static bool rt_wait_node_or_unqueue(struct qspinlock *lock,
*/
__this_cpu_dec(mcs_nodes[0].count);

+ /*
+ * Yield the CPU if another task needs it and rescheduling is allowed here.
+ */
+ if (rt_should_resched())
+ schedule_preempt_disabled();
+
return true; /* Need to retry RT spinning */
}

@@ -385,9 +440,10 @@ static u32 rt_spin_lock_or_retry(struct qspinlock *lock,
}
/*
* We need to break out of the non-RT wait queue and do
- * RT spinnning if we become an RT task.
+ * RT spinning if we become an RT task or another task needs
+ * the CPU.
*/
- if (rt_task_priority(current, 0)) {
+ if (rt_task_priority(current, 0) || need_resched()) {
retry = true;
goto unlock;
}
@@ -427,6 +483,12 @@ static u32 rt_spin_lock_or_retry(struct qspinlock *lock,
*/
__this_cpu_dec(mcs_nodes[0].count);

+ /*
+ * Yield the CPU if another task needs it and rescheduling is allowed here.
+ */
+ if (retry && rt_should_resched())
+ schedule_preempt_disabled();
+
return retry ? RT_RETRY : 1;
}

--
1.8.3.1