On Fri, Oct 30, 2015 at 07:26:37PM -0400, Waiman Long wrote:
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -23,6 +23,19 @@
#define _Q_SLOW_VAL	(3U << _Q_LOCKED_OFFSET)
/*
+ * Queue Node Adaptive Spinning
+ *
+ * A queue node vCPU will stop spinning if the vCPU in the previous node is
+ * not running. The one lock stealing attempt allowed at slowpath entry
+ * mitigates the slight slowdown for non-overcommitted guest with this
+ * aggressive wait-early mechanism.
+ *
+ * The status of the previous node will be checked at fixed interval
+ * controlled by PV_PREV_CHECK_MASK.
+ */
+#define PV_PREV_CHECK_MASK 0xff
+
+/*
 * Queue node uses: vcpu_running & vcpu_halted.
 * Queue head uses: vcpu_running & vcpu_hashed.
*/
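(As a rough model of that "fixed interval", and assuming SPIN_THRESHOLD is
still (1 << 15): with the loop counting down, the low-byte mask only lets
the prev->state load through once every 256 iterations, i.e. 128 times per
spin round. A quick user-space sketch, not kernel code:)

	#include <stdio.h>

	#define PV_PREV_CHECK_MASK	0xff
	#define SPIN_THRESHOLD		(1 << 15)	/* assumed to match the kernel value */

	int main(void)
	{
		int loop, checks = 0;

		for (loop = SPIN_THRESHOLD; loop; loop--)
			if (!(loop & PV_PREV_CHECK_MASK))
				checks++;	/* prev->state would be read here */

		/* prints: 128 prev->state reads per 32768 spins */
		printf("%d prev->state reads per %d spins\n", checks, SPIN_THRESHOLD);
		return 0;
	}
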
@@ -202,6 +215,20 @@ static struct pv_node *pv_unhash(struct qspinlock *lock)
}
/*
+ * Return true when it is time to check the previous node and it is not
+ * in a running state.
+ */
+static inline bool
+pv_wait_early(struct pv_node *prev, int loop)
+{
+
+ if ((loop & PV_PREV_CHECK_MASK) != 0)
+ return false;
+
+ return READ_ONCE(prev->state) != vcpu_running;
+}
So it appears to me the sole purpose of PV_PREV_CHECK_MASK is to avoid
touching the prev->state cacheline too hard. Yet that is not mentioned
anywhere above.
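Something like the following in that comment block would capture it
(sketch only, wording is mine):

	/*
	 * The status of the previous node will be checked at a fixed
	 * interval controlled by PV_PREV_CHECK_MASK, so that we do not
	 * hammer the prev->state cacheline on every spin iteration.
	 */
	#define PV_PREV_CHECK_MASK	0xff
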
+static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
{
struct pv_node *pn = (struct pv_node *)node;
+ struct pv_node *pp = (struct pv_node *)prev;
int waitcnt = 0;
int loop;
+ bool wait_early;
/* waitcnt processing will be compiled out if !QUEUED_LOCK_STAT */
for (;; waitcnt++) {
- for (loop = SPIN_THRESHOLD; loop; loop--) {
+ for (wait_early = false, loop = SPIN_THRESHOLD; loop; loop--) {
if (READ_ONCE(node->locked))
return;
+ if (pv_wait_early(pp, loop)) {
+ wait_early = true;
+ break;
+ }
cpu_relax();
}
So if prev points to another node, it will never see vcpu_running. Was
that fully intended?
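To make that concrete, here is how I read the relevant states (sketch from
memory, the enum lives near the top of this file):

	enum vcpu_state {
		vcpu_running = 0,
		vcpu_halted,		/* halted, waiting in pv_wait() */
		vcpu_hashed,		/* vcpu_halted + lock hashed */
	};

	/*
	 * For a queued (non-head) prev, a sampled pv_wait_early() check
	 * only keeps us spinning while prev->state is vcpu_running; once
	 * prev has stored vcpu_halted (or pv_kick_node() has made it
	 * vcpu_hashed), the check makes us bail out of the spin loop.
	 */
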
FYI, I think I've now seen all patches ;-)