Breaking up locks is better than implementing high-contention locks, but
if we must have high-contention locks, why not make them automatically
switch between light-weight ticket locks at low contention and queued
locks at high contention?

This commit therefore allows ticket locks to automatically switch
between pure ticket-lock and queued-lock operation as needed. If too
many CPUs are spinning on a given ticket lock, a queue structure is
allocated and the lock switches to queued-lock operation. When the
lock becomes free, it switches back to ticket-lock operation. The
low-order bit of the head counter indicates that the lock is in
queued mode, which forces an unconditional mismatch between the head
and tail counters. This approach means that the common-case code path
under low contention is very nearly that of a plain ticket lock.
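
For illustration, here is a minimal sketch of what that head-bit trick
means for the ticket fast path. The helper names below are
hypothetical and the fast-path code itself is not part of this patch;
the sketch only assumes the existing x86 arch_spinlock_t
tickets.head/tickets.tail layout and the fact (see TKT_Q_SWITCH below)
that tickets are handed out in increments of two, which leaves the
low-order bit of the head counter free to serve as the "queued" flag:

	/* Illustrative sketch only -- not part of the patch below. */
	static inline bool sketch_tkt_lock_is_queued(arch_spinlock_t *lock)
	{
		/* Low-order head bit set means a queue is attached. */
		return ACCESS_ONCE(lock->tickets.head) & 0x1;
	}

	static inline bool sketch_tkt_is_my_turn(arch_spinlock_t *lock,
						 __ticket_t my_ticket)
	{
		/*
		 * Tickets are even, so while the queued bit keeps the head
		 * counter odd, this comparison cannot succeed and ticket-mode
		 * spinners keep waiting instead of falsely seeing the lock
		 * as free.
		 */
		return ACCESS_ONCE(lock->tickets.head) == my_ticket;
	}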

A fixed number of queueing structures is statically allocated in an
array. The ticket-lock address is used to hash to an initial array
element; if that element is already in use, the search moves on to
the next element. If the entire array is already in use, the lock
simply continues to spin in ticket mode.
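
The tkt_q_hash() and tkt_q_next_slot() helpers that implement this
hash-and-probe scheme are not included in the excerpt below, so purely
as an illustration (the hash function here is an assumption, not the
one in the patch), they could look something like:

	/* Illustrative sketch only -- the real helpers are not shown here. */
	static int sketch_tkt_q_hash(arch_spinlock_t *asp)
	{
		/* Fold the lock address down to an index into tkt_q_heads[]. */
		return ((unsigned long)asp / sizeof(arch_spinlock_t)) %
		       TKT_Q_NQUEUES;
	}

	static int sketch_tkt_q_next_slot(int i)
	{
		/* Step circularly through the fixed-size array of queues. */
		return (i + 1) % TKT_Q_NQUEUES;
	}

tkt_q_find_head() below then probes from the hashed slot until it
either finds an element whose ->ref field matches the lock or wraps
back around to its starting point, in which case it returns NULL.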

This has been only lightly tested in the kernel, though a userspace
implementation has survived substantial testing.

Signed-off-by: Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx>
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h
index ad0ad07..cdaefdd 100644
--- a/arch/x86/include/asm/spinlock_types.h
+++ b/arch/x86/include/asm/spinlock_types.h
@@ -7,12 +7,18 @@
#include <linux/types.h>
-#if (CONFIG_NR_CPUS < 256)
+#if (CONFIG_NR_CPUS < 128)
typedef u8 __ticket_t;
typedef u16 __ticketpair_t;
-#else
+#define TICKET_T_CMP_GE(a, b) (UCHAR_MAX / 2 >= (unsigned char)((a) - (b)))
+#elif (CONFIG_NR_CPUS < 32768)
typedef u16 __ticket_t;
typedef u32 __ticketpair_t;
+#define TICKET_T_CMP_GE(a, b) (USHRT_MAX / 2 >= (unsigned short)((a) - (b)))
+#else
+typedef u32 __ticket_t;
+typedef u64 __ticketpair_t;
+#define TICKET_T_CMP_GE(a, b) (UINT_MAX / 2 >= (unsigned int)((a) - (b)))
#endif
+/*
+ * TKT_Q_SWITCH is twice the number of CPUs that must be spinning on a
+ * given ticket lock to motivate switching to spinning on a queue.
+ * The reason that it is twice the number is because the bottom bit of
+ * the ticket is reserved for the bit that indicates that a queue is
+ * associated with the lock.
+ */
+#define TKT_Q_SWITCH (16 * 2)
+
+/*
+ * TKT_Q_NQUEUES is the number of queues to maintain. Large systems
+ * might have multiple highly contended locks, so provide more queues for
+ * systems with larger numbers of CPUs.
+ */
+#define TKT_Q_NQUEUES (DIV_ROUND_UP(NR_CPUS + TKT_Q_SWITCH - 1, TKT_Q_SWITCH) * 2)
+
+/* The queues themselves. */
+struct tkt_q_head tkt_q_heads[TKT_Q_NQUEUES];
+/*
+ * Return a pointer to the queue header associated with the specified lock,
+ * or return NULL if there is no queue for the lock or if the lock's queue
+ * is in transition.
+ */
+static struct tkt_q_head *tkt_q_find_head(arch_spinlock_t *asp)
+{
+	int i;
+	int start;
+
+	start = i = tkt_q_hash(asp);
+	do
+		if (tkt_q_heads[i].ref == asp)
+			return &tkt_q_heads[i];
+	while ((i = tkt_q_next_slot(i)) != start);
+	return NULL;
+}
+/*
+ * Hand the lock off to the first CPU on the queue.
+ */
+void tkt_q_do_wake(arch_spinlock_t *asp)
+{
+	struct tkt_q_head *tqhp;
+	struct tkt_q *tqp;
+
+	/* If the queue is still being set up, wait for it. */
+	while ((tqhp = tkt_q_find_head(asp)) == NULL)
+		cpu_relax();
+
+	for (;;) {
+
+		/* Find the first queue element. */
+		tqp = ACCESS_ONCE(tqhp->spin);
+		if (tqp != NULL)
+			break; /* Element exists, hand off lock. */
+		if (tkt_q_try_unqueue(asp, tqhp))
+			return; /* No element, successfully removed queue. */
+		cpu_relax();
+	}
+	if (ACCESS_ONCE(tqhp->head_tkt) != -1)
+		ACCESS_ONCE(tqhp->head_tkt) = -1;