You could then augment that with [cmp]xchg_{acquire,release} as
appropriate.
An alternate implementation is shown after the patch excerpt below.
+/*
* In order to acquire the lock, the caller should declare a local node and
* pass a reference of the node to this function in addition to the lock.
* If the lock has already been acquired, then this will proceed to spin
@@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
node->locked = 0;
node->next = NULL;
- prev = xchg(lock, node);
+ /* xchg() provides a memory barrier */
+ prev = xchg_acquire(lock, node);
if (likely(prev == NULL)) {
/* Lock acquired */
return;
}
ACCESS_ONCE(prev->next) = node;
- smp_wmb();
- /* Wait until the lock holder passes the lock down */
- while (!ACCESS_ONCE(node->locked))
+ /*
+ * Wait until the lock holder passes the lock down.
+ * Using smp_load_acquire() provides a memory barrier that
+ * ensures subsequent operations happen after the lock is acquired.
+ */
+ while (!(smp_load_acquire(&node->locked)))
arch_mutex_cpu_relax();
while (!ACCESS_ONCE(node->locked))
arch_mutex_cpu_relax();
smp_load_acquire(&node->locked);
This leaves the smp_load_acquire() at the end to provide the appropriate barrier.
Would that be acceptable?
Tim