[PATCH v2 8/9] atomic,x86: Alternative atomic_*_overflow() scheme

From: Peter Zijlstra
Date: Fri Dec 10 2021 - 11:27:44 EST


Shift the overflow range from [0,INT_MIN] to [-1,INT_MIN]; this allows
optimizing atomic_inc_overflow() to use "jle" to detect an increment
from free-or-negative (with -1 being the new free value and its increment
being 0, which sets ZF).

This then obviously changes atomic_dec*_overflow() since it must now
detect the 0->-1 transition rather than the 1->0. Luckily this is
reflected in the carry flag (since we need to borrow to decrement 0).
However this means decrement must now use the SUB instruction with a
literal, since DEC doesn't set CF.

This then gives the following primitives:

  [-1, INT_MIN]                         [0, INT_MIN]

  inc()                                 inc()
    lock inc %[var]                       mov  $-1, %[reg]
    jle  error-free-or-negative           lock xadd %[reg], %[var]
                                          test %[reg], %[reg]
                                          jle  error-zero-or-negative

  dec()                                 dec()
    lock sub $1, %[var]                   lock dec %[var]
    jc   error-to-free                    jle  error-zero-or-negative
    jl   error-from-negative

  dec_and_test()                        dec_and_test()
    lock sub $1, %[var]                   lock dec %[var]
    jc   do-free                          jl   error-from-negative
    jl   error-from-negative              je   do-free

Make sure to set ATOMIC_OVERFLOW_OFFSET to 1 such that other code
interacting with these primitives can re-center 0.

Suggested-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
arch/x86/include/asm/atomic.h | 20 +++++++++++++++-----
1 file changed, 15 insertions(+), 5 deletions(-)

--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -263,21 +263,31 @@ static __always_inline int arch_atomic_f
}
#define arch_atomic_fetch_xor arch_atomic_fetch_xor

-#define arch_atomic_dec_overflow(_v, _label) \
- asm_volatile_goto(LOCK_PREFIX "decl %[var]\n\t" \
+#define ATOMIC_OVERFLOW_OFFSET 1
+
+#define arch_atomic_inc_overflow(_v, _label) \
+ asm_volatile_goto(LOCK_PREFIX "incl %[var]\n\t" \
"jle %l1" \
: : [var] "m" ((_v)->counter) \
: "memory" \
: _label)

+#define arch_atomic_dec_overflow(_v, _label) \
+ asm_volatile_goto(LOCK_PREFIX "subl $1, %[var]\n\t" \
+ "jc %l1\n\t" \
+ "jl %l1" \
+ : : [var] "m" ((_v)->counter) \
+ : "memory" \
+ : _label)
+
#define arch_atomic_dec_and_test_overflow(_v, _label) \
({ \
__label__ __zero; \
__label__ __out; \
bool __ret = false; \
- asm_volatile_goto(LOCK_PREFIX "decl %[var]\n\t" \
- "jl %l2\n\t" \
- "je %l[__zero]" \
+ asm_volatile_goto(LOCK_PREFIX "subl $1, %[var]\n\t" \
+ "jc %l[__zero]\n\t" \
+ "jl %l2" \
: : [var] "m" ((_v)->counter) \
: "memory" \
: __zero, _label); \