[PATCH v1 2/2] powerpc/atomics: Use immediate operand when possible

From: Christophe Leroy
Date: Thu Apr 08 2021 - 11:33:54 EST


Today we get the following code generation for atomic operations:

c001bb2c: 39 20 00 01 li r9,1
c001bb30: 7d 40 18 28 lwarx r10,0,r3
c001bb34: 7d 09 50 50 subf r8,r9,r10
c001bb38: 7d 00 19 2d stwcx. r8,0,r3

c001c7a8: 39 40 00 01 li r10,1
c001c7ac: 7d 00 18 28 lwarx r8,0,r3
c001c7b0: 7c ea 42 14 add r7,r10,r8
c001c7b4: 7c e0 19 2d stwcx. r7,0,r3

By allowing GCC to choose between immediate or regular operation,
we get:

c001bb2c: 7d 20 18 28 lwarx r9,0,r3
c001bb30: 39 49 ff ff addi r10,r9,-1
c001bb34: 7d 40 19 2d stwcx. r10,0,r3
--
c001c7a4: 7d 40 18 28 lwarx r10,0,r3
c001c7a8: 39 0a 00 01 addi r8,r10,1
c001c7ac: 7d 00 19 2d stwcx. r8,0,r3

For "and", the dot form has to be used because "andi" doesn't exist.

For logical operations we use unsigned 16 bits immediate.
For arithmetic operations we use signed 16 bits immediate.

On pmac32_defconfig, it reduces the text by approx another 8 kbytes.

Signed-off-by: Christophe Leroy <christophe.leroy@xxxxxxxxxx>
---
arch/powerpc/include/asm/atomic.h | 56 +++++++++++++++----------------
1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 61c6e8b200e8..e4b5e2f25ba7 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -37,62 +37,62 @@ static __inline__ void atomic_set(atomic_t *v, int i)
__asm__ __volatile__("stw%U0%X0 %1,%0" : "=m"UPD_CONSTR(v->counter) : "r"(i));
}

-#define ATOMIC_OP(op, asm_op) \
+#define ATOMIC_OP(op, asm_op, dot, sign) \
static __inline__ void atomic_##op(int a, atomic_t *v) \
{ \
int t; \
\
__asm__ __volatile__( \
"1: lwarx %0,0,%3 # atomic_" #op "\n" \
- #asm_op " %0,%2,%0\n" \
+ #asm_op "%I2" dot " %0,%0,%2\n" \
" stwcx. %0,0,%3 \n" \
" bne- 1b\n" \
- : "=&r" (t), "+m" (v->counter) \
- : "r" (a), "r" (&v->counter) \
+ : "=&b" (t), "+m" (v->counter) \
+ : "r"#sign (a), "r" (&v->counter) \
: "cc"); \
} \

-#define ATOMIC_OP_RETURN_RELAXED(op, asm_op) \
+#define ATOMIC_OP_RETURN_RELAXED(op, asm_op, dot, sign) \
static inline int atomic_##op##_return_relaxed(int a, atomic_t *v) \
{ \
int t; \
\
__asm__ __volatile__( \
"1: lwarx %0,0,%3 # atomic_" #op "_return_relaxed\n" \
- #asm_op " %0,%2,%0\n" \
+ #asm_op "%I2" dot " %0,%0,%2\n" \
" stwcx. %0,0,%3\n" \
" bne- 1b\n" \
- : "=&r" (t), "+m" (v->counter) \
- : "r" (a), "r" (&v->counter) \
+ : "=&b" (t), "+m" (v->counter) \
+ : "r"#sign (a), "r" (&v->counter) \
: "cc"); \
\
return t; \
}

-#define ATOMIC_FETCH_OP_RELAXED(op, asm_op) \
+#define ATOMIC_FETCH_OP_RELAXED(op, asm_op, dot, sign) \
static inline int atomic_fetch_##op##_relaxed(int a, atomic_t *v) \
{ \
int res, t; \
\
__asm__ __volatile__( \
"1: lwarx %0,0,%4 # atomic_fetch_" #op "_relaxed\n" \
- #asm_op " %1,%3,%0\n" \
+ #asm_op "%I3" dot " %1,%0,%3\n" \
" stwcx. %1,0,%4\n" \
" bne- 1b\n" \
- : "=&r" (res), "=&r" (t), "+m" (v->counter) \
- : "r" (a), "r" (&v->counter) \
+ : "=&b" (res), "=&r" (t), "+m" (v->counter) \
+ : "r"#sign (a), "r" (&v->counter) \
: "cc"); \
\
return res; \
}

-#define ATOMIC_OPS(op, asm_op) \
- ATOMIC_OP(op, asm_op) \
- ATOMIC_OP_RETURN_RELAXED(op, asm_op) \
- ATOMIC_FETCH_OP_RELAXED(op, asm_op)
+#define ATOMIC_OPS(op, asm_op, dot, sign) \
+ ATOMIC_OP(op, asm_op, dot, sign) \
+ ATOMIC_OP_RETURN_RELAXED(op, asm_op, dot, sign) \
+ ATOMIC_FETCH_OP_RELAXED(op, asm_op, dot, sign)

-ATOMIC_OPS(add, add)
-ATOMIC_OPS(sub, subf)
+ATOMIC_OPS(add, add, "", I)
+ATOMIC_OPS(sub, sub, "", I)

#define atomic_add_return_relaxed atomic_add_return_relaxed
#define atomic_sub_return_relaxed atomic_sub_return_relaxed
@@ -101,13 +101,13 @@ ATOMIC_OPS(sub, subf)
#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed

#undef ATOMIC_OPS
-#define ATOMIC_OPS(op, asm_op) \
- ATOMIC_OP(op, asm_op) \
- ATOMIC_FETCH_OP_RELAXED(op, asm_op)
+#define ATOMIC_OPS(op, asm_op, dot, sign) \
+ ATOMIC_OP(op, asm_op, dot, sign) \
+ ATOMIC_FETCH_OP_RELAXED(op, asm_op, dot, sign)

-ATOMIC_OPS(and, and)
-ATOMIC_OPS(or, or)
-ATOMIC_OPS(xor, xor)
+ATOMIC_OPS(and, and, ".", K)
+ATOMIC_OPS(or, or, "", K)
+ATOMIC_OPS(xor, xor, "", K)

#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed
#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed
@@ -238,14 +238,14 @@ static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
"1: lwarx %0,0,%1 # atomic_fetch_add_unless\n\
cmpw 0,%0,%3 \n\
beq 2f \n\
- add %0,%2,%0 \n"
+ add%I2 %0,%0,%2 \n"
" stwcx. %0,0,%1 \n\
bne- 1b \n"
PPC_ATOMIC_EXIT_BARRIER
-" subf %0,%2,%0 \n\
+" sub%I2 %0,%0,%2 \n\
2:"
- : "=&r" (t)
- : "r" (&v->counter), "r" (a), "r" (u)
+ : "=&b" (t)
+ : "r" (&v->counter), "rI" (a), "r" (u)
: "cc", "memory");

return t;
--
2.25.0