[PATCH] x86: force inlining of atomic ops

From: Denys Vlasenko
Date: Fri May 08 2015 - 06:26:45 EST


With both gcc 4.7.2 and 4.9.2, sometimes gcc mysteriously doesn't inline
very small functions we expect to be inlined:

$ nm --size-sort vmlinux | grep -iF ' t ' | uniq -c | grep -v '^ *1 ' | sort -rn
473 000000000000000b t spin_unlock_irqrestore
449 000000000000005f t rcu_read_unlock
355 0000000000000009 t atomic_inc <== THIS
353 000000000000006e t rcu_read_lock
350 0000000000000075 t rcu_read_lock_sched_held
291 000000000000000b t spin_unlock
266 0000000000000019 t arch_local_irq_restore
215 000000000000000b t spin_lock
180 0000000000000011 t kzalloc
165 0000000000000012 t list_add_tail
161 0000000000000019 t arch_local_save_flags
153 0000000000000016 t test_and_set_bit
134 000000000000000b t spin_unlock_irq
134 0000000000000009 t atomic_dec <== THIS
130 000000000000000b t spin_unlock_bh
122 0000000000000010 t brelse
120 0000000000000016 t test_and_clear_bit
120 000000000000000b t spin_lock_irq
119 000000000000001e t get_dma_ops
117 0000000000000053 t cpumask_next
116 0000000000000036 t kref_get
114 000000000000001a t schedule_work
106 000000000000000b t spin_lock_bh
103 0000000000000019 t arch_local_irq_disable
...

Note sizes of marked functions. They are merely 9 bytes long!
Selecting function with 'atomic' in their names:

355 0000000000000009 t atomic_inc
134 0000000000000009 t atomic_dec
98 0000000000000014 t atomic_dec_and_test
31 000000000000000e t atomic_add_return
27 000000000000000a t atomic64_inc
26 000000000000002f t kmap_atomic
24 0000000000000009 t atomic_add
12 0000000000000009 t atomic_sub
10 0000000000000021 t __atomic_add_unless
10 000000000000000a t atomic64_add
5 000000000000001f t __atomic_add_unless.constprop.7
5 000000000000000a t atomic64_dec
4 000000000000001f t __atomic_add_unless.constprop.18
4 000000000000001f t __atomic_add_unless.constprop.12
4 000000000000001f t __atomic_add_unless.constprop.10
3 000000000000001f t __atomic_add_unless.constprop.13
3 0000000000000011 t atomic64_add_return
2 000000000000001f t __atomic_add_unless.constprop.9
2 000000000000001f t __atomic_add_unless.constprop.8
2 000000000000001f t __atomic_add_unless.constprop.6
2 000000000000001f t __atomic_add_unless.constprop.5
2 000000000000001f t __atomic_add_unless.constprop.3
2 000000000000001f t __atomic_add_unless.constprop.22
2 000000000000001f t __atomic_add_unless.constprop.14
2 000000000000001f t __atomic_add_unless.constprop.11
2 000000000000001e t atomic_dec_if_positive
2 0000000000000014 t atomic_inc_and_test
2 0000000000000011 t atomic_add_return.constprop.4
2 0000000000000011 t atomic_add_return.constprop.17
2 0000000000000011 t atomic_add_return.constprop.16
2 000000000000000d t atomic_inc.constprop.4
2 000000000000000c t atomic_cmpxchg


This patch fixes this for x86 atomic ops via s/inline/__always_inline/.
This decreases allyesconfig kernel by about 25k:

text data bss dec hex filename
82399481 22255416 20627456 125282353 777a831 vmlinux.before
82375570 22255544 20627456 125258570 7774b4a vmlinux

Signed-off-by: Denys Vlasenko <dvlasenk@xxxxxxxxxx>
CC: Steven Rostedt <rostedt@xxxxxxxxxxx>
CC: Ingo Molnar <mingo@xxxxxxxxxx>
CC: Borislav Petkov <bp@xxxxxxxxx>
CC: "H. Peter Anvin" <hpa@xxxxxxxxx>
CC: Andy Lutomirski <luto@xxxxxxxxxxxxxx>
CC: Frederic Weisbecker <fweisbec@xxxxxxxxx>
CC: Alexei Starovoitov <ast@xxxxxxxxxxxx>
CC: Will Drewry <wad@xxxxxxxxxxxx>
CC: Kees Cook <keescook@xxxxxxxxxxxx>
CC: x86@xxxxxxxxxx
CC: linux-kernel@xxxxxxxxxxxxxxx
---
arch/x86/include/asm/atomic.h | 28 ++++++++++++++--------------
1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 5e5cd12..e9ddabc 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -22,7 +22,7 @@
*
* Atomically reads the value of @v.
*/
-static inline int atomic_read(const atomic_t *v)
+static __always_inline int atomic_read(const atomic_t *v)
{
return ACCESS_ONCE((v)->counter);
}
@@ -34,7 +34,7 @@ static inline int atomic_read(const atomic_t *v)
*
* Atomically sets the value of @v to @i.
*/
-static inline void atomic_set(atomic_t *v, int i)
+static __always_inline void atomic_set(atomic_t *v, int i)
{
v->counter = i;
}
@@ -46,7 +46,7 @@ static inline void atomic_set(atomic_t *v, int i)
*
* Atomically adds @i to @v.
*/
-static inline void atomic_add(int i, atomic_t *v)
+static __always_inline void atomic_add(int i, atomic_t *v)
{
asm volatile(LOCK_PREFIX "addl %1,%0"
: "+m" (v->counter)
@@ -60,7 +60,7 @@ static inline void atomic_add(int i, atomic_t *v)
*
* Atomically subtracts @i from @v.
*/
-static inline void atomic_sub(int i, atomic_t *v)
+static __always_inline void atomic_sub(int i, atomic_t *v)
{
asm volatile(LOCK_PREFIX "subl %1,%0"
: "+m" (v->counter)
@@ -76,7 +76,7 @@ static inline void atomic_sub(int i, atomic_t *v)
* true if the result is zero, or false for all
* other cases.
*/
-static inline int atomic_sub_and_test(int i, atomic_t *v)
+static __always_inline int atomic_sub_and_test(int i, atomic_t *v)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", "e");
}
@@ -87,7 +87,7 @@ static inline int atomic_sub_and_test(int i, atomic_t *v)
*
* Atomically increments @v by 1.
*/
-static inline void atomic_inc(atomic_t *v)
+static __always_inline void atomic_inc(atomic_t *v)
{
asm volatile(LOCK_PREFIX "incl %0"
: "+m" (v->counter));
@@ -99,7 +99,7 @@ static inline void atomic_inc(atomic_t *v)
*
* Atomically decrements @v by 1.
*/
-static inline void atomic_dec(atomic_t *v)
+static __always_inline void atomic_dec(atomic_t *v)
{
asm volatile(LOCK_PREFIX "decl %0"
: "+m" (v->counter));
@@ -113,7 +113,7 @@ static inline void atomic_dec(atomic_t *v)
* returns true if the result is 0, or false for all other
* cases.
*/
-static inline int atomic_dec_and_test(atomic_t *v)
+static __always_inline int atomic_dec_and_test(atomic_t *v)
{
GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e");
}
@@ -126,7 +126,7 @@ static inline int atomic_dec_and_test(atomic_t *v)
* and returns true if the result is zero, or false for all
* other cases.
*/
-static inline int atomic_inc_and_test(atomic_t *v)
+static __always_inline int atomic_inc_and_test(atomic_t *v)
{
GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", "e");
}
@@ -140,7 +140,7 @@ static inline int atomic_inc_and_test(atomic_t *v)
* if the result is negative, or false when
* result is greater than or equal to zero.
*/
-static inline int atomic_add_negative(int i, atomic_t *v)
+static __always_inline int atomic_add_negative(int i, atomic_t *v)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", "s");
}
@@ -152,7 +152,7 @@ static inline int atomic_add_negative(int i, atomic_t *v)
*
* Atomically adds @i to @v and returns @i + @v
*/
-static inline int atomic_add_return(int i, atomic_t *v)
+static __always_inline int atomic_add_return(int i, atomic_t *v)
{
return i + xadd(&v->counter, i);
}
@@ -164,7 +164,7 @@ static inline int atomic_add_return(int i, atomic_t *v)
*
* Atomically subtracts @i from @v and returns @v - @i
*/
-static inline int atomic_sub_return(int i, atomic_t *v)
+static __always_inline int atomic_sub_return(int i, atomic_t *v)
{
return atomic_add_return(-i, v);
}
@@ -172,7 +172,7 @@ static inline int atomic_sub_return(int i, atomic_t *v)
#define atomic_inc_return(v) (atomic_add_return(1, v))
#define atomic_dec_return(v) (atomic_sub_return(1, v))

-static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
+static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new)
{
return cmpxchg(&v->counter, old, new);
}
@@ -213,7 +213,7 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
* Atomically adds 1 to @v
* Returns the new value of @u
*/
-static inline short int atomic_inc_short(short int *v)
+static __always_inline short int atomic_inc_short(short int *v)
{
asm(LOCK_PREFIX "addw $1, %0" : "+m" (*v));
return *v;
--
1.8.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/