Re: [PATCH] x86, kasan: add KASAN checks to atomic operations

From: Dmitry Vyukov
Date: Wed Mar 08 2017 - 11:27:19 EST


On Mon, Mar 6, 2017 at 9:35 PM, Peter Zijlstra <peterz@xxxxxxxxxxxxx> wrote:
> On Mon, Mar 06, 2017 at 04:20:18PM +0000, Mark Rutland wrote:
>> > >> So the problem is doing load/stores from asm bits, and GCC
>> > >> (traditionally) doesn't try and interpret APP asm bits.
>> > >>
>> > >> However, could we not write a GCC plugin that does exactly that?
>> > >> Something that interprets the APP asm bits and generates these KASAN
>> > >> bits that go with it?
>
>> I don't think there's much you'll be able to do within the compiler,
>> assuming you mean to derive this from the asm block inputs and outputs.
>
> Nah, I was thinking about a full asm interpreter.
>
>> Those can hide address-generation (e.g. with per-cpu stuff), which the
>> compiler may erroneously detect as racing.
>>
>> Those may also take fake inputs (e.g. the sp input to arm64's
>> __my_cpu_offset()) which may confuse matters.
>>
>> Parsing the assembly itself will be *extremely* painful due to the way
>> that's set up for run-time patching.
>
> Argh, yah, completely forgot about all that alternative and similar
> nonsense :/



I think if we initially scope the compiler atomic builtins to
KASAN/KTSAN/KMSAN (and consequently to x86/arm64), this becomes more
realistic. For the tools we don't care about absolute efficiency, which
gets rid of Will's points (2), (4) and (6) here:
https://lwn.net/Articles/691295/.
Re (3): I think rmb/wmb can reasonably be replaced with
atomic_thread_fence(acquire/release). Re (5): the correctness situation
improves quickly as more people use the builtins in user-space. And
since KASAN is not intended to be used in production (or at least such
a build is expected to crash), we can afford to shake out any remaining
correctness issues in that build. Re (1), I don't fully understand --
what exactly is the problem with seq_cst?
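
For concreteness, what the tools gain from the builtins is that the
accessed address becomes visible to the compiler, so the sanitizer
passes can instrument it. A rough before/after sketch (illustration
only, not part of the patch below; the asm variant is roughly the
current x86 atomic_add):

/* Today: the access to v->counter happens inside the asm block, so
 * KASAN/KTSAN/KMSAN never see it.
 */
static __always_inline void atomic_add(int i, atomic_t *v)
{
	asm volatile(LOCK_PREFIX "addl %1,%0"
		     : "+m" (v->counter)
		     : "ir" (i));
}

/* With the builtin: the compiler knows which address is accessed and
 * can emit the corresponding tool checks for it.
 */
static __always_inline void atomic_add(int i, atomic_t *v)
{
	__atomic_fetch_add(&v->counter, i, __ATOMIC_RELAXED);
}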

I've sketched a patch that does it, and did some testing with/without
KASAN on x86_64.
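
To illustrate the effect (a made-up example, not an actual report from
those runs; the helper name is invented): with builtin-based atomics,
KASAN can now catch bugs on the atomic accesses themselves, e.g.:

#include <linux/atomic.h>
#include <linux/slab.h>

static void kasan_atomic_uaf_demo(void)
{
	atomic_t *counter = kmalloc(sizeof(*counter), GFP_KERNEL);

	if (!counter)
		return;
	atomic_set(counter, 0);
	kfree(counter);
	atomic_inc(counter);	/* use-after-free: KASAN report expected here */
}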

In short, it adds include/linux/atomic_compiler.h, which is included
from include/linux/atomic.h when CONFIG_COMPILER_ATOMIC is defined;
<asm/atomic.h> is not included in that case.
Bitops are handled similarly, except that only parts of asm/bitops.h
are selectively disabled under CONFIG_COMPILER_BITOPS, because that
header also defines a bunch of non-atomic things.
asm/barrier.h is left intact for now; we don't need it for KASAN. But
for KTSAN we can do a similar thing -- selectively disable some of the
barriers in asm/barrier.h (e.g. leaving dma_rmb()/dma_wmb() per arch),
as sketched below.
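
A sketch of what that selective disabling could look like on the arch
side (CONFIG_COMPILER_BARRIERS is a hypothetical symbol, named by
analogy with CONFIG_COMPILER_BITOPS; the definitions shown are roughly
today's x86-64 ones):

/* arch/x86/include/asm/barrier.h, sketch only */
#ifndef CONFIG_COMPILER_BARRIERS
#define mb()	asm volatile("mfence" ::: "memory")
#define rmb()	asm volatile("lfence" ::: "memory")
#define wmb()	asm volatile("sfence" ::: "memory")
#endif

/* dma_rmb()/dma_wmb() would keep their per-arch definitions in all
 * configurations.
 */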

Such a change would allow us to support atomic ops on multiple arches
for all of KASAN/KTSAN/KMSAN.

Thoughts?
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 14635c5ea025..7bcb10544fc1 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -1,6 +1,10 @@
#ifndef _ASM_X86_ATOMIC_H
#define _ASM_X86_ATOMIC_H

+#ifdef CONFIG_COMPILER_ATOMIC
+#error "should not be included"
+#endif
+
#include <linux/compiler.h>
#include <linux/types.h>
#include <asm/alternative.h>
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 854022772c5b..e42b85f1ed75 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -68,6 +68,7 @@
* Note that @nr may be almost arbitrarily large; this function is not
* restricted to acting on a single-word quantity.
*/
+#ifndef CONFIG_COMPILER_BITOPS
static __always_inline void
set_bit(long nr, volatile unsigned long *addr)
{
@@ -81,6 +82,7 @@ set_bit(long nr, volatile unsigned long *addr)
: BITOP_ADDR(addr) : "Ir" (nr) : "memory");
}
}
+#endif

/**
* __set_bit - Set a bit in memory
@@ -106,6 +108,7 @@ static __always_inline void __set_bit(long nr, volatile unsigned long *addr)
* you should call smp_mb__before_atomic() and/or smp_mb__after_atomic()
* in order to ensure changes are visible on other processors.
*/
+#ifndef CONFIG_COMPILER_BITOPS
static __always_inline void
clear_bit(long nr, volatile unsigned long *addr)
{
@@ -119,6 +122,7 @@ clear_bit(long nr, volatile unsigned long *addr)
: "Ir" (nr));
}
}
+#endif

/*
* clear_bit_unlock - Clears a bit in memory
@@ -128,17 +132,20 @@ clear_bit(long nr, volatile unsigned long *addr)
* clear_bit() is atomic and implies release semantics before the memory
* operation. It can be used for an unlock.
*/
+#ifndef CONFIG_COMPILER_BITOPS
static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
{
barrier();
clear_bit(nr, addr);
}
+#endif

static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
{
asm volatile("btr %1,%0" : ADDR : "Ir" (nr));
}

+#ifndef CONFIG_COMPILER_BITOPS
static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
{
bool negative;
@@ -151,6 +158,7 @@ static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile

// Let everybody know we have it
#define clear_bit_unlock_is_negative_byte clear_bit_unlock_is_negative_byte
+#endif

/*
* __clear_bit_unlock - Clears a bit in memory
@@ -193,6 +201,7 @@ static __always_inline void __change_bit(long nr, volatile unsigned long *addr)
* Note that @nr may be almost arbitrarily large; this function is not
* restricted to acting on a single-word quantity.
*/
+#ifndef CONFIG_COMPILER_BITOPS
static __always_inline void change_bit(long nr, volatile unsigned long *addr)
{
if (IS_IMMEDIATE(nr)) {
@@ -205,6 +214,7 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr)
: "Ir" (nr));
}
}
+#endif

/**
* test_and_set_bit - Set a bit and return its old value
@@ -214,10 +224,12 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr)
* This operation is atomic and cannot be reordered.
* It also implies a memory barrier.
*/
+#ifndef CONFIG_COMPILER_BITOPS
static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", c);
}
+#endif

/**
* test_and_set_bit_lock - Set a bit and return its old value for lock
@@ -226,11 +238,13 @@ static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *ad
*
* This is the same as test_and_set_bit on x86.
*/
+#ifndef CONFIG_COMPILER_BITOPS
static __always_inline bool
test_and_set_bit_lock(long nr, volatile unsigned long *addr)
{
return test_and_set_bit(nr, addr);
}
+#endif

/**
* __test_and_set_bit - Set a bit and return its old value
@@ -260,10 +274,12 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
* This operation is atomic and cannot be reordered.
* It also implies a memory barrier.
*/
+#ifndef CONFIG_COMPILER_BITOPS
static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *addr)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", c);
}
+#endif

/**
* __test_and_clear_bit - Clear a bit and return its old value
@@ -313,10 +329,12 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
* This operation is atomic and cannot be reordered.
* It also implies a memory barrier.
*/
+#ifndef CONFIG_COMPILER_BITOPS
static __always_inline bool test_and_change_bit(long nr, volatile unsigned long *addr)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", c);
}
+#endif

static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr)
{
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 898dba2e2e2c..33a87ed3c150 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -63,7 +63,7 @@ struct saved_msrs {
/*
* Be very careful with includes. This header is prone to include loops.
*/
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include <linux/tracepoint-defs.h>

extern struct tracepoint __tracepoint_read_msr;
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index e71835bf60a9..5e02d01007d1 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -1,7 +1,14 @@
/* Atomic operations usable in machine independent code */
#ifndef _LINUX_ATOMIC_H
#define _LINUX_ATOMIC_H
+
+#if defined(CONFIG_KASAN)
+#define CONFIG_COMPILER_ATOMIC
+#include <linux/atomic_compiler.h>
+#else
#include <asm/atomic.h>
+#endif
+
#include <asm/barrier.h>

/*
diff --git a/include/linux/atomic_compiler.h b/include/linux/atomic_compiler.h
new file mode 100644
index 000000000000..4039761449dd
--- /dev/null
+++ b/include/linux/atomic_compiler.h
@@ -0,0 +1,339 @@
+#ifndef _LINUX_ATOMIC_COMPILER_H
+#define _LINUX_ATOMIC_COMPILER_H
+
+#include <linux/types.h>
+
+/* The 32-bit atomic type */
+
+#define ATOMIC_INIT(i) { (i) }
+
+static inline int atomic_read(const atomic_t *v)
+{
+ return __atomic_load_n(&v->counter, __ATOMIC_RELAXED);
+}
+
+static inline void atomic_set(atomic_t *v, int i)
+{
+ __atomic_store_n(&v->counter, i, __ATOMIC_RELAXED);
+}
+
+static inline void atomic_add(int i, atomic_t *v)
+{
+ __atomic_fetch_add(&v->counter, i, __ATOMIC_RELAXED);
+}
+
+static inline void atomic_sub(int i, atomic_t *v)
+{
+ __atomic_fetch_sub(&v->counter, i, __ATOMIC_RELAXED);
+}
+
+static inline bool atomic_sub_and_test(int i, atomic_t *v)
+{
+ return __atomic_fetch_sub(&v->counter, i, __ATOMIC_ACQ_REL) == i;
+}
+
+#define atomic_inc(v) (atomic_add(1, v))
+#define atomic_dec(v) (atomic_sub(1, v))
+
+static inline bool atomic_dec_and_test(atomic_t *v)
+{
+ return __atomic_fetch_sub(&v->counter, 1, __ATOMIC_ACQ_REL) == 1;
+}
+
+static inline bool atomic_inc_and_test(atomic_t *v)
+{
+ return __atomic_fetch_add(&v->counter, 1, __ATOMIC_ACQ_REL) == -1;
+}
+
+static inline bool atomic_add_negative(int i, atomic_t *v)
+{
+ return __atomic_fetch_add(&v->counter, i, __ATOMIC_ACQ_REL) + i < 0;
+}
+
+static inline int atomic_add_return(int i, atomic_t *v)
+{
+ return __atomic_fetch_add(&v->counter, i, __ATOMIC_ACQ_REL) + i;
+}
+
+static inline int atomic_sub_return(int i, atomic_t *v)
+{
+ return __atomic_fetch_sub(&v->counter, i, __ATOMIC_ACQ_REL) - i;
+}
+
+#define atomic_inc_return(v) (atomic_add_return(1, v))
+#define atomic_dec_return(v) (atomic_sub_return(1, v))
+
+static inline int atomic_fetch_add(int i, atomic_t *v)
+{
+ return __atomic_fetch_add(&v->counter, i, __ATOMIC_ACQ_REL);
+}
+
+static inline int atomic_fetch_sub(int i, atomic_t *v)
+{
+ return __atomic_fetch_sub(&v->counter, i, __ATOMIC_ACQ_REL);
+}
+
+static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
+{
+ __atomic_compare_exchange_n(&v->counter, &old, new, 0, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE);
+ return old;
+}
+
+static inline int atomic_xchg(atomic_t *v, int new)
+{
+ return __atomic_exchange_n(&v->counter, new, __ATOMIC_ACQ_REL);
+}
+
+static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+{
+ int c, old;
+ c = atomic_read(v);
+ for (;;) {
+ if (unlikely(c == u))
+ break;
+ old = atomic_cmpxchg(v, c, c + a);
+ if (likely(old == c))
+ break;
+ c = old;
+ }
+ return c;
+}
+
+#define ATOMIC_OP(op) \
+static inline void atomic_##op(int i, atomic_t *v) \
+{ \
+ __atomic_fetch_##op(&(v)->counter, i, __ATOMIC_RELAXED); \
+}
+
+#define ATOMIC_FETCH_OP(op, c_op) \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ return __atomic_fetch_##op(&(v)->counter, i, __ATOMIC_ACQ_REL); \
+}
+
+#define ATOMIC_OPS(op, c_op) \
+ ATOMIC_OP(op) \
+ ATOMIC_FETCH_OP(op, c_op)
+
+ATOMIC_OPS(and, &)
+ATOMIC_OPS(or , |)
+ATOMIC_OPS(xor, ^)
+
+#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
+#undef ATOMIC_OP
+
+/* The 64-bit atomic type */
+
+#ifndef CONFIG_64BIT
+typedef struct {
+ u64 __aligned(8) counter;
+} atomic64_t;
+#endif
+
+#define ATOMIC64_INIT(i) { (i) }
+
+static inline u64 atomic64_read(const atomic64_t *v)
+{
+ return __atomic_load_n(&v->counter, __ATOMIC_RELAXED);
+}
+
+static inline void atomic64_set(atomic64_t *v, u64 i)
+{
+ __atomic_store_n(&v->counter, i, __ATOMIC_RELAXED);
+}
+
+static inline void atomic64_add(u64 i, atomic64_t *v)
+{
+ __atomic_fetch_add(&v->counter, i, __ATOMIC_RELAXED);
+}
+
+static inline void atomic64_sub(u64 i, atomic64_t *v)
+{
+ __atomic_fetch_sub(&v->counter, i, __ATOMIC_RELAXED);
+}
+
+static inline bool atomic64_sub_and_test(u64 i, atomic64_t *v)
+{
+ return __atomic_fetch_sub(&v->counter, i, __ATOMIC_ACQ_REL) == i;
+}
+
+#define atomic64_inc(v) (atomic64_add(1, v))
+#define atomic64_dec(v) (atomic64_sub(1, v))
+
+static inline bool atomic64_dec_and_test(atomic64_t *v)
+{
+ return __atomic_fetch_sub(&v->counter, 1, __ATOMIC_ACQ_REL) == 1;
+}
+
+static inline bool atomic64_inc_and_test(atomic64_t *v)
+{
+ return __atomic_fetch_add(&v->counter, 1, __ATOMIC_ACQ_REL) == -1;
+}
+
+static inline bool atomic64_add_negative(u64 i, atomic64_t *v)
+{
+ return (s64)(__atomic_fetch_add(&v->counter, i, __ATOMIC_ACQ_REL) + i) < 0;
+}
+
+static inline u64 atomic64_add_return(u64 i, atomic64_t *v)
+{
+ return __atomic_fetch_add(&v->counter, i, __ATOMIC_ACQ_REL) + i;
+}
+
+static inline u64 atomic64_sub_return(u64 i, atomic64_t *v)
+{
+ return __atomic_fetch_sub(&v->counter, i, __ATOMIC_ACQ_REL) - i;
+}
+
+#define atomic64_inc_return(v) (atomic64_add_return(1, (v)))
+#define atomic64_dec_return(v) (atomic64_sub_return(1, (v)))
+
+static inline u64 atomic64_fetch_add(u64 i, atomic64_t *v)
+{
+ return __atomic_fetch_add(&v->counter, i, __ATOMIC_ACQ_REL);
+}
+
+static inline u64 atomic64_fetch_sub(u64 i, atomic64_t *v)
+{
+ return __atomic_fetch_sub(&v->counter, i, __ATOMIC_ACQ_REL);
+}
+
+static inline u64 atomic64_cmpxchg(atomic64_t *v, u64 old, u64 new)
+{
+ __atomic_compare_exchange_n(&v->counter, &old, new, 0, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE);
+ return old;
+}
+
+static inline u64 atomic64_xchg(atomic64_t *v, u64 new)
+{
+ return __atomic_exchange_n(&v->counter, new, __ATOMIC_ACQ_REL);
+}
+
+static inline bool atomic64_add_unless(atomic64_t *v, u64 a, u64 u)
+{
+ u64 c, old;
+ c = atomic64_read(v);
+ for (;;) {
+ if (unlikely(c == u))
+ break;
+ old = atomic64_cmpxchg(v, c, c + a);
+ if (likely(old == c))
+ break;
+ c = old;
+ }
+ return c != u;
+}
+
+#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
+
+static inline s64 atomic64_dec_if_positive(atomic64_t *v)
+{
+ s64 c, old, dec;
+ c = atomic64_read(v);
+ for (;;) {
+ dec = c - 1;
+ if (unlikely(dec < 0))
+ break;
+ old = atomic64_cmpxchg(v, c, dec);
+ if (likely(old == c))
+ break;
+ c = old;
+ }
+ return dec;
+}
+
+#define ATOMIC64_OP(op) \
+static inline void atomic64_##op(u64 i, atomic64_t *v) \
+{ \
+ __atomic_fetch_##op(&(v)->counter, i, __ATOMIC_RELAXED); \
+}
+
+#define ATOMIC64_FETCH_OP(op, c_op) \
+static inline u64 atomic64_fetch_##op(u64 i, atomic64_t *v) \
+{ \
+ return __atomic_fetch_##op(&(v)->counter, i, __ATOMIC_ACQ_REL); \
+}
+
+#define ATOMIC64_OPS(op, c_op) \
+ ATOMIC64_OP(op) \
+ ATOMIC64_FETCH_OP(op, c_op)
+
+ATOMIC64_OPS(and, &)
+ATOMIC64_OPS(or, |)
+ATOMIC64_OPS(xor, ^)
+
+#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
+#undef ATOMIC64_OP
+
+/* Cmpxchg */
+
+#define xchg(ptr, v) __atomic_exchange_n((ptr), (v), __ATOMIC_ACQ_REL)
+#define xadd(ptr, inc) __atomic_fetch_add((ptr), (inc), __ATOMIC_ACQ_REL)
+
+#define cmpxchg(ptr, old, new) ({ \
+ typeof(*(ptr)) tmp = (old); \
+ __atomic_compare_exchange_n((ptr), &tmp, (new), 0, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE); \
+ tmp; \
+})
+
+#define sync_cmpxchg cmpxchg
+#define cmpxchg_local cmpxchg
+#define cmpxchg64 cmpxchg
+#define cmpxchg64_local cmpxchg
+
+/*
+typedef struct {
+ long v1, v2;
+} __cmpxchg_double_struct;
+
+#define cmpxchg_double(p1, p2, o1, o2, n1, n2) \
+({ \
+ __cmpxchg_double_struct old = {(long)o1, (long)o2}; \
+ __cmpxchg_double_struct new = {(long)n1, (long)n2}; \
+ BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long)); \
+ BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long)); \
+ VM_BUG_ON((unsigned long)(p1) % (2 * sizeof(long))); \
+ VM_BUG_ON((unsigned long)((p1) + 1) != (unsigned long)(p2)); \
+ __atomic_compare_exchange_n((__int128 *)(p1), (__int128 *)&old, *(__int128 *)&new, 0, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE); \
+})
+
+#define cmpxchg_double_local cmpxchg_double
+#define system_has_cmpxchg_double() 1
+*/
+
+#undef CONFIG_HAVE_CMPXCHG_DOUBLE
+
+/* 16-bit atomic ops */
+
+static inline short int atomic_inc_short(short int *v)
+{
+ return __atomic_fetch_add(v, 1, __ATOMIC_ACQ_REL) + 1;
+}
+
+/* Barriers */
+/*
+#define barrier() __atomic_signal_fence(__ATOMIC_SEQ_CST)
+#define mb() __atomic_thread_fence(__ATOMIC_SEQ_CST)
+#define rmb() __atomic_thread_fence(__ATOMIC_ACQUIRE)
+#define wmb() __atomic_thread_fence(__ATOMIC_RELEASE)
+
+#define __smp_mb() mb()
+#define __smp_rmb() rmb()
+#define __smp_wmb() wmb()
+
+#define dma_rmb() mb()
+#define dma_wmb() mb()
+
+#define __smp_store_mb(var, value) __atomic_store_n(&(var), (value), __ATOMIC_SEQ_CST)
+#define __smp_store_release(p, v) __atomic_store_n((p), (v), __ATOMIC_RELEASE)
+#define __smp_load_acquire(p) __atomic_load_n((p), __ATOMIC_ACQUIRE)
+
+#define __smp_mb__before_atomic() mb()
+#define __smp_mb__after_atomic() mb()
+
+#include <asm-generic/barrier.h>
+*/
+
+#endif /* _LINUX_ATOMIC_COMPILER_H */
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index a83c822c35c2..1c6b1b925dd9 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -1,5 +1,11 @@
#ifndef _LINUX_BITOPS_H
#define _LINUX_BITOPS_H
+
+#if defined(CONFIG_KASAN)
+#define CONFIG_COMPILER_BITOPS
+#include <linux/bitops_compiler.h>
+#endif
+
#include <asm/types.h>

#ifdef __KERNEL__
diff --git a/include/linux/bitops_compiler.h b/include/linux/bitops_compiler.h
new file mode 100644
index 000000000000..4d2a253776f2
--- /dev/null
+++ b/include/linux/bitops_compiler.h
@@ -0,0 +1,56 @@
+#ifndef _LINUX_BITOPS_COMPILER_H
+#define _LINUX_BITOPS_COMPILER_H
+
+#include <linux/types.h>
+
+static inline void
+set_bit(long nr, volatile unsigned long *addr)
+{
+ __atomic_fetch_or((char *)addr + (nr / 8), 1 << (nr % 8), __ATOMIC_RELAXED);
+}
+
+static inline void
+clear_bit(long nr, volatile unsigned long *addr)
+{
+ __atomic_fetch_and((char *)addr + (nr / 8), ~(1 << (nr % 8)), __ATOMIC_RELAXED);
+}
+
+static inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
+{
+ __atomic_fetch_and((char *)addr + (nr / 8), ~(1 << (nr % 8)), __ATOMIC_RELEASE);
+}
+
+static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
+{
+ return __atomic_fetch_and((char *)addr + (nr / 8), ~(1 << (nr % 8)), __ATOMIC_RELEASE) < 0;
+}
+
+#define clear_bit_unlock_is_negative_byte clear_bit_unlock_is_negative_byte
+
+static inline void change_bit(long nr, volatile unsigned long *addr)
+{
+ __atomic_fetch_xor((char *)addr + (nr / 8), 1 << (nr % 8), __ATOMIC_RELAXED);
+}
+
+static inline bool test_and_set_bit(long nr, volatile unsigned long *addr)
+{
+ return __atomic_fetch_or((char *)addr + (nr / 8), 1 << (nr % 8), __ATOMIC_ACQ_REL) & (1 << (nr % 8));
+}
+
+static inline bool
+test_and_set_bit_lock(long nr, volatile unsigned long *addr)
+{
+ return __atomic_fetch_or((char *)addr + (nr / 8), 1 << (nr % 8), __ATOMIC_ACQUIRE) & (1 << (nr % 8));
+}
+
+static inline bool test_and_clear_bit(long nr, volatile unsigned long *addr)
+{
+ return __atomic_fetch_and((char *)addr + (nr / 8), ~(1 << (nr % 8)), __ATOMIC_ACQ_REL) & (1 << (nr % 8));
+}
+
+static inline bool test_and_change_bit(long nr, volatile unsigned long *addr)
+{
+ return __atomic_fetch_xor((char *)addr + (nr / 8), 1 << (nr % 8), __ATOMIC_ACQ_REL) & (1 << (nr % 8));
+}
+
+#endif /* _LINUX_BITOPS_COMPILER_H */
diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c
index ae92a9e9ba52..3b1e85619ce0 100644
--- a/net/sunrpc/xprtmultipath.c
+++ b/net/sunrpc/xprtmultipath.c
@@ -12,7 +12,7 @@
#include <linux/rcupdate.h>
#include <linux/rculist.h>
#include <linux/slab.h>
-#include <asm/cmpxchg.h>
+#include <linux/atomic.h>
#include <linux/spinlock.h>
#include <linux/sunrpc/xprt.h>
#include <linux/sunrpc/addr.h>