Re: [PATCH 10/17] RISC-V: Atomic and Locking Code
From: Palmer Dabbelt
Date: Wed Jul 12 2017 - 13:18:05 EST
On Wed, 12 Jul 2017 05:40:49 PDT (-0700), boqun.feng@xxxxxxxxx wrote:
> On Tue, Jul 11, 2017 at 06:31:23PM -0700, Palmer Dabbelt wrote:
> [...]
>> diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
>> new file mode 100644
>> index 000000000000..b0a0c76e966a
>> --- /dev/null
>> +++ b/arch/riscv/include/asm/bitops.h
>> @@ -0,0 +1,216 @@
>> +/*
>> + * Copyright (C) 2012 Regents of the University of California
>> + *
>> + * This program is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU General Public License
>> + * as published by the Free Software Foundation, version 2.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
>> + * GNU General Public License for more details.
>> + */
>> +
>> +#ifndef _ASM_RISCV_BITOPS_H
>> +#define _ASM_RISCV_BITOPS_H
>> +
>> +#ifndef _LINUX_BITOPS_H
>> +#error "Only <linux/bitops.h> can be included directly"
>> +#endif /* _LINUX_BITOPS_H */
>> +
>> +#include <linux/compiler.h>
>> +#include <linux/irqflags.h>
>> +#include <asm/barrier.h>
>> +#include <asm/bitsperlong.h>
>> +
>> +#ifndef smp_mb__before_clear_bit
>> +#define smp_mb__before_clear_bit() smp_mb()
>> +#define smp_mb__after_clear_bit() smp_mb()
>> +#endif /* smp_mb__before_clear_bit */
>> +
>> +#include <asm-generic/bitops/__ffs.h>
>> +#include <asm-generic/bitops/ffz.h>
>> +#include <asm-generic/bitops/fls.h>
>> +#include <asm-generic/bitops/__fls.h>
>> +#include <asm-generic/bitops/fls64.h>
>> +#include <asm-generic/bitops/find.h>
>> +#include <asm-generic/bitops/sched.h>
>> +#include <asm-generic/bitops/ffs.h>
>> +
>> +#include <asm-generic/bitops/hweight.h>
>> +
>> +#if (BITS_PER_LONG == 64)
>> +#define __AMO(op) "amo" #op ".d"
>> +#elif (BITS_PER_LONG == 32)
>> +#define __AMO(op) "amo" #op ".w"
>> +#else
>> +#error "Unexpected BITS_PER_LONG"
>> +#endif
>> +
>
> So the test_and_{set,change,clear}_bit() have the similar semantics as
> cmpxchg(), so
>
>> +#define __test_and_op_bit(op, mod, nr, addr) \
>> +({ \
>> + unsigned long __res, __mask; \
>> + __mask = BIT_MASK(nr); \
>> + __asm__ __volatile__ ( \
>> + __AMO(op) " %0, %2, %1" \
>
> ... "aqrl" bit is needed here, and
>
>> + : "=r" (__res), "+A" (addr[BIT_WORD(nr)]) \
>> + : "r" (mod(__mask))); \
>
> ... "memory" clobber is needed here.
>
>> + ((__res & __mask) != 0); \
>> +})
>> +
>> +#define __op_bit(op, mod, nr, addr) \
>> + __asm__ __volatile__ ( \
>> + __AMO(op) " zero, %1, %0" \
>> + : "+A" (addr[BIT_WORD(nr)]) \
>> + : "r" (mod(BIT_MASK(nr))))
>> +
>> +/* Bitmask modifiers */
>> +#define __NOP(x) (x)
>> +#define __NOT(x) (~(x))
>> +
>> +/**
>> + * test_and_set_bit - Set a bit and return its old value
>> + * @nr: Bit to set
>> + * @addr: Address to count from
>> + *
>> + * This operation is atomic and cannot be reordered.
>> + * It may be reordered on other architectures than x86.
>> + * It also implies a memory barrier.
>> + */
>> +static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
>> +{
>> + return __test_and_op_bit(or, __NOP, nr, addr);
>> +}
>> +
>> +/**
>> + * test_and_clear_bit - Clear a bit and return its old value
>> + * @nr: Bit to clear
>> + * @addr: Address to count from
>> + *
>> + * This operation is atomic and cannot be reordered.
>> + * It can be reordered on other architectures other than x86.
>> + * It also implies a memory barrier.
>> + */
>> +static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
>> +{
>> + return __test_and_op_bit(and, __NOT, nr, addr);
>> +}
>> +
>> +/**
>> + * test_and_change_bit - Change a bit and return its old value
>> + * @nr: Bit to change
>> + * @addr: Address to count from
>> + *
>> + * This operation is atomic and cannot be reordered.
>> + * It also implies a memory barrier.
>> + */
>> +static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
>> +{
>> + return __test_and_op_bit(xor, __NOP, nr, addr);
>> +}
>> +
>> +/**
>> + * set_bit - Atomically set a bit in memory
>> + * @nr: the bit to set
>> + * @addr: the address to start counting from
>> + *
>> + * This function is atomic and may not be reordered. See __set_bit()
>
> This is incorrect, {set,change,clear}_bit() can be reordered, see
> Documentation/memory-barriers.txt, they are just relaxed atomics. But I
> think you just copy this from x86 code, so maybe x86 code needs help
> too, at least claim that's only x86-specific guarantee.
I went ahead and fixed our comments.
https://github.com/riscv/riscv-linux/commit/38d727b99b9eb76fac533cebc23f89d364b7d60d
>
>> + * if you do not require the atomic guarantees.
>> + *
>> + * Note: there are no guarantees that this function will not be reordered
>> + * on non x86 architectures, so if you are writing portable code,
>> + * make sure not to rely on its reordering guarantees.
>> + *
>> + * Note that @nr may be almost arbitrarily large; this function is not
>> + * restricted to acting on a single-word quantity.
>> + */
>> +static inline void set_bit(int nr, volatile unsigned long *addr)
>> +{
>> + __op_bit(or, __NOP, nr, addr);
>> +}
>> +
>> +/**
>> + * clear_bit - Clears a bit in memory
>> + * @nr: Bit to clear
>> + * @addr: Address to start counting from
>> + *
>> + * clear_bit() is atomic and may not be reordered. However, it does
>> + * not contain a memory barrier, so if it is used for locking purposes,
>> + * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
>> + * in order to ensure changes are visible on other processors.
>> + */
>> +static inline void clear_bit(int nr, volatile unsigned long *addr)
>> +{
>> + __op_bit(and, __NOT, nr, addr);
>> +}
>> +
>> +/**
>> + * change_bit - Toggle a bit in memory
>> + * @nr: Bit to change
>> + * @addr: Address to start counting from
>> + *
>> + * change_bit() is atomic and may not be reordered. It may be
>> + * reordered on other architectures than x86.
>> + * Note that @nr may be almost arbitrarily large; this function is not
>> + * restricted to acting on a single-word quantity.
>> + */
>> +static inline void change_bit(int nr, volatile unsigned long *addr)
>> +{
>> + __op_bit(xor, __NOP, nr, addr);
>> +}
>> +
>> +/**
>> + * test_and_set_bit_lock - Set a bit and return its old value, for lock
>> + * @nr: Bit to set
>> + * @addr: Address to count from
>> + *
>> + * This operation is atomic and provides acquire barrier semantics.
>> + * It can be used to implement bit locks.
>> + */
>> +static inline int test_and_set_bit_lock(
>> + unsigned long nr, volatile unsigned long *addr)
>> +{
>> + return test_and_set_bit(nr, addr);
>
> If you want, you can open code an "amoor.aq" here, because
> test_and_set_bit_lock() only needs an acquire barrier.
>
>> +}
>> +
>> +/**
>> + * clear_bit_unlock - Clear a bit in memory, for unlock
>> + * @nr: the bit to set
>> + * @addr: the address to start counting from
>> + *
>> + * This operation is atomic and provides release barrier semantics.
>> + */
>> +static inline void clear_bit_unlock(
>> + unsigned long nr, volatile unsigned long *addr)
>> +{
>
> You need a smp_mb__before_atomic() here, because clear_bit() is only
> relaxed atomic. And clear_bit_unlock() is a release.
Makes sense. I went ahead and added the aq and rl bits to the AMOs already in
these two:
https://github.com/riscv/riscv-linux/commit/f3903a2b403522a8dafd5cbe850caa22755d6b5b