Re: [PATCH V3 11/27] csky: Atomic operations
From: Peter Zijlstra
Date: Wed Sep 12 2018 - 11:55:39 EST
On Wed, Sep 12, 2018 at 09:24:45PM +0800, Guo Ren wrote:
> +#define ATOMIC_OP(op, c_op) \
> +static inline void atomic_##op(int i, atomic_t *v) \
> +{ \
> + unsigned long tmp; \
> + \
> + smp_mb(); \
> + asm volatile ( \
> + "1: ldex.w %0, (%2) \n" \
> + " " #op " %0, %1 \n" \
> + " stex.w %0, (%2) \n" \
> + " bez %0, 1b \n" \
> + : "=&r" (tmp) \
> + : "r" (i), "r"(&v->counter) \
> + : "memory"); \
> + smp_mb(); \
> +}
ATOMIC_OP doesn't need to imply any smp_mb()s whatsoever.
> +#define ATOMIC_OP_RETURN(op, c_op) \
> +static inline int atomic_##op##_return(int i, atomic_t *v) \
> +{ \
> + unsigned long tmp, ret; \
> + \
> + smp_mb(); \
> + asm volatile ( \
> + "1: ldex.w %0, (%3) \n" \
> + " " #op " %0, %2 \n" \
> + " mov %1, %0 \n" \
> + " stex.w %0, (%3) \n" \
> + " bez %0, 1b \n" \
> + : "=&r" (tmp), "=&r" (ret) \
> + : "r" (i), "r"(&v->counter) \
> + : "memory"); \
> + smp_mb(); \
> + \
> + return ret; \
> +}
> +
> +#define ATOMIC_FETCH_OP(op, c_op) \
> +static inline int atomic_fetch_##op(int i, atomic_t *v) \
> +{ \
> + unsigned long tmp, ret; \
> + \
> + smp_mb(); \
> + asm volatile ( \
> + "1: ldex.w %0, (%3) \n" \
> + " mov %1, %0 \n" \
> + " " #op " %0, %2 \n" \
> + " stex.w %0, (%3) \n" \
> + " bez %0, 1b \n" \
> + : "=&r" (tmp), "=&r" (ret) \
> + : "r" (i), "r"(&v->counter) \
> + : "memory"); \
> + smp_mb(); \
> + \
> + return ret; \
> +}
For these you could generate _relaxed variants and not provide smp_mb()
inside them.
> +#else /* CONFIG_CPU_HAS_LDSTEX */
> +
> +#include <linux/irqflags.h>
> +
> +#define ATOMIC_OP(op, c_op) \
> +static inline void atomic_##op(int i, atomic_t *v) \
> +{ \
> + unsigned long tmp, flags; \
> + \
> + raw_local_irq_save(flags); \
> + \
> + asm volatile ( \
> + " ldw %0, (%2) \n" \
> + " " #op " %0, %1 \n" \
> + " stw %0, (%2) \n" \
> + : "=&r" (tmp) \
> + : "r" (i), "r"(&v->counter) \
> + : "memory"); \
> + \
> + raw_local_irq_restore(flags); \
> +}
Is this really 'better' than the generic UP fallback implementation?
> diff --git a/arch/csky/include/asm/spinlock.h b/arch/csky/include/asm/spinlock.h
> new file mode 100644
> index 0000000..f1081bb
> --- /dev/null
> +++ b/arch/csky/include/asm/spinlock.h
> @@ -0,0 +1,286 @@
> +#ifndef __ASM_CSKY_SPINLOCK_H
> +#define __ASM_CSKY_SPINLOCK_H
> +
> +#include <linux/spinlock_types.h>
> +#include <asm/barrier.h>
> +
> +#ifdef CONFIG_QUEUED_RWLOCKS
> +
> +/*
> + * Ticket-based spin-locking.
> + */
> +static inline void arch_spin_lock(arch_spinlock_t *lock)
> +{
> + arch_spinlock_t lockval;
> + u32 ticket_next = 1 << TICKET_NEXT;
> + u32 *p = &lock->lock;
> + u32 tmp;
> +
> + smp_mb();
spin_lock() doesn't need an smp_mb() before it.
> + asm volatile (
> + "1: ldex.w %0, (%2) \n"
> + " mov %1, %0 \n"
> + " add %0, %3 \n"
> + " stex.w %0, (%2) \n"
> + " bez %0, 1b \n"
> + : "=&r" (tmp), "=&r" (lockval)
> + : "r"(p), "r"(ticket_next)
> + : "cc");
> +
> + while (lockval.tickets.next != lockval.tickets.owner) {
> + lockval.tickets.owner = READ_ONCE(lock->tickets.owner);
> + }
> +
> + smp_mb();
> +}
> +
> +static inline int arch_spin_trylock(arch_spinlock_t *lock)
> +{
> + u32 tmp, contended, res;
> + u32 ticket_next = 1 << TICKET_NEXT;
> + u32 *p = &lock->lock;
> +
> + smp_mb();
Idem: spin_trylock() doesn't need an smp_mb() before it either.
> + do {
> + asm volatile (
> + " ldex.w %0, (%3) \n"
> + " movi %2, 1 \n"
> + " rotli %1, %0, 16 \n"
> + " cmpne %1, %0 \n"
> + " bt 1f \n"
> + " movi %2, 0 \n"
> + " add %0, %0, %4 \n"
> + " stex.w %0, (%3) \n"
> + "1: \n"
> + : "=&r" (res), "=&r" (tmp), "=&r" (contended)
> + : "r"(p), "r"(ticket_next)
> + : "cc");
> + } while (!res);
> +
> + if (!contended)
> + smp_mb();
> +
> + return !contended;
> +}
> +
> +static inline void arch_spin_unlock(arch_spinlock_t *lock)
> +{
> + smp_mb();
> + lock->tickets.owner++;
> + smp_mb();
spin_unlock() doesn't need an smp_mb() after it.
> +}
> +
> +static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
> +{
> + return lock.tickets.owner == lock.tickets.next;
> +}
> +
> +static inline int arch_spin_is_locked(arch_spinlock_t *lock)
> +{
> + return !arch_spin_value_unlocked(READ_ONCE(*lock));
> +}
> +
> +static inline int arch_spin_is_contended(arch_spinlock_t *lock)
> +{
> + struct __raw_tickets tickets = READ_ONCE(lock->tickets);
> + return (tickets.next - tickets.owner) > 1;
> +}
> +#define arch_spin_is_contended arch_spin_is_contended
> +
> +#include <asm/qrwlock.h>
> +
> +/* See include/linux/spinlock.h */
> +#define smp_mb__after_spinlock() smp_mb()
> +
> +#else /* CONFIG_QUEUED_RWLOCKS */
> +
> +/*
> + * Test-and-set spin-locking.
> + */
Why retain that?
Same comments apply; it has far too many smp_mb()s in it.
> +#endif /* CONFIG_QUEUED_RWLOCKS */
> +#endif /* __ASM_CSKY_SPINLOCK_H */
> diff --git a/arch/csky/include/asm/spinlock_types.h b/arch/csky/include/asm/spinlock_types.h
> new file mode 100644
> index 0000000..7e825c2
> --- /dev/null
> +++ b/arch/csky/include/asm/spinlock_types.h
> @@ -0,0 +1,35 @@
> +#ifndef __ASM_CSKY_SPINLOCK_TYPES_H
> +#define __ASM_CSKY_SPINLOCK_TYPES_H
> +
> +#ifndef __LINUX_SPINLOCK_TYPES_H
> +# error "please don't include this file directly"
> +#endif
> +
> +#define TICKET_NEXT 16
> +
> +typedef struct {
> + union {
> + u32 lock;
> + struct __raw_tickets {
> + /* little endian */
> + u16 owner;
> + u16 next;
> + } tickets;
> + };
> +} arch_spinlock_t;
> +
> +#define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } }
> +
> +#ifdef CONFIG_QUEUED_RWLOCKS
> +#include <asm-generic/qrwlock_types.h>
> +
> +#else /* CONFIG_NR_CPUS > 2 */
> +
> +typedef struct {
> + u32 lock;
> +} arch_rwlock_t;
> +
> +#define __ARCH_RW_LOCK_UNLOCKED { 0 }
> +
> +#endif /* CONFIG_QUEUED_RWLOCKS */
> +#endif /* __ASM_CSKY_SPINLOCK_TYPES_H */