Re: [PATCH V4] powerpc: Implement {cmp}xchg for u8 and u16
From: Boqun Feng
Date: Wed Apr 27 2016 - 10:13:11 EST
On Wed, Apr 27, 2016 at 09:58:17PM +0800, Boqun Feng wrote:
> On Wed, Apr 27, 2016 at 05:16:45PM +0800, Pan Xinhui wrote:
> > From: Pan Xinhui <xinhui.pan@xxxxxxxxxxxxxxxxxx>
> >
> > Implement xchg{u8,u16}{local,relaxed}, and
> > cmpxchg{u8,u16}{,local,acquire,relaxed}.
> >
> > It works on all ppc.
> >
> > remove volatile of first parameter in __cmpxchg_local and __cmpxchg
> >
> > Suggested-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
> > Signed-off-by: Pan Xinhui <xinhui.pan@xxxxxxxxxxxxxxxxxx>
> > ---
> > change from v3:
> > rewrite in asm for the LL/SC.
> > remove volatile in __cmpxchg_local and __cmpxchg.
> > change from v2:
> > in the do{}while(), we save one load and use corresponding cmpxchg suffix.
> > Also add corresponding __cmpxchg_u32 function declaration in the __XCHG_GEN
> > change from V1:
> > rework totally.
> > ---
> > arch/powerpc/include/asm/cmpxchg.h | 109 ++++++++++++++++++++++++++++++++++++-
> > 1 file changed, 106 insertions(+), 3 deletions(-)
> >
> > diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h
> > index 44efe73..8a3735f 100644
> > --- a/arch/powerpc/include/asm/cmpxchg.h
> > +++ b/arch/powerpc/include/asm/cmpxchg.h
> > @@ -7,6 +7,71 @@
> > #include <asm/asm-compat.h>
> > #include <linux/bug.h>
> >
> > +#ifdef __BIG_ENDIAN
> > +#define BITOFF_CAL(size, off) ((sizeof(u32) - size - off) * BITS_PER_BYTE)
> > +#else
> > +#define BITOFF_CAL(size, off) (off * BITS_PER_BYTE)
> > +#endif
> > +
> > +#define XCHG_GEN(type, sfx, cl) \
> > +static inline u32 __xchg_##type##sfx(void *p, u32 val) \
> > +{ \
> > + unsigned int prev, prev_mask, tmp, bitoff, off; \
> > + \
> > + off = (unsigned long)p % sizeof(u32); \
> > + bitoff = BITOFF_CAL(sizeof(type), off); \
> > + p -= off; \
> > + val <<= bitoff; \
> > + prev_mask = (u32)(type)-1 << bitoff; \
> > + \
> > + __asm__ __volatile__( \
> > +"1: lwarx %0,0,%3\n" \
> > +" andc %1,%0,%5\n" \
> > +" or %1,%1,%4\n" \
> > + PPC405_ERR77(0,%3) \
> > +" stwcx. %1,0,%3\n" \
> > +" bne- 1b\n" \
> > + : "=&r" (prev), "=&r" (tmp), "+m" (*(u32*)p) \
>
> I think we can save the "tmp" here by:
>
> __asm__ volatile__(
> "1: lwarx %0,0,%2\n"
> " andc %0,%0,%4\n"
> " or %0,%0,%3\n"
> PPC405_ERR77(0,%2)
> " stwcx. %0,0,%2\n"
> " bne- 1b\n"
> : "=&r" (prev), "+m" (*(u32*)p)
> : "r" (p), "r" (val), "r" (prev_mask)
> : "cc", cl);
>
> right?
>
> > + : "r" (p), "r" (val), "r" (prev_mask) \
> > + : "cc", cl); \
> > + \
> > + return prev >> bitoff; \
> > +}
> > +
> > +#define CMPXCHG_GEN(type, sfx, br, br2, cl) \
> > +static inline \
> > +u32 __cmpxchg_##type##sfx(void *p, u32 old, u32 new) \
> > +{ \
> > + unsigned int prev, prev_mask, tmp, bitoff, off; \
> > + \
> > + off = (unsigned long)p % sizeof(u32); \
> > + bitoff = BITOFF_CAL(sizeof(type), off); \
> > + p -= off; \
> > + old <<= bitoff; \
> > + new <<= bitoff; \
> > + prev_mask = (u32)(type)-1 << bitoff; \
> > + \
> > + __asm__ __volatile__( \
> > + br \
> > +"1: lwarx %0,0,%3\n" \
> > +" and %1,%0,%6\n" \
> > +" cmpw 0,%1,%4\n" \
> > +" bne- 2f\n" \
> > +" andc %1,%0,%6\n" \
> > +" or %1,%1,%5\n" \
> > + PPC405_ERR77(0,%3) \
> > +" stwcx. %1,0,%3\n" \
> > +" bne- 1b\n" \
> > + br2 \
> > + "\n" \
> > +"2:" \
> > + : "=&r" (prev), "=&r" (tmp), "+m" (*(u32*)p) \
>
> And "tmp" here could also be saved by:
>
> "1: lwarx %0,0,%2\n" \
> " xor %3,%0,%3\n" \
> " and. %3,%3,%5\n" \
> " bne- 2f\n" \
> " andc %0,%0,%5\n" \
> " or %0,%0,%4\n" \
> PPC405_ERR77(0,%2) \
> " stwcx. %0,0,%2\n" \
> " bne- 1b\n" \
> br2 \
> "\n" \
> "2:" \
> : "=&r" (prev), "+m" (*(u32*)p) \
> : "r" (p), "r" (old), "r" (new), "r" (prev_mask) \
> : "cc", cl); \
>
Oops, this should be:
"1: lwarx %0,0,%3\n" \
" xor %2,%0,%2\n" \
" and. %2,%2,%5\n" \
" bne- 2f\n" \
" andc %0,%0,%5\n" \
" or %0,%0,%4\n" \
PPC405_ERR77(0,%3) \
" stwcx. %0,0,%3\n" \
" bne- 1b\n" \
br2 \
"\n" \
"2:" \
: "=&r" (prev), "+m" (*(u32*)p), "+&r" (old) \
: "r" (p), "r" (new), "r" (prev_mask) \
: "cc", cl); \
Regards,
Boqun
Attachment:
signature.asc
Description: PGP signature