Re: [PATCH][PPC64] extra barrier in I/O operations
From: Rodolfo Guluarte Hale
Date: Sun May 09 2004 - 02:39:42 EST
up!
----- Original Message -----
From: "Paul Mackerras" <paulus@xxxxxxxxx>
To: <torvalds@xxxxxxxx>; <akpm@xxxxxxxx>; <olh@xxxxxxx>
Cc: <anton@xxxxxxxxx>; <benh@xxxxxxxxxxxxxxxxxxx>;
<linux-kernel@xxxxxxxxxxxxxxx>
Sent: Saturday, May 08, 2004 10:59 PM
Subject: [PATCH][PPC64] extra barrier in I/O operations
> At the moment, on PPC64, the instruction we use for wmb() doesn't
> order cacheable stores vs. non-cacheable stores. (It does order
> cacheable vs. cacheable and non-cacheable vs. non-cacheable.) This
> causes problems in the sort of driver code that writes stuff into
> memory, does a wmb(), then a writel to the device to start a DMA
> operation to read the stuff it has just written to memory.
>
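For concreteness, a minimal sketch of the driver pattern Paul describes
(the descriptor layout, register offset, and all names below are invented
for illustration, not taken from any real driver):

#include <linux/types.h>
#include <asm/io.h>
#include <asm/system.h>		/* wmb() in 2.6-era kernels */

/* Hypothetical DMA descriptor and device register, for illustration only. */
struct dma_desc {
	u32 buf_addr;
	u32 len;
};
#define CTRL_REG	0x10
#define DMA_GO		0x1

static void kick_dma(struct dma_desc *desc, u32 buf_phys, u32 len,
		     void *regs)
{
	desc->buf_addr = buf_phys;	/* cacheable stores: fill descriptor */
	desc->len = len;
	wmb();				/* was eieio on ppc64: does not order
					 * the cacheable stores above against
					 * the non-cacheable store below */
	writel(DMA_GO, regs + CTRL_REG);	/* device can start the DMA
						 * and read a stale descriptor */
}
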
> This patch solves the problem by adding a sync instruction before the
> store in the write* and out* macros. The sync is a full barrier that
> orders all loads and stores, cacheable or not. The patch also moves
> the eieio instruction that we had after the store to before the load
> in the read* and in* macros. With the sync before the store, we don't
> need an eieio as well in a sequence of stores, but we still need an
> eieio between a store and a load.
>
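To make the new barrier placement concrete, a sketch using the accessors
from the patch (the MMIO pointers are hypothetical):

#include <asm/io.h>

static void ordering_example(volatile unsigned char *csr,
			     volatile unsigned char *status)
{
	out_8(csr, 0x01);	/* emits: sync; stb */
	out_8(csr, 0x02);	/* emits: sync; stb -- this sync already
				 * orders it after the first store, so no
				 * trailing eieio is needed between stores */
	(void) in_8(status);	/* emits: eieio; lbz; twi; isync -- the
				 * leading eieio orders the load after the
				 * stores above */
}
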
> I think it is better to do this than to turn wmb() into a full memory
> barrier (a sync instruction) because the full barrier is slow and
> isn't needed with the sync in the write*/out* macros. This way,
> write*/out* are fully ordered with respect to preceding loads and
> stores, which is what driver writers expect, and we avoid penalizing
> users of wmb() who are only doing cacheable stores.
>
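A sketch of the wmb() callers that the last point protects: producers
doing only cacheable stores, e.g. publishing entries into a ring shared
with another CPU (all names invented for illustration):

#include <linux/types.h>
#include <asm/system.h>		/* wmb() in 2.6-era kernels */

/* Hypothetical single-producer ring, for illustration only. */
struct ring {
	u32 slots[256];
	volatile u32 head;
};

static void ring_put(struct ring *r, u32 val)
{
	r->slots[r->head & 255] = val;	/* cacheable store: the payload */
	wmb();				/* only cacheable vs. cacheable
					 * ordering is needed here, so the
					 * lightweight barrier suffices;
					 * making wmb() a full sync would
					 * slow this path for no benefit */
	r->head++;			/* cacheable store: publish */
}
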
> Please apply.
>
> Paul.
>
> diff -urN linux-2.5/include/asm-ppc64/io.h g5-ppc64/include/asm-ppc64/io.h
> --- linux-2.5/include/asm-ppc64/io.h 2004-03-20 08:59:11.000000000 +1100
> +++ g5-ppc64/include/asm-ppc64/io.h 2004-05-09 13:31:12.607957768 +1000
> @@ -240,22 +240,23 @@
> {
> int ret;
>
> - __asm__ __volatile__("lbz%U1%X1 %0,%1; twi 0,%0,0; isync" :
> - "=r" (ret) : "m" (*addr));
> + __asm__ __volatile__("eieio; lbz%U1%X1 %0,%1; twi 0,%0,0; isync"
> + : "=r" (ret) : "m" (*addr));
> return ret;
> }
>
> static inline void out_8(volatile unsigned char *addr, int val)
> {
> - __asm__ __volatile__("stb%U0%X0 %1,%0; eieio" : "=m" (*addr) : "r"
(val));
> + __asm__ __volatile__("sync; stb%U0%X0 %1,%0"
> + : "=m" (*addr) : "r" (val));
> }
>
> static inline int in_le16(volatile unsigned short *addr)
> {
> int ret;
>
> - __asm__ __volatile__("lhbrx %0,0,%1; twi 0,%0,0; isync" :
> - "=r" (ret) : "r" (addr), "m" (*addr));
> + __asm__ __volatile__("eieio; lhbrx %0,0,%1; twi 0,%0,0; isync"
> + : "=r" (ret) : "r" (addr), "m" (*addr));
> return ret;
> }
>
> @@ -263,28 +264,29 @@
> {
> int ret;
>
> - __asm__ __volatile__("lhz%U1%X1 %0,%1; twi 0,%0,0; isync" :
> - "=r" (ret) : "m" (*addr));
> + __asm__ __volatile__("eieio; lhz%U1%X1 %0,%1; twi 0,%0,0; isync"
> + : "=r" (ret) : "m" (*addr));
> return ret;
> }
>
> static inline void out_le16(volatile unsigned short *addr, int val)
> {
> - __asm__ __volatile__("sthbrx %1,0,%2; eieio" : "=m" (*addr) :
> - "r" (val), "r" (addr));
> + __asm__ __volatile__("sync; sthbrx %1,0,%2"
> + : "=m" (*addr) : "r" (val), "r" (addr));
> }
>
> static inline void out_be16(volatile unsigned short *addr, int val)
> {
> - __asm__ __volatile__("sth%U0%X0 %1,%0; eieio" : "=m" (*addr) : "r"
(val));
> + __asm__ __volatile__("sync; sth%U0%X0 %1,%0"
> + : "=m" (*addr) : "r" (val));
> }
>
> static inline unsigned in_le32(volatile unsigned *addr)
> {
> unsigned ret;
>
> - __asm__ __volatile__("lwbrx %0,0,%1; twi 0,%0,0; isync" :
> - "=r" (ret) : "r" (addr), "m" (*addr));
> + __asm__ __volatile__("eieio; lwbrx %0,0,%1; twi 0,%0,0; isync"
> + : "=r" (ret) : "r" (addr), "m" (*addr));
> return ret;
> }
>
> @@ -292,20 +294,21 @@
> {
> unsigned ret;
>
> - __asm__ __volatile__("lwz%U1%X1 %0,%1; twi 0,%0,0; isync" :
> - "=r" (ret) : "m" (*addr));
> + __asm__ __volatile__("eieio; lwz%U1%X1 %0,%1; twi 0,%0,0; isync"
> + : "=r" (ret) : "m" (*addr));
> return ret;
> }
>
> static inline void out_le32(volatile unsigned *addr, int val)
> {
> - __asm__ __volatile__("stwbrx %1,0,%2; eieio" : "=m" (*addr) :
> - "r" (val), "r" (addr));
> + __asm__ __volatile__("sync; stwbrx %1,0,%2" : "=m" (*addr)
> + : "r" (val), "r" (addr));
> }
>
> static inline void out_be32(volatile unsigned *addr, int val)
> {
> - __asm__ __volatile__("stw%U0%X0 %1,%0; eieio" : "=m" (*addr) : "r"
(val));
> + __asm__ __volatile__("sync; stw%U0%X0 %1,%0; eieio"
> + : "=m" (*addr) : "r" (val));
> }
>
> static inline unsigned long in_le64(volatile unsigned long *addr)
> @@ -313,7 +316,7 @@
> unsigned long tmp, ret;
>
> __asm__ __volatile__(
> - "ld %1,0(%2)\n"
> + "eieio; ld %1,0(%2)\n"
> "twi 0,%1,0\n"
> "isync\n"
> "rldimi %0,%1,5*8,1*8\n"
> @@ -331,8 +334,8 @@
> {
> unsigned long ret;
>
> - __asm__ __volatile__("ld %0,0(%1); twi 0,%0,0; isync" :
> - "=r" (ret) : "m" (*addr));
> + __asm__ __volatile__("eieio; ld %0,0(%1); twi 0,%0,0; isync"
> + : "=r" (ret) : "m" (*addr));
> return ret;
> }
>
> @@ -348,14 +351,13 @@
> "rldicl %1,%1,32,0\n"
> "rlwimi %0,%1,8,8,31\n"
> "rlwimi %0,%1,24,16,23\n"
> - "std %0,0(%2)\n"
> - "eieio\n"
> + "sync; std %0,0(%2)\n"
> : "=r" (tmp) : "r" (val), "b" (addr) , "m" (*addr));
> }
>
> static inline void out_be64(volatile unsigned long *addr, int val)
> {
> - __asm__ __volatile__("std %1,0(%0); eieio" : "=m" (*addr) : "r" (val));
> + __asm__ __volatile__("sync; std %1,0(%0)" : "=m" (*addr) : "r" (val));
> }
>
> #ifndef CONFIG_PPC_ISERIES
>
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/