[PATCH] m68k: Unroll raw_outsb() loop

From: Finn Thain
Date: Fri Oct 12 2018 - 21:10:10 EST


Unroll the raw_outsb() loop using the optimized assembler code from
raw_outsw(). That code is copied and pasted, with movew changed to moveb.

This improves the performance of sequential write transfers using mac_esp
in PIO mode by 5% to 10%. (The DMA controller on the 840av/660av models is
still unsupported, so PIO transfers are used.)

Tested-by: Stan Johnson <userm57@xxxxxxxxx>
Signed-off-by: Finn Thain <fthain@xxxxxxxxxxxxxxxxxxx>
---
In a separate patch series, mac_esp adopts writesb() in place of inline
assembler, making that code smaller and more reusable.
---
arch/m68k/include/asm/raw_io.h | 39 +++++++++++++++++++++++++++++++++++----
1 file changed, 35 insertions(+), 4 deletions(-)

diff --git a/arch/m68k/include/asm/raw_io.h b/arch/m68k/include/asm/raw_io.h
index 85761255dde5..8a6dc6e5a279 100644
--- a/arch/m68k/include/asm/raw_io.h
+++ b/arch/m68k/include/asm/raw_io.h
@@ -107,12 +107,43 @@ static inline void raw_insb(volatile u8 __iomem *port, u8 *buf, unsigned int len
}

static inline void raw_outsb(volatile u8 __iomem *port, const u8 *buf,
- unsigned int len)
+ unsigned int nr)
{
- unsigned int i;
+ unsigned int tmp;

- for (i = 0; i < len; i++)
- out_8(port, *buf++);
+ if (nr & 15) {
+ tmp = (nr & 15) - 1;
+ asm volatile (
+ "1: moveb %0@+,%2@; dbra %1,1b"
+ : "=a" (buf), "=d" (tmp)
+ : "a" (port), "0" (buf),
+ "1" (tmp));
+ }
+ if (nr >> 4) {
+ tmp = (nr >> 4) - 1;
+ asm volatile (
+ "1: "
+ "moveb %0@+,%2@; "
+ "moveb %0@+,%2@; "
+ "moveb %0@+,%2@; "
+ "moveb %0@+,%2@; "
+ "moveb %0@+,%2@; "
+ "moveb %0@+,%2@; "
+ "moveb %0@+,%2@; "
+ "moveb %0@+,%2@; "
+ "moveb %0@+,%2@; "
+ "moveb %0@+,%2@; "
+ "moveb %0@+,%2@; "
+ "moveb %0@+,%2@; "
+ "moveb %0@+,%2@; "
+ "moveb %0@+,%2@; "
+ "moveb %0@+,%2@; "
+ "moveb %0@+,%2@; "
+ "dbra %1,1b"
+ : "=a" (buf), "=d" (tmp)
+ : "a" (port), "0" (buf),
+ "1" (tmp));
+ }
}

static inline void raw_insw(volatile u16 __iomem *port, u16 *buf, unsigned int nr)
--
2.16.4