[PATCH 4/5] riscv: __asm_to/copy_from_user: Bulk copy while shifting

From: Akira Tsukamoto
Date: Sat Jun 19 2021 - 07:37:54 EST



The destination address is aligned now, but often time the source address
is not in an aligned boundary.

To reduce the unaligned memory access, it reads the data from source in
aligned boundaries, which will cause the data to have an offset, and then
combines the data in the next iteration by fixing offset with shifting
before writing to destination.

The majority of the improving copy speed comes from this shift copy.

Signed-off-by: Akira Tsukamoto <akira.tsukamoto@xxxxxxxxx>
---
arch/riscv/lib/uaccess.S | 60 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 60 insertions(+)

diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S
index 4906b5ca91c3..e2e57551fc76 100644
--- a/arch/riscv/lib/uaccess.S
+++ b/arch/riscv/lib/uaccess.S
@@ -56,10 +56,70 @@ ENTRY(__asm_copy_from_user)
bltu a0, t1, 1b /* t1 - start of aligned dst */

.Lskip_first_bytes:
+ /*
+ * Now dst is aligned.
+ * Use shift-copy if src is misaligned.
+ * Use word-copy if both src and dst are aligned because
+ * can not use shift-copy which do not require shifting
+ */
+ /* a1 - start of src */
+ andi a3, a1, SZREG-1
+ bnez a3, .Lshift_copy

.Lword_copy:
.Lshift_copy:

+ /*
+ * Word copy with shifting.
+ * For misaligned copy we still perform aligned word copy, but
+ * we need to use the value fetched from the previous iteration and
+ * do some shifts.
+ * This is safe because reading less than a word size.
+ *
+ * a0 - start of aligned dst
+ * a1 - start of src
+ * a3 - a1 & mask:(SZREG-1)
+ * t0 - end of uncopied dst
+ * t1 - end of aligned dst
+ */
+ /* calculating aligned word boundary for dst */
+ andi t1, t0, ~(SZREG-1)
+ /* Converting unaligned src to aligned arc */
+ andi a1, a1, ~(SZREG-1)
+
+ /*
+ * Calculate shifts
+ * t3 - prev shift
+ * t4 - current shift
+ */
+ slli t3, a3, LGREG
+ li a5, SZREG*8
+ sub t4, a5, t3
+
+ /* Load the first word to combine with seceond word */
+ fixup REG_L a5, 0(a1), 10f
+
+3:
+ /* Main shifting copy
+ *
+ * a0 - start of aligned dst
+ * a1 - start of aligned src
+ * t1 - end of aligned dst
+ */
+
+ /* At least one iteration will be executed */
+ srl a4, a5, t3
+ fixup REG_L a5, SZREG(a1), 10f
+ addi a1, a1, SZREG
+ sll a2, a5, t4
+ or a2, a2, a4
+ fixup REG_S a2, 0(a0), 10f
+ addi a0, a0, SZREG
+ bltu a0, t1, 3b
+
+ /* Revert src to original unaligned value */
+ add a1, a1, a3
+
.Lbyte_copy_tail:
/*
* Byte copy anything left.
--
2.17.1