[RFC PATCH v8 17/21] riscv: Optimize vector registers initialization

From: Greentime Hu
Date: Wed Sep 08 2021 - 13:46:44 EST


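This patch optimizes the initialization or invalidation of the vector
registers. By selecting LMUL=8 (m8) instead of LMUL=1 (m1) in vsetvli,
each vmv.v.i clears a group of eight registers, so four instructions
(on v0, v8, v16 and v24) cover all 32 vector registers instead of 32
separate ones. This reduces the code size of vector_flush_cpu_state()
and reset_regs().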

Co-developed-by: Han-Kuan Chen <hankuan.chen@xxxxxxxxxx>
Signed-off-by: Han-Kuan Chen <hankuan.chen@xxxxxxxxxx>
Signed-off-by: Greentime Hu <greentime.hu@xxxxxxxxxx>
---
arch/riscv/kernel/head.S | 30 +-----------------------
arch/riscv/kernel/kernel_mode_vector.c | 32 ++------------------------
2 files changed, 3 insertions(+), 59 deletions(-)

diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index 42eb3203fa77..8362d7458c6c 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -442,39 +442,11 @@ ENTRY(reset_regs)

li t1, SR_VS
csrs CSR_STATUS, t1
- vsetvli t1, x0, e8, m1
+ vsetvli t1, x0, e8, m8
vmv.v.i v0, 0
- vmv.v.i v1, 0
- vmv.v.i v2, 0
- vmv.v.i v3, 0
- vmv.v.i v4, 0
- vmv.v.i v5, 0
- vmv.v.i v6, 0
- vmv.v.i v7, 0
vmv.v.i v8, 0
- vmv.v.i v9, 0
- vmv.v.i v10, 0
- vmv.v.i v11, 0
- vmv.v.i v12, 0
- vmv.v.i v13, 0
- vmv.v.i v14, 0
- vmv.v.i v15, 0
vmv.v.i v16, 0
- vmv.v.i v17, 0
- vmv.v.i v18, 0
- vmv.v.i v19, 0
- vmv.v.i v20, 0
- vmv.v.i v21, 0
- vmv.v.i v22, 0
- vmv.v.i v23, 0
vmv.v.i v24, 0
- vmv.v.i v25, 0
- vmv.v.i v26, 0
- vmv.v.i v27, 0
- vmv.v.i v28, 0
- vmv.v.i v29, 0
- vmv.v.i v30, 0
- vmv.v.i v31, 0
/* note that the caller must clear SR_VS */
#endif /* CONFIG_VECTOR */

diff --git a/arch/riscv/kernel/kernel_mode_vector.c b/arch/riscv/kernel/kernel_mode_vector.c
index 108cfafe7496..b84618630edf 100644
--- a/arch/riscv/kernel/kernel_mode_vector.c
+++ b/arch/riscv/kernel/kernel_mode_vector.c
@@ -86,39 +86,11 @@ static void vector_flush_cpu_state(void)
long tmp;

__asm__ __volatile__ (
- "vsetvli %0, x0, e8, m1\n"
+ "vsetvli %0, x0, e8, m8\n"
"vmv.v.i v0, 0\n"
- "vmv.v.i v1, 0\n"
- "vmv.v.i v2, 0\n"
- "vmv.v.i v3, 0\n"
- "vmv.v.i v4, 0\n"
- "vmv.v.i v5, 0\n"
- "vmv.v.i v6, 0\n"
- "vmv.v.i v7, 0\n"
"vmv.v.i v8, 0\n"
- "vmv.v.i v9, 0\n"
- "vmv.v.i v10, 0\n"
- "vmv.v.i v11, 0\n"
- "vmv.v.i v12, 0\n"
- "vmv.v.i v13, 0\n"
- "vmv.v.i v14, 0\n"
- "vmv.v.i v15, 0\n"
"vmv.v.i v16, 0\n"
- "vmv.v.i v17, 0\n"
- "vmv.v.i v18, 0\n"
- "vmv.v.i v19, 0\n"
- "vmv.v.i v20, 0\n"
- "vmv.v.i v21, 0\n"
- "vmv.v.i v22, 0\n"
- "vmv.v.i v23, 0\n"
- "vmv.v.i v24, 0\n"
- "vmv.v.i v25, 0\n"
- "vmv.v.i v26, 0\n"
- "vmv.v.i v27, 0\n"
- "vmv.v.i v28, 0\n"
- "vmv.v.i v29, 0\n"
- "vmv.v.i v30, 0\n"
- "vmv.v.i v31, 0\n":"=r"(tmp)::);
+ "vmv.v.i v24, 0\n":"=r"(tmp)::);
}

/*
--
2.31.1