[PATCH 2/2] powerpc/32s: Unroll kuep_lock and kuep_unlock macros

From: Christophe Leroy
Date: Wed Jan 20 2021 - 02:35:18 EST


Unroll the loops in kuep_lock and kuep_unlock.

Benchmarked on an mpc 8321 with a standard kernel having a
3M/1M user/kernel memory split, i.e. 12 segments for user.

Without KUEP, null_syscall benchmark is 220 cycles.
With KUEP, null_syscall benchmark is 439 cycles.

Once loops are unrolled, null_syscall benchmark is 366 cycles.
This is almost a 17% reduction.

It is assumed that userspace covers at least 4 segments and
at most 14 segments.

The isync is removed, which saves 8 cycles. For kuep_unlock, the rfi
will do the synchronisation. For kuep_lock, we get a small window
during which exec is still possible, but it won't last more than a
few instructions.

Both macros are called two times so the size increase is in
the noise (approx 120 instructions).

Signed-off-by: Christophe Leroy <christophe.leroy@xxxxxxxxxx>
---
arch/powerpc/include/asm/book3s/32/kup.h | 67 ++++++++++++++++++------
1 file changed, 52 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
index a0117a9d5b06..e800b515ac02 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -7,21 +7,61 @@

#ifdef __ASSEMBLY__

-.macro kuep_update_sr gpr1, gpr2 /* NEVER use r0 as gpr2 due to addis */
-101: mtsrin \gpr1, \gpr2
- addi \gpr1, \gpr1, 0x111 /* next VSID */
- rlwinm \gpr1, \gpr1, 0, 0xf0ffffff /* clear VSID overflow */
- addis \gpr2, \gpr2, 0x1000 /* address of next segment */
- bdnz 101b
- isync
+.macro kuep_increment gpr1, gpr2
+ addi \gpr1, \gpr1, 0x222 /* Next second VSID */
+ addi \gpr2, \gpr2, 0x222 /* Next second VSID */
+ rlwinm \gpr1, \gpr1, 0, 0xf0ffffff /* Clear VSID overflow */
+ rlwinm \gpr2, \gpr2, 0, 0xf0ffffff /* Clear VSID overflow */
+.endm
+
+.macro kuep_update_sr gpr1, gpr2 /* NEVER use r0 as gpr1 or gpr2 due to addi */
+ addi \gpr2, \gpr1, 0x111 /* Next VSID */
+ rlwinm \gpr2, \gpr2, 0, 0xf0ffffff /* Clear VSID overflow */
+ mtsr 0, \gpr1
+ mtsr 1, \gpr2
+ kuep_increment \gpr1, \gpr2
+ mtsr 2, \gpr1
+ mtsr 3, \gpr2
+#if NUM_USER_SEGMENTS > 4
+ kuep_increment \gpr1, \gpr2
+ mtsr 4, \gpr1
+#if NUM_USER_SEGMENTS > 5
+ mtsr 5, \gpr2
+#if NUM_USER_SEGMENTS > 6
+ kuep_increment \gpr1, \gpr2
+ mtsr 6, \gpr1
+#if NUM_USER_SEGMENTS > 7
+ mtsr 7, \gpr2
+#if NUM_USER_SEGMENTS > 8
+ kuep_increment \gpr1, \gpr2
+ mtsr 8, \gpr1
+#if NUM_USER_SEGMENTS > 9
+ mtsr 9, \gpr2
+#if NUM_USER_SEGMENTS > 10
+ kuep_increment \gpr1, \gpr2
+ mtsr 10, \gpr1
+#if NUM_USER_SEGMENTS > 11
+ mtsr 11, \gpr2
+#if NUM_USER_SEGMENTS > 12
+ kuep_increment \gpr1, \gpr2
+ mtsr 12, \gpr1
+#if NUM_USER_SEGMENTS > 13
+ mtsr 13, \gpr2
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
.endm

.macro kuep_lock gpr1, gpr2
#ifdef CONFIG_PPC_KUEP
- li \gpr1, NUM_USER_SEGMENTS
- li \gpr2, 0
- mtctr \gpr1
- mfsrin \gpr1, \gpr2
+ mfsr \gpr1, 0
oris \gpr1, \gpr1, SR_NX@h /* set Nx */
kuep_update_sr \gpr1, \gpr2
#endif
@@ -29,10 +69,7 @@

.macro kuep_unlock gpr1, gpr2
#ifdef CONFIG_PPC_KUEP
- li \gpr1, NUM_USER_SEGMENTS
- li \gpr2, 0
- mtctr \gpr1
- mfsrin \gpr1, \gpr2
+ mfsr \gpr1, 0
rlwinm \gpr1, \gpr1, 0, ~SR_NX /* Clear Nx */
kuep_update_sr \gpr1, \gpr2
#endif
--
2.25.0