[RFC PATCH v3 15/15] powerpc/kernel: Do not unconditionally save non volatile registers on system call

From: Christophe Leroy
Date: Mon Apr 06 2020 - 14:17:00 EST


To avoid saving the non volatile registers unconditionally,
syscall_exit_prepare() is split into three parts.
On PPC32, the three parts are called one after the other from
entry_32.S.
On PPC64, a single syscall_exit_prepare() function is kept, which
concatenates the three parts.
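
For reference, the new PPC32 exit path behaves roughly like the
following C sketch (illustrative only; names are taken from the
patch, and save_nvgprs() stands for the assembly helper added to
entry_32.S, which only saves the NVGPRs if they are not already
saved):

    ti_flags = current_thread_info()->flags;
    /* Save NVGPRs only when some user work will need them */
    if (ti_flags & (_TIF_SYSCALL_DOTRACE | _TIF_SINGLESTEP |
                    _TIF_USER_WORK_MASK))
            save_nvgprs();
    ret = syscall_exit_prepare_begin(r3, regs, ti_flags);
    while ((ti_flags = current_thread_info()->flags) &
           (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM)) {
            save_nvgprs();
            ret = syscall_exit_prepare_loop(ret, regs, ti_flags);
    }
    ret = syscall_exit_prepare_end(ret, regs, ti_flags);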

Another benefit is that the likely paths of
syscall_exit_prepare_begin() and syscall_exit_prepare_end() are
frameless, whereas there was no way to make the likely path of the
monolithic syscall_exit_prepare() frameless.

Before: 347 cycles on null_syscall
After: 307 cycles on null_syscall, i.e. better than before the
conversion to C.

Signed-off-by: Christophe Leroy <christophe.leroy@xxxxxx>
---
arch/powerpc/include/asm/asm-prototypes.h | 11 +++
arch/powerpc/kernel/entry_32.S | 25 ++++++-
arch/powerpc/kernel/head_32.h | 3 +-
arch/powerpc/kernel/syscall.c | 83 +++++++++++++++--------
4 files changed, 92 insertions(+), 30 deletions(-)
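
A note for reviewers (derived from the syscall.c hunks below):
syscall_exit_prepare_end() signals "could not exit yet, retry" to
its caller through the top bit of its return value. On PPC64, the
syscall_exit_prepare() wrapper handles it like this:

    ret = syscall_exit_prepare_end(ret, regs, ti_flags);
    if (unlikely(ret & 0x80000000)) {
            ret &= ~0x80000000;     /* clear the retry marker */
            goto again;             /* redo the user work loop */
    }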

diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index 7d81e86a1e5d..eea5133733bb 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -98,6 +98,17 @@ unsigned long __init early_init(unsigned long dt_ptr);
void __init machine_init(u64 dt_ptr);
#endif
long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8, unsigned long r0, struct pt_regs *regs);
+#ifdef CONFIG_PPC64
+#define static64 static
+#else
+#define static64
+#endif
+static64 notrace unsigned long
+syscall_exit_prepare_begin(unsigned long r3, struct pt_regs *regs, unsigned long ti_flags);
+static64 notrace unsigned long
+syscall_exit_prepare_loop(unsigned long ret, struct pt_regs *regs, unsigned long ti_flags);
+static64 notrace unsigned long
+syscall_exit_prepare_end(unsigned long ret, struct pt_regs *regs, unsigned long ti_flags);
notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs);
notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr);
notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr);
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 103f5158bc44..b9287fd0fcc6 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -315,14 +315,37 @@ stack_ovf:
RFI
#endif

+save_nvgprs:
+ lwz r11, _TRAP(r1)
+ andi. r12, r11, 1
+ rlwinm r11, r11, 0, ~1
+ beqlr
+ SAVE_NVGPRS(r1)
+ stw r11, _TRAP(r1)
+ blr
+
.globl transfer_to_syscall
transfer_to_syscall:
+ lwz r10, TI_FLAGS(r2)
mr r9, r0
+ andi. r10, r10, _TIF_SYSCALL_DOTRACE
addi r10, r1, STACK_FRAME_OVERHEAD
+ bnel- save_nvgprs
bl system_call_exception
ret_from_syscall:
+ lwz r5, TI_FLAGS(r2)
addi r4, r1, STACK_FRAME_OVERHEAD
- bl syscall_exit_prepare
+ andi. r0, r5, _TIF_SYSCALL_DOTRACE | _TIF_SINGLESTEP | _TIF_USER_WORK_MASK
+ bnel- save_nvgprs
+ bl syscall_exit_prepare_begin
+1: lwz r5, TI_FLAGS(r2)
+ addi r4, r1, STACK_FRAME_OVERHEAD
+ andi. r0, r5, _TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM
+ beq+ 1f
+ bl save_nvgprs
+ bl syscall_exit_prepare_loop
+ b 1b
+1: bl syscall_exit_prepare_end
lwz r2, _CCR(r1)
lwz r4, _NIP(r1)
lwz r5, _MSR(r1)
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index c301d666a3e5..1cc9a67cb42c 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -174,13 +174,12 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
stw r2,GPR2(r11)
addi r10,r10,STACK_FRAME_REGS_MARKER@l
stw r9,_MSR(r11)
- li r2, \trapno
+ li r2, \trapno + 1
stw r10,8(r11)
stw r2,_TRAP(r11)
SAVE_GPR(0, r11)
SAVE_4GPRS(3, r11)
SAVE_2GPRS(7, r11)
- SAVE_NVGPRS(r11)
addi r11,r1,STACK_FRAME_OVERHEAD
addi r2,r12,-THREAD
stw r11,PT_REGS(r12)
diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c
index af449a4a8e8f..b15f19c00ccb 100644
--- a/arch/powerpc/kernel/syscall.c
+++ b/arch/powerpc/kernel/syscall.c
@@ -37,7 +37,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
if (!IS_ENABLED(CONFIG_PPC_BOOK3E))
BUG_ON(!(regs->msr & MSR_RI));
BUG_ON(IS_ENABLED(CONFIG_PPC64) && !(regs->msr & MSR_PR));
- BUG_ON(!FULL_REGS(regs));
+ BUG_ON(IS_ENABLED(CONFIG_PPC64) && !FULL_REGS(regs));
BUG_ON(IS_ENABLED(CONFIG_PPC64) && get_softe(regs) != IRQS_ENABLED);

account_cpu_user_entry();
@@ -145,11 +145,9 @@ static notrace inline bool prep_irq_for_enabled_exit(void)
* The function graph tracer can not trace the return side of this function,
* because RI=0 and soft mask state is "unreconciled", so it is marked notrace.
*/
-notrace unsigned long syscall_exit_prepare(unsigned long r3,
- struct pt_regs *regs)
+static64 notrace unsigned long
+syscall_exit_prepare_begin(unsigned long r3, struct pt_regs *regs, unsigned long ti_flags)
{
- unsigned long *ti_flagsp = &current_thread_info()->flags;
- unsigned long ti_flags;
unsigned long ret = 0;

regs->result = r3;
@@ -157,8 +155,6 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
/* Check whether the syscall is issued inside a restartable sequence */
rseq_syscall(regs);

- ti_flags = *ti_flagsp;
-
if (unlikely(r3 >= (unsigned long)-MAX_ERRNO)) {
if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) {
r3 = -r3;
@@ -171,7 +167,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
ret = _TIF_RESTOREALL;
else
regs->gpr[3] = r3;
- clear_bits(_TIF_PERSYSCALL_MASK, ti_flagsp);
+ clear_bits(_TIF_PERSYSCALL_MASK, &current_thread_info()->flags);
} else {
regs->gpr[3] = r3;
}
@@ -181,27 +177,35 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
ret |= _TIF_RESTOREALL;
}

-again:
local_irq_disable();
- ti_flags = READ_ONCE(*ti_flagsp);
- while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
- local_irq_enable();
- if (ti_flags & _TIF_NEED_RESCHED) {
- schedule();
- } else {
- /*
- * SIGPENDING must restore signal handler function
- * argument GPRs, and some non-volatiles (e.g., r1).
- * Restore all for now. This could be made lighter.
- */
- if (ti_flags & _TIF_SIGPENDING)
- ret |= _TIF_RESTOREALL;
- do_notify_resume(regs, ti_flags);
- }
- local_irq_disable();
- ti_flags = READ_ONCE(*ti_flagsp);
+
+ return ret;
+}
+
+static64 notrace unsigned long
+syscall_exit_prepare_loop(unsigned long ret, struct pt_regs *regs, unsigned long ti_flags)
+{
+ local_irq_enable();
+ if (ti_flags & _TIF_NEED_RESCHED) {
+ schedule();
+ } else {
+ /*
+ * SIGPENDING must restore signal handler function
+ * argument GPRs, and some non-volatiles (e.g., r1).
+ * Restore all for now. This could be made lighter.
+ */
+ if (ti_flags & _TIF_SIGPENDING)
+ ret |= _TIF_RESTOREALL;
+ do_notify_resume(regs, ti_flags);
}
+ local_irq_disable();
+
+ return ret;
+}

+static64 notrace unsigned long
+syscall_exit_prepare_end(unsigned long ret, struct pt_regs *regs, unsigned long ti_flags)
+{
if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) {
if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
unlikely((ti_flags & _TIF_RESTORE_TM))) {
@@ -221,7 +225,8 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,

if (unlikely(!prep_irq_for_enabled_exit())) {
local_irq_enable();
- goto again;
+ local_irq_disable();
+ return ret | 0x80000000;
}

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -235,6 +240,30 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
return ret;
}

+#ifdef CONFIG_PPC64
+notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs)
+{
+ unsigned long ret;
+ unsigned long *ti_flagsp = &current_thread_info()->flags;
+ unsigned long ti_flags = *ti_flagsp;
+
+ ret = syscall_exit_prepare_begin(r3, regs, ti_flags);
+
+again:
+ ti_flags = READ_ONCE(*ti_flagsp);
+ if (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
+ ret = syscall_exit_prepare_loop(ret, regs, ti_flags);
+ goto again;
+ }
+ ret = syscall_exit_prepare_end(ret, regs, ti_flags);
+ if (unlikely(ret & 0x80000000)) {
+ ret &= ~0x80000000;
+ goto again;
+ }
+ return ret;
+}
+#endif
+
#ifdef CONFIG_PPC_BOOK3S_64 /* BOOK3E not yet using this */
notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr)
{
--
2.25.0