[RFC PATCH v3 01/15] powerpc/syscall: Refactorise from Nick

From: Christophe Leroy
Date: Mon Apr 06 2020 - 14:16:47 EST


From: Nicholas Piggin <npiggin@xxxxxxxxx>

Christophe Leroy's on April 6, 2020 3:44 am:
> ifdef out specific PPC64 stuff to allow building
> syscall_64.c on PPC32.
>
> Modify Makefile to always build syscall.o
>
> Signed-off-by: Christophe Leroy <christophe.leroy@xxxxxx>
> ---
> arch/powerpc/kernel/Makefile | 5 ++---
> arch/powerpc/kernel/syscall.c | 10 ++++++++++
> 2 files changed, 12 insertions(+), 3 deletions(-)
>
> diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
> index 8cc3c831dccd..e4be425b7718 100644
> --- a/arch/powerpc/kernel/Makefile
> +++ b/arch/powerpc/kernel/Makefile
> @@ -45,11 +45,10 @@ obj-y := cputable.o syscalls.o \
> signal.o sysfs.o cacheinfo.o time.o \
> prom.o traps.o setup-common.o \
> udbg.o misc.o io.o misc_$(BITS).o \
> - of_platform.o prom_parse.o
> + of_platform.o prom_parse.o syscall.o
> obj-y += ptrace/
> obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o signal_64.o \
> - paca.o nvram_64.o firmware.o note.o \
> - syscall.o
> + paca.o nvram_64.o firmware.o note.o
> obj-$(CONFIG_VDSO32) += vdso32/
> obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o
> obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
> diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c
> index 72f3d2f0a823..28bd43db8755 100644
> --- a/arch/powerpc/kernel/syscall.c
> +++ b/arch/powerpc/kernel/syscall.c
> @@ -25,8 +25,10 @@ notrace long system_call_exception(long r3, long r4, long r5,
> unsigned long ti_flags;
> syscall_fn f;
>
> +#ifdef CONFIG_PPC64
> if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
> BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
> +#endif
>
> trace_hardirqs_off(); /* finish reconciling */
>
> @@ -34,7 +36,9 @@ notrace long system_call_exception(long r3, long r4, long r5,
> BUG_ON(!(regs->msr & MSR_RI));
> BUG_ON(!(regs->msr & MSR_PR));
> BUG_ON(!FULL_REGS(regs));
> +#ifdef CONFIG_PPC64
> BUG_ON(regs->softe != IRQS_ENABLED);
> +#endif
>
> account_cpu_user_entry();
>
> @@ -56,7 +60,9 @@ notrace long system_call_exception(long r3, long r4, long r5,
> * frame, or if the unwinder was taught the first stack frame always
> * returns to user with IRQS_ENABLED, this store could be avoided!
> */
> +#ifdef CONFIG_PPC64
> regs->softe = IRQS_ENABLED;
> +#endif
>
> local_irq_enable();
>
> @@ -148,7 +154,9 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
> ret |= _TIF_RESTOREALL;
> }
>
> +#ifdef CONFIG_PPC64
> again:
> +#endif
> local_irq_disable();
> ti_flags = READ_ONCE(*ti_flagsp);
> while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
> @@ -191,6 +199,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
>
> /* This pattern matches prep_irq_for_idle */
> __hard_EE_RI_disable();
> +#ifdef CONFIG_PPC64
> if (unlikely(lazy_irq_pending())) {
> __hard_RI_enable();
> trace_hardirqs_off();
> @@ -201,6 +210,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
> }
> local_paca->irq_happened = 0;
> irq_soft_mask_set(IRQS_ENABLED);
> +#endif
>
> #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
> local_paca->tm_scratch = regs->msr;
> --
> 2.25.0
>
>

The #ifdefs disappoint me!

Here is (unested) something that should help 32-bit avoid several ifdefs
in the main part of the function. I should have done this as part of the
merged series, but that's okay I'll submit as a cleanup.

The rest looks okay for now. Maybe we grow some helpers to manage the
soft-mask state, though I'm not really sure it would make sense for
32-bit code to ever call them. Maybe just confined to this file would be
okay but for now the ifdefs are okay.

Signed-off-by: Christophe Leroy <christophe.leroy@xxxxxx>
---
arch/powerpc/kernel/syscall_64.c | 58 +++++++++++++++-----------------
1 file changed, 28 insertions(+), 30 deletions(-)

diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c
index cf06eb443a80..f021db893ec2 100644
--- a/arch/powerpc/kernel/syscall_64.c
+++ b/arch/powerpc/kernel/syscall_64.c
@@ -103,6 +103,31 @@ notrace long system_call_exception(long r3, long r4, long r5,
return f(r3, r4, r5, r6, r7, r8);
}

+/*
+ * local irqs must be disabled. Returns false if the caller must re-enable
+ * them, check for new work, and try again.
+ */
+static notrace inline bool prep_irq_for_enabled_exit(void)
+{
+ /* This must be done with RI=1 because tracing may touch vmaps */
+ trace_hardirqs_on();
+
+ /* This pattern matches prep_irq_for_idle */
+ __hard_EE_RI_disable();
+ if (unlikely(lazy_irq_pending())) {
+ /* Took an interrupt, may have more exit work to do. */
+ __hard_RI_enable();
+ trace_hardirqs_off();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+ return false;
+ }
+ local_paca->irq_happened = 0;
+ irq_soft_mask_set(IRQS_ENABLED);
+
+ return true;
+}
+
/*
* This should be called after a syscall returns, with r3 the return value
* from the syscall. If this function returns non-zero, the system call
@@ -186,21 +211,10 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
}
}

- /* This must be done with RI=1 because tracing may touch vmaps */
- trace_hardirqs_on();
-
- /* This pattern matches prep_irq_for_idle */
- __hard_EE_RI_disable();
- if (unlikely(lazy_irq_pending())) {
- __hard_RI_enable();
- trace_hardirqs_off();
- local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+ if (unlikely(!prep_irq_for_enabled_exit())) {
local_irq_enable();
- /* Took an interrupt, may have more exit work to do. */
goto again;
}
- local_paca->irq_happened = 0;
- irq_soft_mask_set(IRQS_ENABLED);

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
local_paca->tm_scratch = regs->msr;
@@ -264,19 +278,11 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned
}
}

- trace_hardirqs_on();
- __hard_EE_RI_disable();
- if (unlikely(lazy_irq_pending())) {
- __hard_RI_enable();
- trace_hardirqs_off();
- local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+ if (unlikely(!prep_irq_for_enabled_exit())) {
local_irq_enable();
local_irq_disable();
- /* Took an interrupt, may have more exit work to do. */
goto again;
}
- local_paca->irq_happened = 0;
- irq_soft_mask_set(IRQS_ENABLED);

#ifdef CONFIG_PPC_BOOK3E
if (unlikely(ts->debug.dbcr0 & DBCR0_IDM)) {
@@ -334,13 +340,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign
}
}

- trace_hardirqs_on();
- __hard_EE_RI_disable();
- if (unlikely(lazy_irq_pending())) {
- __hard_RI_enable();
- irq_soft_mask_set(IRQS_ALL_DISABLED);
- trace_hardirqs_off();
- local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+ if (unlikely(!prep_irq_for_enabled_exit())) {
/*
* Can't local_irq_restore to replay if we were in
* interrupt context. Must replay directly.
@@ -354,8 +354,6 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign
/* Took an interrupt, may have more exit work to do. */
goto again;
}
- local_paca->irq_happened = 0;
- irq_soft_mask_set(IRQS_ENABLED);
} else {
/* Returning to a kernel context with local irqs disabled. */
__hard_EE_RI_disable();
--
2.25.0