Re: [PATCH] powerpc32: use stmw/lmw for non volatile registers save/restore
From: Gabriel Paubert
Date: Tue May 24 2016 - 02:34:41 EST
On Mon, May 23, 2016 at 10:46:36AM +0200, Christophe Leroy wrote:
> lmw/stmw cost 1 extra cycle (2 cycles for lmw on some ppc) and imply
> serialising; however, they reduce the number of instructions,
> hence the amount of instruction fetch compared to the equivalent
> operation with several lzw/stw. It means less pressure on cache and
Minor typo, s/lzw/lwz/.
> less fetching delays on slow memory.
> When we transfer 20 registers, it is worth it.
> gcc uses stmw/lmw at function entry/exit to save/restore
> non-volatile registers, so let's also do it that way.
>
> On powerpc64, we can't use lmw/stmw as they only handle 32 bits, so
> we move longjmp() and setjmp() from misc.S to misc_64.S, and we
> write a 32-bit version in misc_32.S using stmw/lmw.
>
> Signed-off-by: Christophe Leroy <christophe.leroy@xxxxxx>
> ---
> The patch goes on top of "powerpc: inline current_stack_pointer()" or
> requires trivial manual merge in arch/powerpc/kernel/misc.S
>
> arch/powerpc/include/asm/ppc_asm.h | 6 ++--
> arch/powerpc/kernel/misc.S | 61 --------------------------------------
> arch/powerpc/kernel/misc_32.S | 22 ++++++++++++++
> arch/powerpc/kernel/misc_64.S | 61 ++++++++++++++++++++++++++++++++++++++
> 4 files changed, 85 insertions(+), 65 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
> index 2b31632..e29b649 100644
> --- a/arch/powerpc/include/asm/ppc_asm.h
> +++ b/arch/powerpc/include/asm/ppc_asm.h
> @@ -82,10 +82,8 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
> #else
> #define SAVE_GPR(n, base) stw n,GPR0+4*(n)(base)
> #define REST_GPR(n, base) lwz n,GPR0+4*(n)(base)
> -#define SAVE_NVGPRS(base) SAVE_GPR(13, base); SAVE_8GPRS(14, base); \
> - SAVE_10GPRS(22, base)
> -#define REST_NVGPRS(base) REST_GPR(13, base); REST_8GPRS(14, base); \
> - REST_10GPRS(22, base)
> +#define SAVE_NVGPRS(base) stmw 13, GPR0+4*13(base)
> +#define REST_NVGPRS(base) lmw 13, GPR0+4*13(base)
> #endif
>
> #define SAVE_2GPRS(n, base) SAVE_GPR(n, base); SAVE_GPR(n+1, base)
> diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
> index 7ce26d4..9de71d8 100644
> --- a/arch/powerpc/kernel/misc.S
> +++ b/arch/powerpc/kernel/misc.S
> @@ -53,64 +53,3 @@ _GLOBAL(add_reloc_offset)
>
> .align 3
> 2: PPC_LONG 1b
> -
> -_GLOBAL(setjmp)
> - mflr r0
> - PPC_STL r0,0(r3)
> - PPC_STL r1,SZL(r3)
> - PPC_STL r2,2*SZL(r3)
> - mfcr r0
> - PPC_STL r0,3*SZL(r3)
> - PPC_STL r13,4*SZL(r3)
> - PPC_STL r14,5*SZL(r3)
> - PPC_STL r15,6*SZL(r3)
> - PPC_STL r16,7*SZL(r3)
> - PPC_STL r17,8*SZL(r3)
> - PPC_STL r18,9*SZL(r3)
> - PPC_STL r19,10*SZL(r3)
> - PPC_STL r20,11*SZL(r3)
> - PPC_STL r21,12*SZL(r3)
> - PPC_STL r22,13*SZL(r3)
> - PPC_STL r23,14*SZL(r3)
> - PPC_STL r24,15*SZL(r3)
> - PPC_STL r25,16*SZL(r3)
> - PPC_STL r26,17*SZL(r3)
> - PPC_STL r27,18*SZL(r3)
> - PPC_STL r28,19*SZL(r3)
> - PPC_STL r29,20*SZL(r3)
> - PPC_STL r30,21*SZL(r3)
> - PPC_STL r31,22*SZL(r3)
> - li r3,0
> - blr
> -
> -_GLOBAL(longjmp)
> - PPC_LCMPI r4,0
> - bne 1f
> - li r4,1
> -1: PPC_LL r13,4*SZL(r3)
> - PPC_LL r14,5*SZL(r3)
> - PPC_LL r15,6*SZL(r3)
> - PPC_LL r16,7*SZL(r3)
> - PPC_LL r17,8*SZL(r3)
> - PPC_LL r18,9*SZL(r3)
> - PPC_LL r19,10*SZL(r3)
> - PPC_LL r20,11*SZL(r3)
> - PPC_LL r21,12*SZL(r3)
> - PPC_LL r22,13*SZL(r3)
> - PPC_LL r23,14*SZL(r3)
> - PPC_LL r24,15*SZL(r3)
> - PPC_LL r25,16*SZL(r3)
> - PPC_LL r26,17*SZL(r3)
> - PPC_LL r27,18*SZL(r3)
> - PPC_LL r28,19*SZL(r3)
> - PPC_LL r29,20*SZL(r3)
> - PPC_LL r30,21*SZL(r3)
> - PPC_LL r31,22*SZL(r3)
> - PPC_LL r0,3*SZL(r3)
> - mtcrf 0x38,r0
> - PPC_LL r0,0(r3)
> - PPC_LL r1,SZL(r3)
> - PPC_LL r2,2*SZL(r3)
> - mtlr r0
> - mr r3,r4
> - blr
> diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
> index d9c912b..de419e9 100644
> --- a/arch/powerpc/kernel/misc_32.S
> +++ b/arch/powerpc/kernel/misc_32.S
> @@ -1086,3 +1086,25 @@ relocate_new_kernel_end:
> relocate_new_kernel_size:
> .long relocate_new_kernel_end - relocate_new_kernel
> #endif
> +
> +_GLOBAL(setjmp)
> + mflr r0
> + li r3, 0
> + stw r0, 0(r3)
Huh? Explicitly writing to address 0? Has this code been test run at
least once?
At least move the li r3,0 to just before the blr.
Gabriel
> + stw r1, 4(r3)
> + stw r2, 8(r3)
> + mfcr r12
> + stmw r12, 12(r3)
> + blr
> +
> +_GLOBAL(longjmp)
> + lwz r0, 0(r3)
> + lwz r1, 4(r3)
> + lwz r2, 8(r3)
> + lmw r12, 12(r3)
> + mtcrf 0x38, r12
> + mtlr r0
> + mr. r3, r4
> + bnelr
> + li r3, 1
> + blr
> diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
> index f28754c..7e25249 100644
> --- a/arch/powerpc/kernel/misc_64.S
> +++ b/arch/powerpc/kernel/misc_64.S
> @@ -701,3 +701,64 @@ _GLOBAL(kexec_sequence)
> li r5,0
> blr /* image->start(physid, image->start, 0); */
> #endif /* CONFIG_KEXEC */
> +
> +_GLOBAL(setjmp)
> + mflr r0
> + PPC_STL r0,0(r3)
> + PPC_STL r1,SZL(r3)
> + PPC_STL r2,2*SZL(r3)
> + mfcr r0
> + PPC_STL r0,3*SZL(r3)
> + PPC_STL r13,4*SZL(r3)
> + PPC_STL r14,5*SZL(r3)
> + PPC_STL r15,6*SZL(r3)
> + PPC_STL r16,7*SZL(r3)
> + PPC_STL r17,8*SZL(r3)
> + PPC_STL r18,9*SZL(r3)
> + PPC_STL r19,10*SZL(r3)
> + PPC_STL r20,11*SZL(r3)
> + PPC_STL r21,12*SZL(r3)
> + PPC_STL r22,13*SZL(r3)
> + PPC_STL r23,14*SZL(r3)
> + PPC_STL r24,15*SZL(r3)
> + PPC_STL r25,16*SZL(r3)
> + PPC_STL r26,17*SZL(r3)
> + PPC_STL r27,18*SZL(r3)
> + PPC_STL r28,19*SZL(r3)
> + PPC_STL r29,20*SZL(r3)
> + PPC_STL r30,21*SZL(r3)
> + PPC_STL r31,22*SZL(r3)
> + li r3,0
> + blr
> +
> +_GLOBAL(longjmp)
> + PPC_LCMPI r4,0
> + bne 1f
> + li r4,1
> +1: PPC_LL r13,4*SZL(r3)
> + PPC_LL r14,5*SZL(r3)
> + PPC_LL r15,6*SZL(r3)
> + PPC_LL r16,7*SZL(r3)
> + PPC_LL r17,8*SZL(r3)
> + PPC_LL r18,9*SZL(r3)
> + PPC_LL r19,10*SZL(r3)
> + PPC_LL r20,11*SZL(r3)
> + PPC_LL r21,12*SZL(r3)
> + PPC_LL r22,13*SZL(r3)
> + PPC_LL r23,14*SZL(r3)
> + PPC_LL r24,15*SZL(r3)
> + PPC_LL r25,16*SZL(r3)
> + PPC_LL r26,17*SZL(r3)
> + PPC_LL r27,18*SZL(r3)
> + PPC_LL r28,19*SZL(r3)
> + PPC_LL r29,20*SZL(r3)
> + PPC_LL r30,21*SZL(r3)
> + PPC_LL r31,22*SZL(r3)
> + PPC_LL r0,3*SZL(r3)
> + mtcrf 0x38,r0
> + PPC_LL r0,0(r3)
> + PPC_LL r1,SZL(r3)
> + PPC_LL r2,2*SZL(r3)
> + mtlr r0
> + mr r3,r4
> + blr
> --
> 2.1.0
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@xxxxxxxxxxxxxxxx
> https://lists.ozlabs.org/listinfo/linuxppc-dev