Re: [PATCH v2 3/4] powernv: cpuidle: Redesign idle states management

From: Paul Mackerras
Date: Thu Nov 27 2014 - 18:50:50 EST


On Tue, Nov 25, 2014 at 04:47:58PM +0530, Shreyas B. Prabhu wrote:
[snip]
> +2:
> + /* Sleep or winkle */
> + li r7,1
> + mfspr r8,SPRN_PIR
> + /*
> + * The last 3 bits of PIR represents the thread id of a cpu
> + * in power8. This will need adjusting for power7.
> + */
> + andi. r8,r8,0x07 /* Get thread id into r8 */
> + rotld r7,r7,r8

I would suggest adding another u8 field to the paca to store our
thread bit, and initialize it to 1 << (cpu_id % threads_per_core)
early on. That will handle the POWER7 case correctly and reduce these
four instructions to one.

> +
> + ld r14,PACA_CORE_IDLE_STATE_PTR(r13)
> +lwarx_loop1:
> + lwarx r15,0,r14
> + andc r15,r15,r7 /* Clear thread bit */
> +
> + andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS
> + beq last_thread
> +
> + /* Not the last thread to goto sleep */
> + stwcx. r15,0,r14
> + bne- lwarx_loop1
> + b common_enter
> +
> +last_thread:
> + LOAD_REG_ADDR(r3, pnv_need_fastsleep_workaround)
> + lbz r3,0(r3)
> + cmpwi r3,1
> + bne common_enter
> + /*
> + * Last thread of the core entering sleep. Last thread needs to execute
> + * the hardware bug workaround code. Before that, set the lock bit to
> + * avoid the race of other threads waking up and undoing workaround
> + * before workaround is applied.
> + */
> + ori r15,r15,PNV_CORE_IDLE_LOCK_BIT
> + stwcx. r15,0,r14
> + bne- lwarx_loop1
> +
> + /* Fast sleep workaround */
> + li r3,1
> + li r4,1
> + li r0,OPAL_CONFIG_CPU_IDLE_STATE
> + bl opal_call_realmode
> +
> + /* Clear Lock bit */
> + andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS
> + stw r15,0(r14)

In this case we know the result of the andi. will be 0, so this could
be just li r0,0; stw r0,0(r14).

> +
> +common_enter: /* common code for all the threads entering sleep */
> + IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
>
> _GLOBAL(power7_idle)
> /* Now check if user or arch enabled NAP mode */
> @@ -141,49 +191,16 @@ _GLOBAL(power7_idle)
>
> _GLOBAL(power7_nap)
> mr r4,r3
> - li r3,0
> + li r3,PNV_THREAD_NAP
> b power7_powersave_common
> /* No return */
>
> _GLOBAL(power7_sleep)
> - li r3,1
> + li r3,PNV_THREAD_SLEEP
> li r4,1
> b power7_powersave_common
> /* No return */
>
> -/*
> - * Make opal call in realmode. This is a generic function to be called
> - * from realmode from reset vector. It handles endianess.
> - *
> - * r13 - paca pointer
> - * r1 - stack pointer
> - * r3 - opal token
> - */
> -opal_call_realmode:
> - mflr r12
> - std r12,_LINK(r1)
> - ld r2,PACATOC(r13)
> - /* Set opal return address */
> - LOAD_REG_ADDR(r0,return_from_opal_call)
> - mtlr r0
> - /* Handle endian-ness */
> - li r0,MSR_LE
> - mfmsr r12
> - andc r12,r12,r0
> - mtspr SPRN_HSRR1,r12
> - mr r0,r3 /* Move opal token to r0 */
> - LOAD_REG_ADDR(r11,opal)
> - ld r12,8(r11)
> - ld r2,0(r11)
> - mtspr SPRN_HSRR0,r12
> - hrfid
> -
> -return_from_opal_call:
> - FIXUP_ENDIAN
> - ld r0,_LINK(r1)
> - mtlr r0
> - blr
> -
> #define CHECK_HMI_INTERRUPT \
> mfspr r0,SPRN_SRR1; \
> BEGIN_FTR_SECTION_NESTED(66); \
> @@ -196,10 +213,8 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \
> /* Invoke opal call to handle hmi */ \
> ld r2,PACATOC(r13); \
> ld r1,PACAR1(r13); \
> - std r3,ORIG_GPR3(r1); /* Save original r3 */ \
> - li r3,OPAL_HANDLE_HMI; /* Pass opal token argument*/ \
> + li r0,OPAL_HANDLE_HMI; /* Pass opal token argument*/ \
> bl opal_call_realmode; \
> - ld r3,ORIG_GPR3(r1); /* Restore original r3 */ \
> 20: nop;
>
>
> @@ -210,12 +225,91 @@ _GLOBAL(power7_wakeup_tb_loss)
> BEGIN_FTR_SECTION
> CHECK_HMI_INTERRUPT
> END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
> +
> + li r7,1
> + mfspr r8,SPRN_PIR
> + /*
> + * The last 3 bits of PIR represents the thread id of a cpu
> + * in power8. This will need adjusting for power7.
> + */
> + andi. r8,r8,0x07 /* Get thread id into r8 */
> + rotld r7,r7,r8
> + /* r7 now has 'thread_id'th bit set */
> +
> + ld r14,PACA_CORE_IDLE_STATE_PTR(r13)
> +lwarx_loop2:
> + lwarx r15,0,r14
> + andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT
> + /*
> + * Lock bit is set in one of the 2 cases-
> + * a. In the sleep/winkle enter path, the last thread is executing
> + * fastsleep workaround code.
> + * b. In the wake up path, another thread is executing fastsleep
> + * workaround undo code or resyncing timebase or restoring context
> + * In either case loop until the lock bit is cleared.
> + */
> + bne lwarx_loop2
> +
> + cmpwi cr2,r15,0
> + or r15,r15,r7 /* Set thread bit */
> +
> + beq cr2,first_thread
> +
> + /* Not first thread in core to wake up */
> + stwcx. r15,0,r14
> + bne- lwarx_loop2
> + b common_exit
> +
> +first_thread:
> + /* First thread in core to wakeup */
> + ori r15,r15,PNV_CORE_IDLE_LOCK_BIT
> + stwcx. r15,0,r14
> + bne- lwarx_loop2
> +
> + LOAD_REG_ADDR(r3, pnv_need_fastsleep_workaround)
> + lbz r3,0(r3)
> + cmpwi r3,1
> + /* skip fastsleep workaround if its not needed */
> + bne timebase_resync
> +
> + /* Undo fast sleep workaround */
> + mfcr r16 /* Backup CR into a non-volatile register */
> + li r3,1
> + li r4,0
> + li r0,OPAL_CONFIG_CPU_IDLE_STATE
> + bl opal_call_realmode
> + mtcr r16 /* Restore CR */
> +
> + /* Do timebase resync if we are waking up from sleep. Use cr1 value
> + * set in exceptions-64s.S */
> + ble cr1,clear_lock
> +
> +timebase_resync:
> /* Time base re-sync */
> - li r3,OPAL_RESYNC_TIMEBASE
> + li r0,OPAL_RESYNC_TIMEBASE
> bl opal_call_realmode;

So if pnv_need_fastsleep_workaround is zero, we always do the timebase
resync, but if pnv_need_fastsleep_workaround is one, we only do the
timebase resync if we had a loss of state. Is that really what you
meant?

> -
> /* TODO: Check r3 for failure */
>
> +clear_lock:
> + andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS
> + stw r15,0(r14)
> +
> +common_exit:
> + li r5,PNV_THREAD_RUNNING
> + stb r5,PACA_THREAD_IDLE_STATE(r13)
> +
> +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
> + li r0,KVM_HWTHREAD_IN_KERNEL
> + stb r0,HSTATE_HWTHREAD_STATE(r13)
> + /* Order setting hwthread_state vs. testing hwthread_req */
> + sync
> + lbz r0,HSTATE_HWTHREAD_REQ(r13)
> + cmpwi r0,0
> + beq 6f
> + b kvm_start_guest
> +6:
> +#endif

I'd prefer not to duplicate this code. Could you instead branch back
to the code in exceptions-64s.S? Or call this code via a bl and get
back to exceptions-64s.S via a blr.

> +
> REST_NVGPRS(r1)
> REST_GPR(2, r1)
> ld r3,_CCR(r1)
> diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
> index feb549a..b2aa93b 100644
> --- a/arch/powerpc/platforms/powernv/opal-wrappers.S
> +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
> @@ -158,6 +158,43 @@ opal_tracepoint_return:
> blr
> #endif
>
> +/*
> + * Make opal call in realmode. This is a generic function to be called
> + * from realmode. It handles endianness.
> + *
> + * r13 - paca pointer
> + * r1 - stack pointer
> + * r0 - opal token
> + */
> +_GLOBAL(opal_call_realmode)
> + mflr r12
> + std r12,_LINK(r1)

This is a bug waiting to happen. Using _LINK(r1) was OK in this
code's previous location, since there we know there is a
INT_FRAME_SIZE-sized stack frame and the _LINK field is basically
unused. Now that you're making this available to call from anywhere,
you can't trash the caller's stack frame like this. You need to use
PPC_LR_STKOFF(r1) instead.

> + ld r2,PACATOC(r13)
> + /* Set opal return address */
> + LOAD_REG_ADDR(r12,return_from_opal_call)
> + mtlr r12
> +
> + mfmsr r12
> +#ifdef __LITTLE_ENDIAN__
> + /* Handle endian-ness */
> + li r11,MSR_LE
> + andc r12,r12,r11
> +#endif
> + mtspr SPRN_HSRR1,r12
> + LOAD_REG_ADDR(r11,opal)
> + ld r12,8(r11)
> + ld r2,0(r11)
> + mtspr SPRN_HSRR0,r12
> + hrfid
> +
> +return_from_opal_call:
> +#ifdef __LITTLE_ENDIAN__
> + FIXUP_ENDIAN
> +#endif
> + ld r12,_LINK(r1)
> + mtlr r12
> + blr

Paul.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/