Re: [PATCH v2 7/9] powerpc/powernv: Add platform support for stop instruction

From: Gautham R Shenoy
Date: Wed May 18 2016 - 13:57:22 EST


Hi Shreyas,

On Tue, May 03, 2016 at 01:54:36PM +0530, Shreyas B. Prabhu wrote:
> POWER ISA v3 defines a new idle processor core mechanism. In summary,
> a) new instruction named stop is added. This instruction replaces
> instructions like nap, sleep, rvwinkle.
> b) new per thread SPR named PSSCR is added which controls the behavior
> of stop instruction.
>
> PSSCR has following key fields
> Bits 0:3 - Power-Saving Level Status. This field indicates the lowest
> power-saving state the thread entered since stop instruction was last
> executed.
>
> Bit 42 - Enable State Loss
> 0 - No state is lost irrespective of other fields
> 1 - Allows state loss
>
> Bits 44:47 - Power-Saving Level Limit
> This limits the power-saving level that can be entered into.
>
> Bits 60:63 - Requested Level
> Used to specify which power-saving level must be entered on executing
> stop instruction
>
> This patch adds support for stop instruction and PSSCR handling.
>
> Signed-off-by: Shreyas B. Prabhu <shreyas@xxxxxxxxxxxxxxxxxx>

[..snip..]

> diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
> index 6a24769..d85f834 100644
> --- a/arch/powerpc/kernel/idle_power7.S
> +++ b/arch/powerpc/kernel/idle_power7.S
> @@ -46,7 +46,7 @@ core_idle_lock_held:
> power7_enter_nap_mode:
> #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
> /* Tell KVM we're napping */
> - li r4,KVM_HWTHREAD_IN_NAP
> + li r4,KVM_HWTHREAD_IN_IDLE
> stb r4,HSTATE_HWTHREAD_STATE(r13)
> #endif
> stb r3,PACA_THREAD_IDLE_STATE(r13)
> diff --git a/arch/powerpc/kernel/idle_power_common.S b/arch/powerpc/kernel/idle_power_common.S
> index ff7a541..f260fa8 100644
> --- a/arch/powerpc/kernel/idle_power_common.S
> +++ b/arch/powerpc/kernel/idle_power_common.S
> @@ -96,11 +96,35 @@ _GLOBAL(power_powersave_common)
> * back to reset vector.
> */
> _GLOBAL(power7_restore_hyp_resource)
> + GET_PACA(r13)
> +BEGIN_FTR_SECTION_NESTED(888)
> + /*
> + * POWER ISA 3. Use PSSCR to determine if we
> + * are waking up from deep idle state
> + */
> + LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state)
> + ld r4,ADDROFF(pnv_first_deep_stop_state)(r5)
> +
> + mfspr r5,SPRN_PSSCR
> + /*
> + * 0-4 bits correspond to Power-Saving Level Status
> + * which indicates the idle state we are waking up from
> + */
> + rldicl r5,r5,4,60
> + cmpd r5,r4
> + bge power_stop_wakeup_hyp_loss
> /*
> + * Waking up without hypervisor state loss. Return to
> + * reset vector
> + */
> + blr
> +
> +END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300,CPU_FTR_ARCH_300,888)
> + /*
> + * POWER ISA 2.07 or less.
> * Check if last bit of HSPGR0 is set. This indicates whether we are
> * waking up from winkle.
> */
> - GET_PACA(r13)
> clrldi r5,r13,63
> clrrdi r13,r13,1
> cmpwi cr4,r5,1
> diff --git a/arch/powerpc/kernel/idle_power_stop.S b/arch/powerpc/kernel/idle_power_stop.S
> new file mode 100644
> index 0000000..6c86c56
> --- /dev/null
> +++ b/arch/powerpc/kernel/idle_power_stop.S
> @@ -0,0 +1,221 @@
> +#include <linux/threads.h>
> +
> +#include <asm/processor.h>
> +#include <asm/cputable.h>
> +#include <asm/thread_info.h>
> +#include <asm/ppc_asm.h>
> +#include <asm/asm-offsets.h>
> +#include <asm/ppc-opcode.h>
> +#include <asm/hw_irq.h>
> +#include <asm/kvm_book3s_asm.h>
> +#include <asm/opal.h>
> +#include <asm/cpuidle.h>
> +#include <asm/book3s/64/mmu-hash.h>
> +#include <asm/exception-64s.h>
> +
> +#undef DEBUG
> +
> +/*
> + * rA - Requested stop state
> + * rB - Spare reg that can be used
> + */
> +#define PSSCR_REQUEST_STATE(rA, rB) \
> + ld rB, PACA_THREAD_PSSCR(r13); \
> + or rB,rB,rA; \
> + mtspr SPRN_PSSCR, rB; \
> +
> + .text
> +
> + .globl power_enter_stop
> +power_enter_stop:
> +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
> + /* Tell KVM we're napping */
> + li r4,KVM_HWTHREAD_IN_IDLE
> + stb r4,HSTATE_HWTHREAD_STATE(r13)
> +#endif
> + LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state)
> + ld r4,ADDROFF(pnv_first_deep_stop_state)(r5)
> + cmpd cr3,r3,r4

It is not clear what r3 is supposed to contain at this point. I think
it should contain the requested stop state. But I might be wrong!
Perhaps a comment above power_enter_stop can clarify that.

> + bge 2f
> + IDLE_STATE_ENTER_SEQ(PPC_STOP)
> +2:
> + lbz r7,PACA_THREAD_MASK(r13)
> + ld r14,PACA_CORE_IDLE_STATE_PTR(r13)
> +
> +lwarx_loop1:
> + lwarx r15,0,r14
> + andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT
> + bnel core_idle_lock_held

The definition of core_idle_lock_held below jumps to lwarx_loop2
instead of doing a blr once it observes that the LOCK_BIT is no longer
set. This doesn't seem correct, since the purpose of
core_idle_lock_held is to spin until the LOCK_BIT is cleared and then
resume whatever we were supposed to do next.

Can you clarify this part ?

> + andc r15,r15,r7 /* Clear thread bit */
> +
> + andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS
> + stwcx. r15,0,r14
> + bne- lwarx_loop1
> +
> + /*
> + * Note all register i.e per-core, per-subcore or per-thread is saved
> + * here since any thread in the core might wake up first
> + */
> + mfspr r3,SPRN_RPR
> + std r3,_RPR(r1)
> + mfspr r3,SPRN_SPURR
> + std r3,_SPURR(r1)
> + mfspr r3,SPRN_PURR
> + std r3,_PURR(r1)
> + mfspr r3,SPRN_TSCR
> + std r3,_TSCR(r1)
> + mfspr r3,SPRN_DSCR
> + std r3,_DSCR(r1)
> + mfspr r3,SPRN_AMOR
> + std r3,_AMOR(r1)
> +
> + IDLE_STATE_ENTER_SEQ(PPC_STOP)
> +
> +
> +_GLOBAL(power_stop)
> + PSSCR_REQUEST_STATE(r3,r4)
> + li r4, 1
> + LOAD_REG_ADDR(r5,power_enter_stop)
> + b power_powersave_common
> +
> +_GLOBAL(power_stop0)
> + li r3,0
> + li r4,1
> + LOAD_REG_ADDR(r5,power_enter_stop)
> + PSSCR_REQUEST_STATE(r3,r4)

r4 will get clobbered at this point. Move PSSCR_REQUEST_STATE before
"li r4,1".

Also, why can't this simply call "power_stop" after setting r3
to 0 ?


> + b power_powersave_common
> +
> +_GLOBAL(power_stop_wakeup_hyp_loss)
> + ld r2,PACATOC(r13);
> + ld r1,PACAR1(r13)
> + /*
> + * Before entering any idle state, the NVGPRs are saved in the stack
> + * and they are restored before switching to the process context. Hence
> + * until they are restored, they are free to be used.
> + *
> + * Save SRR1 in a NVGPR as it might be clobbered in opal_call_realmode
> + * (called in CHECK_HMI_INTERRUPT). SRR1 is required to determine the
> + * wakeup reason if we branch to kvm_start_guest.
> + */

Retain the comment from an earlier patch explaining why LR is being
cached in r17.

> + mflr r17
> + mfspr r16,SPRN_SRR1
> +BEGIN_FTR_SECTION
> + CHECK_HMI_INTERRUPT
> +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
> +
> + lbz r7,PACA_THREAD_MASK(r13)
> + ld r14,PACA_CORE_IDLE_STATE_PTR(r13)
> +lwarx_loop2:
> + lwarx r15,0,r14
> + andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT
> + /*
> + * Lock bit is set in one of the 2 cases-
> + * a. In the stop enter path, the last thread is executing
> + * fastsleep workaround code.
> + * b. In the wake up path, another thread is resyncing timebase or
> + * restoring context
> + * In either case loop until the lock bit is cleared.
> + */
> + bne core_idle_lock_held
> +
> + cmpwi cr2,r15,0
> + lbz r4,PACA_SUBCORE_SIBLING_MASK(r13)
> + and r4,r4,r15
> + cmpwi cr1,r4,0 /* Check if first in subcore */
> +
> + or r15,r15,r7 /* Set thread bit */
> +
> + beq cr1,first_thread_in_subcore
> +
> + /* Not first thread in subcore to wake up */
> + stwcx. r15,0,r14
> + bne- lwarx_loop2
> + isync
> + b common_exit

The code from lwarx_loop2 till the end of the definition of
common_exit is the same as the lwarx_loop2 to common_exit in
idle_power7.S. Well, except for a minor bit in the manner in which
return from core_idle_lock_held is handled and the fact that we're not
defining pnv_fastsleep_workaround_at_exit immediately in
first_thread_in_core. I prefer the original version where
core_idle_lock_held does a blr instead of explicitly jumping back to
lwarx_loop2 since it can be invoked safely from multiple places.

Can we move this to a common place and invoke it from these two places
instead of duplicating the code ?

> +
> +core_idle_lock_held:
> + HMT_LOW
> +core_idle_lock_loop:
> + lwz r15,0(14)
> + andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT
> + bne core_idle_lock_loop
> + HMT_MEDIUM
> + b lwarx_loop2
> +
> +first_thread_in_subcore:
> + /* First thread in subcore to wakeup */
> + ori r15,r15,PNV_CORE_IDLE_LOCK_BIT
> + stwcx. r15,0,r14
> + bne- lwarx_loop2
> + isync
> +
> + /*
> + * If waking up from sleep, subcore state is not lost. Hence
> + * skip subcore state restore
> + */
> + bne cr4,subcore_state_restored
> +
> + /* Restore per-subcore state */
> + ld r4,_RPR(r1)
> + mtspr SPRN_RPR,r4
> + ld r4,_AMOR(r1)
> + mtspr SPRN_AMOR,r4
> +
> +subcore_state_restored:
> + /*
> + * Check if the thread is also the first thread in the core. If not,
> + * skip to clear_lock.
> + */
> + bne cr2,clear_lock
> +
> +first_thread_in_core:

I suppose we don't need the pnv_fastsleep_workaround_at_exit at this
point anymore.

> +
> +timebase_resync:
> + /* Do timebase resync if we are waking up from sleep. Use cr3 value
> + * set in exceptions-64s.S */
> + ble cr3,clear_lock
> + /* Time base re-sync */
> + li r0,OPAL_RESYNC_TIMEBASE
> + bl opal_call_realmode;
> +
> + /*
> + * If waking up from sleep, per core state is not lost, skip to
> + * clear_lock.
> + */
> + bne cr4,clear_lock
> +
> + /* Restore per core state */
> + ld r4,_TSCR(r1)
> + mtspr SPRN_TSCR,r4
> +
> +clear_lock:
> + andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS
> + lwsync
> + stw r15,0(r14)
> +
> +common_exit:
> + /*
> + * Common to all threads.
> + *
> + * If waking up from sleep, hypervisor state is not lost. Hence
> + * skip hypervisor state restore.
> + */
> + bne cr4,hypervisor_state_restored
> +
> + /* Waking up from deep idle state */
> +
> + /* Restore per thread state */
> + bl __restore_cpu_power8
> +
> + ld r4,_SPURR(r1)
> + mtspr SPRN_SPURR,r4
> + ld r4,_PURR(r1)
> + mtspr SPRN_PURR,r4
> + ld r4,_DSCR(r1)
> + mtspr SPRN_DSCR,r4
> +
> +hypervisor_state_restored:
> +
> + mtspr SPRN_SRR1,r16
> + mtlr r17
> + blr

[..snip..]

> @@ -264,6 +275,30 @@ static int __init pnv_init_idle_states(void)
> goto out_free;
> }
>
> + if (cpu_has_feature(CPU_FTR_ARCH_300)) {
> + psscr_val = kcalloc(dt_idle_states, sizeof(*psscr_val),
> + GFP_KERNEL);

Need to handle the case when the kcalloc fails to allocate memory for
psscr_val here.

> + if (of_property_read_u64_array(power_mgt,
> + "ibm,cpu-idle-state-psscr",
> + psscr_val, dt_idle_states)) {
> + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-states-psscr in DT\n");
> + goto out_free_psscr;
> + }

The remainder of the patch looks ok.

--
Thanks and Regards
gautham.