Re: [PATCH V4 1/3] cpuidle: play_idle: Make play_idle more flexible

From: Daniel Lezcano
Date: Wed Oct 16 2019 - 12:41:33 EST


On 15/10/2019 15:06, Ulf Hansson wrote:
> On Fri, 4 Oct 2019 at 10:32, Daniel Lezcano <daniel.lezcano@xxxxxxxxxx> wrote:
>>
>> The play_idle function has two users, the intel powerclamp and the
>> idle_injection.
>>
>> The idle injection cooling device uses the function via the
>> idle_injection powercap's APIs. Unfortunately, play_idle is currently
>> limited by the idle state depth: by default the deepest idle state is
>> selected. On the ARM[64] platforms, most of the time it is the cluster
>> idle state, the exit latency and the residency can be very high. That
>> reduces the scope of the idle injection usage because the impact on
>> the performances can be very significant.
>>
>> If the idle injection cycles can be done with a shallow state like a
>> retention state, the cooling effect would eventually give similar
>> results than the cpufreq cooling device.
>>
>> In order to prepare the function to receive an idle state parameter,
>> let's replace the 'use_deepest_state' boolean field with 'use_state'
>> and use this value to enter the specific idle state.
>>
>> The current code keeps the default behavior which is go to the deepest
>> idle state.
>>
>> Signed-off-by: Daniel Lezcano <daniel.lezcano@xxxxxxxxxx>
>> Acked-by: Mathieu Poirier <mathieu.poirier@xxxxxxxxxx>
>> ---
>> drivers/cpuidle/cpuidle.c | 21 +++++++++++----------
>> include/linux/cpuidle.h | 13 ++++++-------
>> kernel/sched/idle.c | 10 +++++-----
>> 3 files changed, 22 insertions(+), 22 deletions(-)
>>
>> diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
>> index 0895b988fa92..f8b54f277589 100644
>> --- a/drivers/cpuidle/cpuidle.c
>> +++ b/drivers/cpuidle/cpuidle.c
>> @@ -99,31 +99,31 @@ static int find_deepest_state(struct cpuidle_driver *drv,
>> }
>>
>> /**
>> - * cpuidle_use_deepest_state - Set/clear governor override flag.
>> - * @enable: New value of the flag.
>> + * cpuidle_use_state - Force the cpuidle framework to enter an idle state.
>> + * @state: An integer for an idle state
>> *
>> - * Set/unset the current CPU to use the deepest idle state (override governors
>> - * going forward if set).
>> + * Specify an idle state the cpuidle framework must step in and bypass
>> + * the idle state selection process.
>> */
>> -void cpuidle_use_deepest_state(bool enable)
>> +void cpuidle_use_state(int state)
>> {
>> struct cpuidle_device *dev;
>>
>> preempt_disable();
>> dev = cpuidle_get_device();
>> if (dev)
>> - dev->use_deepest_state = enable;
>> + dev->use_state = state;
>> preempt_enable();
>> }
>>
>> /**
>> * cpuidle_find_deepest_state - Find the deepest available idle state.
>> - * @drv: cpuidle driver for the given CPU.
>> - * @dev: cpuidle device for the given CPU.
>> */
>> -int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
>> - struct cpuidle_device *dev)
>> +int cpuidle_find_deepest_state(void)
>> {
>> + struct cpuidle_device *dev = cpuidle_get_device();
>> + struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
>> +
>> return find_deepest_state(drv, dev, UINT_MAX, 0, false);
>> }
>>
>> @@ -554,6 +554,7 @@ static void __cpuidle_unregister_device(struct cpuidle_device *dev)
>> static void __cpuidle_device_init(struct cpuidle_device *dev)
>> {
>> memset(dev->states_usage, 0, sizeof(dev->states_usage));
>> + dev->use_state = CPUIDLE_STATE_NOUSE;
>> dev->last_residency = 0;
>> dev->next_hrtimer = 0;
>> }
>> diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
>> index 2dc4c6b19c25..ba0751b26e37 100644
>> --- a/include/linux/cpuidle.h
>> +++ b/include/linux/cpuidle.h
>> @@ -15,6 +15,7 @@
>> #include <linux/list.h>
>> #include <linux/hrtimer.h>
>>
>> +#define CPUIDLE_STATE_NOUSE -1
>> #define CPUIDLE_STATE_MAX 10
>> #define CPUIDLE_NAME_LEN 16
>> #define CPUIDLE_DESC_LEN 32
>> @@ -80,11 +81,11 @@ struct cpuidle_driver_kobj;
>> struct cpuidle_device {
>> unsigned int registered:1;
>> unsigned int enabled:1;
>> - unsigned int use_deepest_state:1;
>> unsigned int poll_time_limit:1;
>> unsigned int cpu;
>> ktime_t next_hrtimer;
>>
>> + int use_state;
>> int last_state_idx;
>> int last_residency;
>> u64 poll_limit_ns;
>> @@ -203,19 +204,17 @@ static inline struct cpuidle_device *cpuidle_get_device(void) {return NULL; }
>> #endif
>>
>> #ifdef CONFIG_CPU_IDLE
>> -extern int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
>> - struct cpuidle_device *dev);
>> +extern int cpuidle_find_deepest_state(void);
>> extern int cpuidle_enter_s2idle(struct cpuidle_driver *drv,
>> struct cpuidle_device *dev);
>> -extern void cpuidle_use_deepest_state(bool enable);
>> +extern void cpuidle_use_state(int state);
>> #else
>> -static inline int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
>> - struct cpuidle_device *dev)
>> +static inline int cpuidle_find_deepest_state(void)
>> {return -ENODEV; }
>> static inline int cpuidle_enter_s2idle(struct cpuidle_driver *drv,
>> struct cpuidle_device *dev)
>> {return -ENODEV; }
>> -static inline void cpuidle_use_deepest_state(bool enable)
>> +static inline void cpuidle_use_state(int state)
>> {
>> }
>> #endif
>> diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
>> index b98283fc6914..17da9cb309e1 100644
>> --- a/kernel/sched/idle.c
>> +++ b/kernel/sched/idle.c
>> @@ -165,7 +165,8 @@ static void cpuidle_idle_call(void)
>> * until a proper wakeup interrupt happens.
>> */
>>
>> - if (idle_should_enter_s2idle() || dev->use_deepest_state) {
>> + if (idle_should_enter_s2idle() ||
>> + dev->use_state != CPUIDLE_STATE_NOUSE) {
>> if (idle_should_enter_s2idle()) {
>> rcu_idle_enter();
>>
>> @@ -181,8 +182,7 @@ static void cpuidle_idle_call(void)
>> tick_nohz_idle_stop_tick();
>> rcu_idle_enter();
>>
>> - next_state = cpuidle_find_deepest_state(drv, dev);
>> - call_cpuidle(drv, dev, next_state);
>> + call_cpuidle(drv, dev, dev->use_state);
>
> This doesn't look correct to me.
>
> More precisely, in the s2idle case, we always pick the deepest state.
> But from the change above, we would invoke call_cpuidle() with
> CPUIDLE_STATE_NOUSE, right!?

There is the condition 'if (idle_should_enter_s2idle())' which gives
another path.

But you are right for one case: if that call fails, returning a negative
or zero value for the entered state, we fall back to call_cpuidle() with
CPUIDLE_STATE_NOUSE. That context does not appear in the patch.

Perhaps, something like (on top of this patch):

diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 6bcea3c06ebe..fc7f5216b579 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -177,12 +177,15 @@ static void cpuidle_idle_call(void)
}

rcu_idle_exit();
+ next_state = cpuidle_find_deepest_state();
+ } else {
+ next_state = dev->use_state;
}

tick_nohz_idle_stop_tick();
rcu_idle_enter();

- call_cpuidle(drv, dev, dev->use_state);
+ call_cpuidle(drv, dev, next_state);
} else {
bool stop_tick = true;



>
>> } else {
>> bool stop_tick = true;
>>
>> @@ -328,7 +328,7 @@ void play_idle(unsigned long duration_us)
>> rcu_sleep_check();
>> preempt_disable();
>> current->flags |= PF_IDLE;
>> - cpuidle_use_deepest_state(true);
>> + cpuidle_use_state(cpuidle_find_deepest_state());
>>
>> it.done = 0;
>> hrtimer_init_on_stack(&it.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
>> @@ -339,7 +339,7 @@ void play_idle(unsigned long duration_us)
>> while (!READ_ONCE(it.done))
>> do_idle();
>>
>> - cpuidle_use_deepest_state(false);
>> + cpuidle_use_state(CPUIDLE_STATE_NOUSE);
>> current->flags &= ~PF_IDLE;
>>
>> preempt_fold_need_resched();
>> --
>> 2.17.1
>>
>
> Kind regards
> Uffe
>


--
<http://www.linaro.org/> Linaro.org | Open source software for ARM SoCs

Follow Linaro: <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog