Re: [Patch]cpuidle: Save current cpu as local variable instead of calling smp_processor_id() in loop

From: Gaurav Jindal (Gaurav Jindal)
Date: Thu May 19 2016 - 09:35:56 EST


Hi Peterz

Thanks a lot for the overwhelming response :)

I will look into the change history to get a thorough understanding of
the framework.


On Wed, May 18, 2016 at 02:40:52PM +0200, Peter Zijlstra wrote:
> On Wed, May 18, 2016 at 02:30:44PM +0200, Peter Zijlstra wrote:
> > void cpu_idle (void)
> > {
> > + int cpu = smp_processor_id();
> > +
> > /* endless idle loop with no priority at all */
> > while (1) {
> > while (!need_resched()) {
> > void (*idle)(void);
> > - /*
> > - * Mark this as an RCU critical section so that
> > - * synchronize_kernel() in the unload path waits
> > - * for our completion.
> > - */
> > - rcu_read_lock();
> > +
> > + if (cpu_isset(cpu, cpu_idle_map))
> > + cpu_clear(cpu, cpu_idle_map);
> > + rmb();
> > idle = pm_idle;
> >
> > if (!idle)
> > idle = default_idle;
> >
> > - irq_stat[smp_processor_id()].idle_timestamp = jiffies;
> > + irq_stat[cpu].idle_timestamp = jiffies;
> > idle();
> > - rcu_read_unlock();
> > }
> > schedule();
> > }
> > }
> >
> > +void cpu_idle_wait(void)
> > +{
> > + int cpu;
> > + cpumask_t map;
> > +
> > + for_each_online_cpu(cpu)
> > + cpu_set(cpu, cpu_idle_map);
> > +
> > + wmb();
> > + do {
> > + ssleep(1);
> > + cpus_and(map, cpu_idle_map, cpu_online_map);
> > + } while (!cpus_empty(map));
> > +}
> > +EXPORT_SYMBOL_GPL(cpu_idle_wait);
>
>
> Which then got 'wrecked' by the below commit.
>
> That commit removes the cpu_idle_state, and thereby removes the need for
> the rmb(), since you cannot 'order' one load.
>
> All the idle loop needs to guarantee (and in today's code that's
> non-obvious) is that it _must_ reload all values on every loop.
>
>
> ---
> commit 783e391b7b5b273cd20856d8f6f4878da8ec31b3
> Author: Venki Pallipadi <venkatesh.pallipadi@xxxxxxxxx>
> Date: Thu Apr 10 09:49:58 2008 -0700
>
> x86: Simplify cpu_idle_wait
>
> This patch also resolves hangs on boot:
> http://lkml.org/lkml/2008/2/23/263
> http://bugzilla.kernel.org/show_bug.cgi?id=10093
>
> The bug was causing once-in-few-reboots 10-15 sec wait during boot on
> certain laptops.
>
> Earlier commit 40d6a146629b98d8e322b6f9332b182c7cbff3df added
> smp_call_function in cpu_idle_wait() to kick cpus that are in tickless
> idle. Looking at cpu_idle_wait code at that time, code seemed to be
> over-engineered for a case which is rarely used (while changing idle
> handler).
>
> Below is a simplified version of cpu_idle_wait, which just makes a dummy
> smp_call_function to all cpus, to make them come out of old idle handler
> and start using the new idle handler. It eliminates code in the idle
> loop to handle cpu_idle_wait.
>
> Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@xxxxxxxxx>
> Signed-off-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
>
> diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
> index be3c7a299f02..43930e73f657 100644
> --- a/arch/x86/kernel/process_32.c
> +++ b/arch/x86/kernel/process_32.c
> @@ -82,7 +82,6 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
> */
> void (*pm_idle)(void);
> EXPORT_SYMBOL(pm_idle);
> -static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
>
> void disable_hlt(void)
> {
> @@ -190,9 +189,6 @@ void cpu_idle(void)
> while (!need_resched()) {
> void (*idle)(void);
>
> - if (__get_cpu_var(cpu_idle_state))
> - __get_cpu_var(cpu_idle_state) = 0;
> -
> check_pgt_cache();
> rmb();
> idle = pm_idle;
> @@ -220,40 +216,19 @@ static void do_nothing(void *unused)
> {
> }
>
> +/*
> + * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
> + * pm_idle and update to new pm_idle value. Required while changing pm_idle
> + * handler on SMP systems.
> + *
> + * Caller must have changed pm_idle to the new value before the call. Old
> + * pm_idle value will not be used by any CPU after the return of this function.
> + */
> void cpu_idle_wait(void)
> {
> - unsigned int cpu, this_cpu = get_cpu();
> - cpumask_t map, tmp = current->cpus_allowed;
> -
> - set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
> - put_cpu();
> -
> - cpus_clear(map);
> - for_each_online_cpu(cpu) {
> - per_cpu(cpu_idle_state, cpu) = 1;
> - cpu_set(cpu, map);
> - }
> -
> - __get_cpu_var(cpu_idle_state) = 0;
> -
> - wmb();
> - do {
> - ssleep(1);
> - for_each_online_cpu(cpu) {
> - if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
> - cpu_clear(cpu, map);
> - }
> - cpus_and(map, map, cpu_online_map);
> - /*
> - * We waited 1 sec, if a CPU still did not call idle
> - * it may be because it is in idle and not waking up
> - * because it has nothing to do.
> - * Give all the remaining CPUS a kick.
> - */
> - smp_call_function_mask(map, do_nothing, NULL, 0);
> - } while (!cpus_empty(map));
> -
> - set_cpus_allowed(current, tmp);
> + smp_mb();
> + /* kick all the CPUs so that they exit out of pm_idle */
> + smp_call_function(do_nothing, NULL, 0, 1);
> }
> EXPORT_SYMBOL_GPL(cpu_idle_wait);
>
> diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
> index 3baf9b9f4c87..46c4c546b499 100644
> --- a/arch/x86/kernel/process_64.c
> +++ b/arch/x86/kernel/process_64.c
> @@ -63,7 +63,6 @@ EXPORT_SYMBOL(boot_option_idle_override);
> */
> void (*pm_idle)(void);
> EXPORT_SYMBOL(pm_idle);
> -static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
>
> static ATOMIC_NOTIFIER_HEAD(idle_notifier);
>
> @@ -173,9 +172,6 @@ void cpu_idle(void)
> while (!need_resched()) {
> void (*idle)(void);
>
> - if (__get_cpu_var(cpu_idle_state))
> - __get_cpu_var(cpu_idle_state) = 0;
> -
> rmb();
> idle = pm_idle;
> if (!idle)
> @@ -207,40 +203,19 @@ static void do_nothing(void *unused)
> {
> }
>
> +/*
> + * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
> + * pm_idle and update to new pm_idle value. Required while changing pm_idle
> + * handler on SMP systems.
> + *
> + * Caller must have changed pm_idle to the new value before the call. Old
> + * pm_idle value will not be used by any CPU after the return of this function.
> + */
> void cpu_idle_wait(void)
> {
> - unsigned int cpu, this_cpu = get_cpu();
> - cpumask_t map, tmp = current->cpus_allowed;
> -
> - set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
> - put_cpu();
> -
> - cpus_clear(map);
> - for_each_online_cpu(cpu) {
> - per_cpu(cpu_idle_state, cpu) = 1;
> - cpu_set(cpu, map);
> - }
> -
> - __get_cpu_var(cpu_idle_state) = 0;
> -
> - wmb();
> - do {
> - ssleep(1);
> - for_each_online_cpu(cpu) {
> - if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
> - cpu_clear(cpu, map);
> - }
> - cpus_and(map, map, cpu_online_map);
> - /*
> - * We waited 1 sec, if a CPU still did not call idle
> - * it may be because it is in idle and not waking up
> - * because it has nothing to do.
> - * Give all the remaining CPUS a kick.
> - */
> - smp_call_function_mask(map, do_nothing, 0, 0);
> - } while (!cpus_empty(map));
> -
> - set_cpus_allowed(current, tmp);
> + smp_mb();
> + /* kick all the CPUs so that they exit out of pm_idle */
> + smp_call_function(do_nothing, NULL, 0, 1);
> }
> EXPORT_SYMBOL_GPL(cpu_idle_wait);
>