Re: [Patch]cpuidle: Save current cpu as local variable instead of calling smp_processor_id() in loop

From: Peter Zijlstra
Date: Wed May 18 2016 - 08:41:08 EST


On Wed, May 18, 2016 at 02:30:44PM +0200, Peter Zijlstra wrote:
> void cpu_idle (void)
> {
> + int cpu = smp_processor_id();
> +
> /* endless idle loop with no priority at all */
> while (1) {
> while (!need_resched()) {
> void (*idle)(void);
> - /*
> - * Mark this as an RCU critical section so that
> - * synchronize_kernel() in the unload path waits
> - * for our completion.
> - */
> - rcu_read_lock();
> +
> + if (cpu_isset(cpu, cpu_idle_map))
> + cpu_clear(cpu, cpu_idle_map);
> + rmb();
> idle = pm_idle;
>
> if (!idle)
> idle = default_idle;
>
> - irq_stat[smp_processor_id()].idle_timestamp = jiffies;
> + irq_stat[cpu].idle_timestamp = jiffies;
> idle();
> - rcu_read_unlock();
> }
> schedule();
> }
> }
>
> +void cpu_idle_wait(void)
> +{
> + int cpu;
> + cpumask_t map;
> +
> + for_each_online_cpu(cpu)
> + cpu_set(cpu, cpu_idle_map);
> +
> + wmb();
> + do {
> + ssleep(1);
> + cpus_and(map, cpu_idle_map, cpu_online_map);
> + } while (!cpus_empty(map));
> +}
> +EXPORT_SYMBOL_GPL(cpu_idle_wait);


Which then got 'wrecked' by the below commit.

That commit removes the cpu_idle_state, and thereby removes the need for
the rmb(), since you cannot 'order' one load.

All the idle loop needs to guarantee (and in today's code that's
non-obvious) is that it _must_ reload all values on every loop.


---
commit 783e391b7b5b273cd20856d8f6f4878da8ec31b3
Author: Venki Pallipadi <venkatesh.pallipadi@xxxxxxxxx>
Date: Thu Apr 10 09:49:58 2008 -0700

x86: Simplify cpu_idle_wait

This patch also resolves hangs on boot:
http://lkml.org/lkml/2008/2/23/263
http://bugzilla.kernel.org/show_bug.cgi?id=10093

The bug was causing once-in-few-reboots 10-15 sec wait during boot on
certain laptops.

Earlier commit 40d6a146629b98d8e322b6f9332b182c7cbff3df added
smp_call_function in cpu_idle_wait() to kick cpus that are in tickless
idle. Looking at cpu_idle_wait code at that time, code seemed to be
over-engineered for a case which is rarely used (while changing idle
handler).

Below is a simplified version of cpu_idle_wait, which just makes a dummy
smp_call_function to all cpus, to make them come out of old idle handler
and start using the new idle handler. It eliminates code in the idle
loop to handle cpu_idle_wait.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@xxxxxxxxx>
Signed-off-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>

diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index be3c7a299f02..43930e73f657 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -82,7 +82,6 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
*/
void (*pm_idle)(void);
EXPORT_SYMBOL(pm_idle);
-static DEFINE_PER_CPU(unsigned int, cpu_idle_state);

void disable_hlt(void)
{
@@ -190,9 +189,6 @@ void cpu_idle(void)
while (!need_resched()) {
void (*idle)(void);

- if (__get_cpu_var(cpu_idle_state))
- __get_cpu_var(cpu_idle_state) = 0;
-
check_pgt_cache();
rmb();
idle = pm_idle;
@@ -220,40 +216,19 @@ static void do_nothing(void *unused)
{
}

+/*
+ * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
+ * pm_idle and update to new pm_idle value. Required while changing pm_idle
+ * handler on SMP systems.
+ *
+ * Caller must have changed pm_idle to the new value before the call. Old
+ * pm_idle value will not be used by any CPU after the return of this function.
+ */
void cpu_idle_wait(void)
{
- unsigned int cpu, this_cpu = get_cpu();
- cpumask_t map, tmp = current->cpus_allowed;
-
- set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
- put_cpu();
-
- cpus_clear(map);
- for_each_online_cpu(cpu) {
- per_cpu(cpu_idle_state, cpu) = 1;
- cpu_set(cpu, map);
- }
-
- __get_cpu_var(cpu_idle_state) = 0;
-
- wmb();
- do {
- ssleep(1);
- for_each_online_cpu(cpu) {
- if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
- cpu_clear(cpu, map);
- }
- cpus_and(map, map, cpu_online_map);
- /*
- * We waited 1 sec, if a CPU still did not call idle
- * it may be because it is in idle and not waking up
- * because it has nothing to do.
- * Give all the remaining CPUS a kick.
- */
- smp_call_function_mask(map, do_nothing, NULL, 0);
- } while (!cpus_empty(map));
-
- set_cpus_allowed(current, tmp);
+ smp_mb();
+ /* kick all the CPUs so that they exit out of pm_idle */
+ smp_call_function(do_nothing, NULL, 0, 1);
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);

diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3baf9b9f4c87..46c4c546b499 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -63,7 +63,6 @@ EXPORT_SYMBOL(boot_option_idle_override);
*/
void (*pm_idle)(void);
EXPORT_SYMBOL(pm_idle);
-static DEFINE_PER_CPU(unsigned int, cpu_idle_state);

static ATOMIC_NOTIFIER_HEAD(idle_notifier);

@@ -173,9 +172,6 @@ void cpu_idle(void)
while (!need_resched()) {
void (*idle)(void);

- if (__get_cpu_var(cpu_idle_state))
- __get_cpu_var(cpu_idle_state) = 0;
-
rmb();
idle = pm_idle;
if (!idle)
@@ -207,40 +203,19 @@ static void do_nothing(void *unused)
{
}

+/*
+ * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
+ * pm_idle and update to new pm_idle value. Required while changing pm_idle
+ * handler on SMP systems.
+ *
+ * Caller must have changed pm_idle to the new value before the call. Old
+ * pm_idle value will not be used by any CPU after the return of this function.
+ */
void cpu_idle_wait(void)
{
- unsigned int cpu, this_cpu = get_cpu();
- cpumask_t map, tmp = current->cpus_allowed;
-
- set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
- put_cpu();
-
- cpus_clear(map);
- for_each_online_cpu(cpu) {
- per_cpu(cpu_idle_state, cpu) = 1;
- cpu_set(cpu, map);
- }
-
- __get_cpu_var(cpu_idle_state) = 0;
-
- wmb();
- do {
- ssleep(1);
- for_each_online_cpu(cpu) {
- if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
- cpu_clear(cpu, map);
- }
- cpus_and(map, map, cpu_online_map);
- /*
- * We waited 1 sec, if a CPU still did not call idle
- * it may be because it is in idle and not waking up
- * because it has nothing to do.
- * Give all the remaining CPUS a kick.
- */
- smp_call_function_mask(map, do_nothing, 0, 0);
- } while (!cpus_empty(map));
-
- set_cpus_allowed(current, tmp);
+ smp_mb();
+ /* kick all the CPUs so that they exit out of pm_idle */
+ smp_call_function(do_nothing, NULL, 0, 1);
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);