Re: [PATCH v5 tip/core/rcu 15/16] rcu: Make RCU-tasks wait for idle tasks

From: Peter Zijlstra
Date: Wed Aug 13 2014 - 14:17:40 EST


On Wed, Aug 13, 2014 at 07:24:07PM +0200, Peter Zijlstra wrote:
> On Wed, Aug 13, 2014 at 04:42:19PM +0200, Peter Zijlstra wrote:
> > Auditing all idle functions will be somewhat of a pain, but its entirely
> > doable. Looking at this stuff, it appears we can clean it up massively;
> > see how the generic cpuidle code already has the broadcast logic in, so
> > we can remove that from the drivers by setting the right flags.
> >
> > We can similarly pull out the leave_mm() call by adding a
> > CPUIDLE_FLAG_TLB_FLUSH. At which point all we'd need to do is mark the
> > intel_idle (and all other cpuidle_state::enter functions with __notrace.
>
> This removes the broadcast stuff from intel_idle.c; processor_idle.c hurts
> my brain, but something similar should be possible.

Still hurts my brain; esp. acpi_idle_enter_bm() which calls a massive
lot of code to disable busmastering, but maybe its doable, we need that
__notrace detectoring.


Also, someone needs to double check this; preferably LenB or Rafael who
have seen this driver before. Teh pain.

---
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -171,32 +171,11 @@ static void lapic_timer_propagate_broadc
(void *)pr, 1);
}

-/* Power(C) State timer broadcast control */
-static void lapic_timer_state_broadcast(struct acpi_processor *pr,
- struct acpi_processor_cx *cx,
- int broadcast)
-{
- int state = cx - pr->power.states;
-
- if (state >= pr->power.timer_broadcast_on_state) {
- unsigned long reason;
-
- reason = broadcast ? CLOCK_EVT_NOTIFY_BROADCAST_ENTER :
- CLOCK_EVT_NOTIFY_BROADCAST_EXIT;
- clockevents_notify(reason, &pr->id);
- }
-}
-
#else

static void lapic_timer_check_state(int state, struct acpi_processor *pr,
struct acpi_processor_cx *cstate) { }
static void lapic_timer_propagate_broadcast(struct acpi_processor *pr) { }
-static void lapic_timer_state_broadcast(struct acpi_processor *pr,
- struct acpi_processor_cx *cx,
- int broadcast)
-{
-}

#endif

@@ -717,19 +696,10 @@ static inline void acpi_idle_do_entry(st
static int acpi_idle_enter_c1(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int index)
{
- struct acpi_processor *pr;
struct acpi_processor_cx *cx = per_cpu(acpi_cstate[index], dev->cpu);

- pr = __this_cpu_read(processors);
-
- if (unlikely(!pr))
- return -EINVAL;
-
- lapic_timer_state_broadcast(pr, cx, 1);
acpi_idle_do_entry(cx);

- lapic_timer_state_broadcast(pr, cx, 0);
-
return index;
}

@@ -785,22 +755,8 @@ static int acpi_idle_enter_simple(struct
return acpi_idle_enter_c1(dev, drv, CPUIDLE_DRIVER_STATE_START);
#endif

- /*
- * Must be done before busmaster disable as we might need to
- * access HPET !
- */
- lapic_timer_state_broadcast(pr, cx, 1);
-
- if (cx->type == ACPI_STATE_C3)
- ACPI_FLUSH_CPU_CACHE();
-
- /* Tell the scheduler that we are going deep-idle: */
- sched_clock_idle_sleep_event();
acpi_idle_do_entry(cx);

- sched_clock_idle_wakeup_event(0);
-
- lapic_timer_state_broadcast(pr, cx, 0);
return index;
}

@@ -843,16 +799,6 @@ static int acpi_idle_enter_bm(struct cpu
}
}

- acpi_unlazy_tlb(smp_processor_id());
-
- /* Tell the scheduler that we are going deep-idle: */
- sched_clock_idle_sleep_event();
- /*
- * Must be done before busmaster disable as we might need to
- * access HPET !
- */
- lapic_timer_state_broadcast(pr, cx, 1);
-
/*
* disable bus master
* bm_check implies we need ARB_DIS
@@ -884,9 +830,6 @@ static int acpi_idle_enter_bm(struct cpu
raw_spin_unlock(&c3_lock);
}

- sched_clock_idle_wakeup_event(0);
-
- lapic_timer_state_broadcast(pr, cx, 0);
return index;
}

@@ -989,6 +932,7 @@ static int acpi_processor_setup_cpuidle_
state->flags |= CPUIDLE_FLAG_TIME_VALID;

state->enter = acpi_idle_enter_c1;
+ state->flags |= CPUIDLE_FLAG_TIMER_STOP;
state->enter_dead = acpi_idle_play_dead;
drv->safe_state_index = count;
break;
@@ -996,15 +940,20 @@ static int acpi_processor_setup_cpuidle_
case ACPI_STATE_C2:
state->flags |= CPUIDLE_FLAG_TIME_VALID;
state->enter = acpi_idle_enter_simple;
+ state->flags |= CPUIDLE_FLAG_TIMER_STOP;
state->enter_dead = acpi_idle_play_dead;
drv->safe_state_index = count;
break;

case ACPI_STATE_C3:
state->flags |= CPUIDLE_FLAG_TIME_VALID;
- state->enter = pr->flags.bm_check ?
- acpi_idle_enter_bm :
- acpi_idle_enter_simple;
+ state->flags |= CPUIDLE_FLAG_TLB_FLUSHED;
+ state->flags |= CPUIDLE_FLAG_TIMER_STOP;
+ if (pr->flags.bm_check) {
+ state->enter = acpi_idle_enter_bm;
+ } else {
+ state->enter = acpi_idle_enter_simple;
+ }
break;
}

--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -152,9 +152,11 @@ static void cpuidle_idle_call(void)
* is used from another cpu as a broadcast timer, this call may
* fail if it is not available
*/
- if (broadcast &&
- clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu))
- goto use_default;
+ if (broadcast) {
+ if (clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu))
+ goto use_default;
+ sched_clock_idle_sleep_event();
+ }

trace_cpu_idle_rcuidle(next_state, dev->cpu);

@@ -167,8 +169,10 @@ static void cpuidle_idle_call(void)

trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);

- if (broadcast)
+ if (broadcast) {
+ sched_clock_idle_wakeup_event(0);
clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
+ }

/*
* Give the governor an opportunity to reflect on the outcome

Attachment: pgpOLcgMrZvBE.pgp
Description: PGP signature