Re: [PATCH 1/2] timer: Avoid waking up an idle-core by migrate running timer

From: Thomas Gleixner
Date: Tue Apr 14 2015 - 19:13:24 EST


On Tue, 31 Mar 2015, Viresh Kumar wrote:
> @@ -1189,12 +1195,41 @@ static inline void __run_timers(struct tvec_base *base)
> cascade(base, &base->tv5, INDEX(3));
> ++base->timer_jiffies;
> list_replace_init(base->tv1.vec + index, head);
> +
> +again:
> while (!list_empty(head)) {
> void (*fn)(unsigned long);
> unsigned long data;
> bool irqsafe;
>
> - timer = list_first_entry(head, struct timer_list,entry);
> + timer = list_first_entry(head, struct timer_list, entry);
> +
> + if (unlikely(timer_running(timer))) {
> + /*
> + * The only way to get here is if the handler,
> + * running on another base, re-queued itself on
> + * this base, and the handler hasn't finished
> + * yet.
> + */
> +
> + if (list_is_last(&timer->entry, head)) {
> + /*
> + * Drop lock, so that TIMER_RUNNING can
> + * be cleared on another base.
> + */
> + spin_unlock(&base->lock);
> +
> + while (timer_running(timer))
> + cpu_relax();
> +
> + spin_lock(&base->lock);
> + } else {
> + /* Rotate the list and try someone else */
> + list_move_tail(&timer->entry, head);
> + }

Can we please stick that timer into the next bucket and be done with it?

> + goto again;

"continue;" is old school, right?

> + }
> +
> fn = timer->function;
> data = timer->data;
> irqsafe = tbase_get_irqsafe(timer->base);
> @@ -1202,6 +1237,7 @@ static inline void __run_timers(struct tvec_base *base)
> timer_stats_account_timer(timer);
>
> base->running_timer = timer;
> + timer_set_running(timer);
> detach_expired_timer(timer, base);
>
> if (irqsafe) {
> @@ -1213,6 +1249,25 @@ static inline void __run_timers(struct tvec_base *base)
> call_timer_fn(timer, fn, data);
> spin_lock_irq(&base->lock);
> }
> +
> + /*
> + * Handler running on this base, re-queued itself on
> + * another base ?
> + */
> + if (unlikely(timer->base != base)) {
> + unsigned long flags;
> + struct tvec_base *tbase;
> +
> + spin_unlock(&base->lock);
> +
> + tbase = lock_timer_base(timer, &flags);
> + timer_clear_running(timer);
> + spin_unlock(&tbase->lock);
> +
> + spin_lock(&base->lock);
> + } else {
> + timer_clear_running(timer);
> + }

Aside of the above this is really horrible. Why not doing the obvious:

__mod_timer()

if (base != newbase) {
if (timer_running()) {
list_add(&base->migration_list);
goto out_unlock;
}
.....

__run_timers()

while(!list_empty(head)) {
....
}

if (unlikely(!list_empty(&base->migration_list)) {
/* dequeue and requeue again */
}

Simple, isn't it?

No new flags in the timer base, no concurrent expiry, no extra
conditionals in the expiry path. Just a single extra check at the end
of the softirq handler for this rare condition instead of imposing all
that nonsense to the hotpath.

We might even optimize that:

if (timer_running()) {
list_add(&base->migration_list);
base->preferred_target = newbase;
goto out_unlock;
}

if (unlikely(!list_empty(&base->migration_list)) {
/* dequeue and requeue again */
while (!list_empty(&base->migration_list)) {
dequeue_timer();
newbase = base->preferred_target;
unlock(base);
lock(newbase);
enqueue(newbase);
unlock(newbase);
lock(base);
}
}

Thanks,

tglx
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/