kernel/timer: avoid spurious ksoftirqd wakeups

From: Marcelo Tosatti
Date: Wed Apr 01 2015 - 21:45:19 EST



It is only necessary to raise the timer softirq
when there are active timers or there is irq work
to do.

Limit the ksoftirqd wakeup to those cases.

This fixes a latency spike seen on isolated CPUs
running in nohz full mode.

Reported-and-tested-by: Luiz Capitulino <lcapitulino@xxxxxxxxxx>
Signed-off-by: Marcelo Tosatti <mtosatti@xxxxxxxxxx>

diff --git a/include/linux/timer.h b/include/linux/timer.h
index 8c5a197..0c065f9 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -192,7 +192,7 @@ extern void set_timer_slack(struct timer_list *time, int slack_hz);
* locks the timer base and does the comparison against the given
* jiffie.
*/
-extern unsigned long get_next_timer_interrupt(unsigned long now);
+extern unsigned long get_next_timer_interrupt(unsigned long now, bool *raise_softirq);

/*
* Timer-statistics info:
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index a4c4eda..615e276 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -568,6 +568,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
unsigned long rcu_delta_jiffies;
struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
u64 time_delta;
+ bool raise_softirq;

time_delta = timekeeping_max_deferment();

@@ -582,9 +583,11 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
arch_needs_cpu() || irq_work_needs_cpu()) {
next_jiffies = last_jiffies + 1;
delta_jiffies = 1;
+ raise_softirq = true;
} else {
/* Get the next timer wheel timer */
- next_jiffies = get_next_timer_interrupt(last_jiffies);
+ next_jiffies = get_next_timer_interrupt(last_jiffies,
+ &raise_softirq);
delta_jiffies = next_jiffies - last_jiffies;
if (rcu_delta_jiffies < delta_jiffies) {
next_jiffies = last_jiffies + rcu_delta_jiffies;
@@ -703,7 +706,8 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
*/
tick_do_update_jiffies64(ktime_get());
}
- raise_softirq_irqoff(TIMER_SOFTIRQ);
+ if (raise_softirq)
+ raise_softirq_irqoff(TIMER_SOFTIRQ);
out:
ts->next_jiffies = next_jiffies;
ts->last_jiffies = last_jiffies;
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 2d3f5c5..771f811 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1343,7 +1343,7 @@ static unsigned long cmp_next_hrtimer_event(unsigned long now,
* get_next_timer_interrupt - return the jiffy of the next pending timer
* @now: current time (in jiffies)
*/
-unsigned long get_next_timer_interrupt(unsigned long now)
+unsigned long get_next_timer_interrupt(unsigned long now, bool *raise_softirq)
{
struct tvec_base *base = __this_cpu_read(tvec_bases);
unsigned long expires = now + NEXT_TIMER_MAX_DELTA;
@@ -1357,6 +1357,7 @@ unsigned long get_next_timer_interrupt(unsigned long now)

spin_lock(&base->lock);
if (base->active_timers) {
+ *raise_softirq = true;
if (time_before_eq(base->next_timer, base->timer_jiffies))
base->next_timer = __next_timer_interrupt(base);
expires = base->next_timer;


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/