[tip:timers/core] hrtimer: Implement support for softirq based hrtimers

From: tip-bot for Anna-Maria Gleixner
Date: Tue Jan 16 2018 - 05:25:58 EST


Commit-ID: 5da70160462e80b0ab8a6960cdd0cdd476907523
Gitweb: https://git.kernel.org/tip/5da70160462e80b0ab8a6960cdd0cdd476907523
Author: Anna-Maria Gleixner <anna-maria@xxxxxxxxxxxxx>
AuthorDate: Thu, 21 Dec 2017 11:41:57 +0100
Committer: Ingo Molnar <mingo@xxxxxxxxxx>
CommitDate: Tue, 16 Jan 2018 09:51:22 +0100

hrtimer: Implement support for softirq based hrtimers

hrtimer callbacks are always invoked in hard interrupt context. Several
users in tree require soft interrupt context for their callbacks and
achieve this by combining a hrtimer with a tasklet. The hrtimer schedules
the tasklet in hard interrupt context and the tasklet callback gets invoked
in softirq context later.

That's suboptimal and aside of that the real-time patch moves most of the
hrtimers into softirq context. So adding native support for hrtimers
expiring in softirq context is a valuable extension for both mainline and
the RT patch set.

Each valid hrtimer clock id has two associated hrtimer clock bases: one for
timers expiring in hardirq context and one for timers expiring in softirq
context.

Implement the functionality to associate a hrtimer with the hard or softirq
related clock bases and update the relevant functions to take them into
account when the next expiry time needs to be evaluated.

Add a check into the hard interrupt context handler functions to check
whether the first expiring softirq based timer has expired. If it's expired
the softirq is raised and the accounting of softirq based timers to
evaluate the next expiry time for programming the timer hardware is skipped
until the softirq processing has finished. At the end of the softirq
processing the regular processing is resumed.

Suggested-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Suggested-by: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Signed-off-by: Anna-Maria Gleixner <anna-maria@xxxxxxxxxxxxx>
Cc: Christoph Hellwig <hch@xxxxxx>
Cc: John Stultz <john.stultz@xxxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: keescook@xxxxxxxxxxxx
Link: http://lkml.kernel.org/r/20171221104205.7269-29-anna-maria@xxxxxxxxxxxxx
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
---
include/linux/hrtimer.h | 21 ++++--
kernel/time/hrtimer.c | 196 ++++++++++++++++++++++++++++++++++++++++++------
2 files changed, 188 insertions(+), 29 deletions(-)

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 26ae8a8..c7902ca 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -103,6 +103,7 @@ enum hrtimer_restart {
* @base: pointer to the timer base (per cpu and per clock)
* @state: state information (See bit values above)
* @is_rel: Set if the timer was armed relative
+ * @is_soft: Set if hrtimer will be expired in soft interrupt context.
*
* The hrtimer structure must be initialized by hrtimer_init()
*/
@@ -113,6 +114,7 @@ struct hrtimer {
struct hrtimer_clock_base *base;
u8 state;
u8 is_rel;
+ u8 is_soft;
};

/**
@@ -178,13 +180,18 @@ enum hrtimer_base_type {
* @hres_active: State of high resolution mode
* @in_hrtirq: hrtimer_interrupt() is currently executing
* @hang_detected: The last hrtimer interrupt detected a hang
+ * @softirq_activated: displays, if the softirq is raised - update of softirq
+ * related settings is not required then.
* @nr_events: Total number of hrtimer interrupt events
* @nr_retries: Total number of hrtimer interrupt retries
* @nr_hangs: Total number of hrtimer interrupt hangs
* @max_hang_time: Maximum time spent in hrtimer_interrupt
* @expires_next: absolute time of the next event, is required for remote
- * hrtimer enqueue
+ * hrtimer enqueue; it is the total first expiry time (hard
+ * and soft hrtimer are taken into account)
* @next_timer: Pointer to the first expiring timer
+ * @softirq_expires_next: Time to check, if soft queues needs also to be expired
+ * @softirq_next_timer: Pointer to the first expiring softirq based timer
* @clock_base: array of clock bases for this cpu
*
* Note: next_timer is just an optimization for __remove_hrtimer().
@@ -196,9 +203,10 @@ struct hrtimer_cpu_base {
unsigned int cpu;
unsigned int active_bases;
unsigned int clock_was_set_seq;
- unsigned int hres_active : 1,
- in_hrtirq : 1,
- hang_detected : 1;
+ unsigned int hres_active : 1,
+ in_hrtirq : 1,
+ hang_detected : 1,
+ softirq_activated : 1;
#ifdef CONFIG_HIGH_RES_TIMERS
unsigned int nr_events;
unsigned short nr_retries;
@@ -207,6 +215,8 @@ struct hrtimer_cpu_base {
#endif
ktime_t expires_next;
struct hrtimer *next_timer;
+ ktime_t softirq_expires_next;
+ struct hrtimer *softirq_next_timer;
struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES];
} ____cacheline_aligned;

@@ -379,7 +389,8 @@ extern void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
* @timer: the timer to be added
* @tim: expiry time
* @mode: timer mode: absolute (HRTIMER_MODE_ABS) or
- * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED)
+ * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED);
+ * softirq based mode is considered for debug purpose only!
*/
static inline void hrtimer_start(struct hrtimer *timer, ktime_t tim,
const enum hrtimer_mode mode)
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index ba4674e..d93e3e7 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -411,7 +411,8 @@ static inline void debug_hrtimer_init(struct hrtimer *timer)
debug_object_init(timer, &hrtimer_debug_descr);
}

-static inline void debug_hrtimer_activate(struct hrtimer *timer)
+static inline void debug_hrtimer_activate(struct hrtimer *timer,
+ enum hrtimer_mode mode)
{
debug_object_activate(timer, &hrtimer_debug_descr);
}
@@ -444,8 +445,10 @@ void destroy_hrtimer_on_stack(struct hrtimer *timer)
EXPORT_SYMBOL_GPL(destroy_hrtimer_on_stack);

#else
+
static inline void debug_hrtimer_init(struct hrtimer *timer) { }
-static inline void debug_hrtimer_activate(struct hrtimer *timer) { }
+static inline void debug_hrtimer_activate(struct hrtimer *timer,
+ enum hrtimer_mode mode) { }
static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { }
#endif

@@ -460,7 +463,7 @@ debug_init(struct hrtimer *timer, clockid_t clockid,
static inline void debug_activate(struct hrtimer *timer,
enum hrtimer_mode mode)
{
- debug_hrtimer_activate(timer);
+ debug_hrtimer_activate(timer, mode);
trace_hrtimer_start(timer, mode);
}

@@ -503,7 +506,10 @@ static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base,
expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
if (expires < expires_next) {
expires_next = expires;
- cpu_base->next_timer = timer;
+ if (timer->is_soft)
+ cpu_base->softirq_next_timer = timer;
+ else
+ cpu_base->next_timer = timer;
}
}
/*
@@ -520,21 +526,39 @@ static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base,
* Recomputes cpu_base::*next_timer and returns the earliest expires_next but
* does not set cpu_base::*expires_next, that is done by hrtimer_reprogram.
*
+ * When a softirq is pending, we can ignore the HRTIMER_ACTIVE_SOFT bases,
+ * those timers will get run whenever the softirq gets handled, at the end of
+ * hrtimer_run_softirq(), hrtimer_update_softirq_timer() will re-add these bases.
+ *
+ * Therefore softirq values are those from the HRTIMER_ACTIVE_SOFT clock bases.
+ * The !softirq values are the minima across HRTIMER_ACTIVE_ALL, unless an actual
+ * softirq is pending, in which case they're the minima of HRTIMER_ACTIVE_HARD.
+ *
* @active_mask must be one of:
- * - HRTIMER_ACTIVE,
+ * - HRTIMER_ACTIVE_ALL,
* - HRTIMER_ACTIVE_SOFT, or
* - HRTIMER_ACTIVE_HARD.
*/
-static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base,
- unsigned int active_mask)
+static ktime_t
+__hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_mask)
{
unsigned int active;
+ struct hrtimer *next_timer = NULL;
ktime_t expires_next = KTIME_MAX;

- cpu_base->next_timer = NULL;
+ if (!cpu_base->softirq_activated && (active_mask & HRTIMER_ACTIVE_SOFT)) {
+ active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT;
+ cpu_base->softirq_next_timer = NULL;
+ expires_next = __hrtimer_next_event_base(cpu_base, active, KTIME_MAX);
+
+ next_timer = cpu_base->softirq_next_timer;
+ }

- active = cpu_base->active_bases & active_mask;
- expires_next = __hrtimer_next_event_base(cpu_base, active, expires_next);
+ if (active_mask & HRTIMER_ACTIVE_HARD) {
+ active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD;
+ cpu_base->next_timer = next_timer;
+ expires_next = __hrtimer_next_event_base(cpu_base, active, expires_next);
+ }

return expires_next;
}
@@ -545,8 +569,14 @@ static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset;
ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset;

- return ktime_get_update_offsets_now(&base->clock_was_set_seq,
+ ktime_t now = ktime_get_update_offsets_now(&base->clock_was_set_seq,
offs_real, offs_boot, offs_tai);
+
+ base->clock_base[HRTIMER_BASE_REALTIME_SOFT].offset = *offs_real;
+ base->clock_base[HRTIMER_BASE_BOOTTIME_SOFT].offset = *offs_boot;
+ base->clock_base[HRTIMER_BASE_TAI_SOFT].offset = *offs_tai;
+
+ return now;
}

/*
@@ -573,7 +603,23 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
{
ktime_t expires_next;

- expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD);
+ /*
+ * Find the current next expiration time.
+ */
+ expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL);
+
+ if (cpu_base->next_timer && cpu_base->next_timer->is_soft) {
+ /*
+ * When the softirq is activated, hrtimer has to be
+ * programmed with the first hard hrtimer because soft
+ * timer interrupt could occur too late.
+ */
+ if (cpu_base->softirq_activated)
+ expires_next = __hrtimer_get_next_event(cpu_base,
+ HRTIMER_ACTIVE_HARD);
+ else
+ cpu_base->softirq_expires_next = expires_next;
+ }

if (skip_equal && expires_next == cpu_base->expires_next)
return;
@@ -700,7 +746,7 @@ static inline void retrigger_next_event(void *arg) { }
*
* Called with interrupts disabled and base->cpu_base.lock held
*/
-static void hrtimer_reprogram(struct hrtimer *timer)
+static void hrtimer_reprogram(struct hrtimer *timer, bool reprogram)
{
struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
struct hrtimer_clock_base *base = timer->base;
@@ -709,6 +755,37 @@ static void hrtimer_reprogram(struct hrtimer *timer)
WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0);

/*
+ * CLOCK_REALTIME timer might be requested with an absolute
+ * expiry time which is less than base->offset. Set it to 0.
+ */
+ if (expires < 0)
+ expires = 0;
+
+ if (timer->is_soft) {
+ /*
+ * soft hrtimer could be started on a remote CPU. In this
+ * case softirq_expires_next needs to be updated on the
+ * remote CPU. The soft hrtimer will not expire before the
+ * first hard hrtimer on the remote CPU -
+ * hrtimer_check_target() prevents this case.
+ */
+ struct hrtimer_cpu_base *timer_cpu_base = base->cpu_base;
+
+ if (timer_cpu_base->softirq_activated)
+ return;
+
+ if (!ktime_before(expires, timer_cpu_base->softirq_expires_next))
+ return;
+
+ timer_cpu_base->softirq_next_timer = timer;
+ timer_cpu_base->softirq_expires_next = expires;
+
+ if (!ktime_before(expires, timer_cpu_base->expires_next) ||
+ !reprogram)
+ return;
+ }
+
+ /*
* If the timer is not on the current cpu, we cannot reprogram
* the other cpus clock event device.
*/
@@ -725,13 +802,6 @@ static void hrtimer_reprogram(struct hrtimer *timer)
if (cpu_base->in_hrtirq)
return;

- /*
- * CLOCK_REALTIME timer might be requested with an absolute
- * expiry time which is less than base->offset. Set it to 0.
- */
- if (expires < 0)
- expires = 0;
-
if (expires >= cpu_base->expires_next)
return;

@@ -957,6 +1027,31 @@ static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim,
return tim;
}

+static void
+hrtimer_update_softirq_timer(struct hrtimer_cpu_base *cpu_base, bool reprogram)
+{
+ ktime_t expires;
+
+ /*
+ * Find the next SOFT expiration.
+ */
+ expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT);
+
+ /*
+ * reprogramming needs to be triggered, even if the next soft
+ * hrtimer expires at the same time than the next hard
+ * hrtimer. cpu_base->softirq_expires_next needs to be updated!
+ */
+ if (expires == KTIME_MAX)
+ return;
+
+ /*
+ * cpu_base->*next_timer is recomputed by __hrtimer_get_next_event()
+ * cpu_base->*expires_next is only set by hrtimer_reprogram()
+ */
+ hrtimer_reprogram(cpu_base->softirq_next_timer, reprogram);
+}
+
static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
u64 delta_ns, const enum hrtimer_mode mode,
struct hrtimer_clock_base *base)
@@ -978,13 +1073,15 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,

return enqueue_hrtimer(timer, new_base, mode);
}
+
/**
* hrtimer_start_range_ns - (re)start an hrtimer
* @timer: the timer to be added
* @tim: expiry time
* @delta_ns: "slack" range for the timer
* @mode: timer mode: absolute (HRTIMER_MODE_ABS) or
- * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED)
+ * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED);
+ * softirq based mode is considered for debug purpose only!
*/
void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
u64 delta_ns, const enum hrtimer_mode mode)
@@ -992,10 +1089,16 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
struct hrtimer_clock_base *base;
unsigned long flags;

+ /*
+ * Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft
+ * match.
+ */
+ WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft);
+
base = lock_hrtimer_base(timer, &flags);

if (__hrtimer_start_range_ns(timer, tim, delta_ns, mode, base))
- hrtimer_reprogram(timer);
+ hrtimer_reprogram(timer, true);

unlock_hrtimer_base(timer, &flags);
}
@@ -1094,7 +1197,7 @@ u64 hrtimer_get_next_event(void)
raw_spin_lock_irqsave(&cpu_base->lock, flags);

if (!__hrtimer_hres_active(cpu_base))
- expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD);
+ expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL);

raw_spin_unlock_irqrestore(&cpu_base->lock, flags);

@@ -1304,6 +1407,23 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now,
}
}

+static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h)
+{
+ struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
+ unsigned long flags;
+ ktime_t now;
+
+ raw_spin_lock_irqsave(&cpu_base->lock, flags);
+
+ now = hrtimer_update_base(cpu_base);
+ __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_SOFT);
+
+ cpu_base->softirq_activated = 0;
+ hrtimer_update_softirq_timer(cpu_base, true);
+
+ raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
+}
+
#ifdef CONFIG_HIGH_RES_TIMERS

/*
@@ -1334,10 +1454,16 @@ retry:
*/
cpu_base->expires_next = KTIME_MAX;

+ if (!ktime_before(now, cpu_base->softirq_expires_next)) {
+ cpu_base->softirq_expires_next = KTIME_MAX;
+ cpu_base->softirq_activated = 1;
+ raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+ }
+
__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);

/* Reevaluate the clock bases for the next expiry */
- expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD);
+ expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL);
/*
* Store the new expiry value so the migration code can verify
* against it.
@@ -1441,6 +1567,13 @@ void hrtimer_run_queues(void)

raw_spin_lock_irqsave(&cpu_base->lock, flags);
now = hrtimer_update_base(cpu_base);
+
+ if (!ktime_before(now, cpu_base->softirq_expires_next)) {
+ cpu_base->softirq_expires_next = KTIME_MAX;
+ cpu_base->softirq_activated = 1;
+ raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+ }
+
__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
}
@@ -1622,6 +1755,7 @@ int hrtimers_prepare_cpu(unsigned int cpu)
cpu_base->cpu = cpu;
cpu_base->hres_active = 0;
cpu_base->expires_next = KTIME_MAX;
+ cpu_base->softirq_expires_next = KTIME_MAX;
return 0;
}

@@ -1665,6 +1799,12 @@ int hrtimers_dead_cpu(unsigned int scpu)
BUG_ON(cpu_online(scpu));
tick_cancel_sched_timer(scpu);

+ /*
+ * this BH disable ensures that raise_softirq_irqoff() does
+ * not wakeup ksoftirqd (and acquire the pi-lock) while
+ * holding the cpu_base lock
+ */
+ local_bh_disable();
local_irq_disable();
old_base = &per_cpu(hrtimer_bases, scpu);
new_base = this_cpu_ptr(&hrtimer_bases);
@@ -1680,12 +1820,19 @@ int hrtimers_dead_cpu(unsigned int scpu)
&new_base->clock_base[i]);
}

+ /*
+ * The migration might have changed the first expiring softirq
+ * timer on this CPU. Update it.
+ */
+ hrtimer_update_softirq_timer(new_base, false);
+
raw_spin_unlock(&old_base->lock);
raw_spin_unlock(&new_base->lock);

/* Check, if we got expired work to do */
__hrtimer_peek_ahead_timers();
local_irq_enable();
+ local_bh_enable();
return 0;
}

@@ -1694,6 +1841,7 @@ int hrtimers_dead_cpu(unsigned int scpu)
void __init hrtimers_init(void)
{
hrtimers_prepare_cpu(smp_processor_id());
+ open_softirq(HRTIMER_SOFTIRQ, hrtimer_run_softirq);
}

/**