[PATCH 8/9] hrtimer: Allow hrtimer::function() to free the timer

From: Peter Zijlstra
Date: Wed Jun 03 2015 - 09:55:26 EST


Currently an hrtimer callback function cannot free its own timer
because __run_hrtimer() still needs to clear HRTIMER_STATE_CALLBACK
after it. Freeing the timer would result in a clear use-after-free.

Solve this by using a scheme similar to regular timers; track the
current running timer in hrtimer_clock_base::running.

Suggested-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
include/linux/hrtimer.h | 35 ++++++++++++++---------------------
kernel/time/hrtimer.c | 48 ++++++++++++++++++++++--------------------------
2 files changed, 36 insertions(+), 47 deletions(-)

--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -53,34 +53,25 @@ enum hrtimer_restart {
*
* 0x00 inactive
* 0x01 enqueued into rbtree
- * 0x02 callback function running
- * 0x04 timer is migrated to another cpu
+ * 0x02 timer is migrated to another cpu
*
- * Special cases:
- * 0x03 callback function running and enqueued
- * (was requeued on another CPU)
- * 0x05 timer was migrated on CPU hotunplug
+ * The callback state is not part of the timer->state because clearing it would
+ * mean touching the timer after the callback, this makes it impossible to free
+ * the timer from the callback function.
*
- * The "callback function running and enqueued" status is only possible on
- * SMP. It happens for example when a posix timer expired and the callback
+ * Therefore we track the callback state in timer->base->running == timer.
+ *
+ * On SMP it is possible to have a "callback function running and enqueued"
+ * status. It happens for example when a posix timer expired and the callback
* queued a signal. Between dropping the lock which protects the posix timer
* and reacquiring the base lock of the hrtimer, another CPU can deliver the
- * signal and rearm the timer. We have to preserve the callback running state,
- * as otherwise the timer could be removed before the softirq code finishes the
- * the handling of the timer.
- *
- * The HRTIMER_STATE_ENQUEUED bit is always or'ed to the current state
- * to preserve the HRTIMER_STATE_CALLBACK in the above scenario. This
- * also affects HRTIMER_STATE_MIGRATE where the preservation is not
- * necessary. HRTIMER_STATE_MIGRATE is cleared after the timer is
- * enqueued on the new cpu.
+ * signal and rearm the timer.
*
* All state transitions are protected by cpu_base->lock.
*/
#define HRTIMER_STATE_INACTIVE 0x00
#define HRTIMER_STATE_ENQUEUED 0x01
-#define HRTIMER_STATE_CALLBACK 0x02
-#define HRTIMER_STATE_MIGRATE 0x04
+#define HRTIMER_STATE_MIGRATE 0x02

/**
* struct hrtimer - the basic hrtimer structure
@@ -153,6 +144,7 @@ struct hrtimer_clock_base {
struct timerqueue_head active;
ktime_t (*get_time)(void);
ktime_t offset;
+ struct hrtimer *running;
} __attribute__((__aligned__(HRTIMER_CLOCK_BASE_ALIGN)));

enum hrtimer_base_type {
@@ -402,7 +394,8 @@ extern u64 hrtimer_get_next_event(void);
*/
static inline int hrtimer_active(const struct hrtimer *timer)
{
- return timer->state != HRTIMER_STATE_INACTIVE;
+ return timer->state != HRTIMER_STATE_INACTIVE ||
+ timer->base->running == timer;
}

/*
@@ -419,7 +412,7 @@ static inline int hrtimer_is_queued(stru
*/
static inline int hrtimer_callback_running(struct hrtimer *timer)
{
- return timer->state & HRTIMER_STATE_CALLBACK;
+ return timer->base->running == timer;
}

/* Forward a hrtimer so it expires after now: */
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -111,6 +111,13 @@ static inline int hrtimer_clockid_to_bas
#ifdef CONFIG_SMP

/*
+ * We require the migration_base for lock_hrtimer_base()/switch_hrtimer_base()
+ * such that hrtimer_callback_running() can unconditionally dereference
+ * timer->base.
+ */
+static struct hrtimer_clock_base migration_base;
+
+/*
* We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock
* means that all timers which are tied to this base via timer->base are
* locked, and the base itself is locked too.
@@ -119,8 +126,8 @@ static inline int hrtimer_clockid_to_bas
* be found on the lists/queues.
*
* When the timer's base is locked, and the timer removed from list, it is
- * possible to set timer->base = NULL and drop the lock: the timer remains
- * locked.
+ * possible to set timer->base = &migration_base and drop the lock: the timer
+ * remains locked.
*/
static
struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
@@ -130,7 +137,7 @@ struct hrtimer_clock_base *lock_hrtimer_

for (;;) {
base = timer->base;
- if (likely(base != NULL)) {
+ if (likely(base != &migration_base)) {
raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
if (likely(base == timer->base))
return base;
@@ -194,8 +201,8 @@ switch_hrtimer_base(struct hrtimer *time
if (unlikely(hrtimer_callback_running(timer)))
return base;

- /* See the comment in lock_timer_base() */
- timer->base = NULL;
+ /* See the comment in lock_hrtimer_base() */
+ timer->base = &migration_base;
raw_spin_unlock(&base->cpu_base->lock);
raw_spin_lock(&new_base->cpu_base->lock);

@@ -840,11 +847,7 @@ static int enqueue_hrtimer(struct hrtime

base->cpu_base->active_bases |= 1 << base->index;

- /*
- * HRTIMER_STATE_ENQUEUED is or'ed to the current state to preserve the
- * state of a possibly running callback.
- */
- timer->state |= HRTIMER_STATE_ENQUEUED;
+ timer->state = HRTIMER_STATE_ENQUEUED;

return timerqueue_add(&base->active, &timer->node);
}
@@ -894,7 +897,6 @@ static inline int
remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base)
{
if (hrtimer_is_queued(timer)) {
- unsigned long state;
int reprogram;

/*
@@ -908,13 +910,8 @@ remove_hrtimer(struct hrtimer *timer, st
debug_deactivate(timer);
timer_stats_hrtimer_clear_start_info(timer);
reprogram = base->cpu_base == this_cpu_ptr(&hrtimer_bases);
- /*
- * We must preserve the CALLBACK state flag here,
- * otherwise we could move the timer base in
- * switch_hrtimer_base.
- */
- state = timer->state & HRTIMER_STATE_CALLBACK;
- __remove_hrtimer(timer, base, state, reprogram);
+
+ __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, reprogram);
return 1;
}
return 0;
@@ -1124,7 +1121,8 @@ static void __run_hrtimer(struct hrtimer
WARN_ON(!irqs_disabled());

debug_deactivate(timer);
- __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
+ base->running = timer;
+ __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0);
timer_stats_account_hrtimer(timer);
fn = timer->function;

@@ -1140,7 +1138,7 @@ static void __run_hrtimer(struct hrtimer
raw_spin_lock(&cpu_base->lock);

/*
- * Note: We clear the CALLBACK bit after enqueue_hrtimer and
+ * Note: We clear the running state after enqueue_hrtimer and
* we do not reprogramm the event hardware. Happens either in
* hrtimer_start_range_ns() or in hrtimer_interrupt()
*
@@ -1152,9 +1150,8 @@ static void __run_hrtimer(struct hrtimer
!(timer->state & HRTIMER_STATE_ENQUEUED))
enqueue_hrtimer(timer, base);

- WARN_ON_ONCE(!(timer->state & HRTIMER_STATE_CALLBACK));
-
- timer->state &= ~HRTIMER_STATE_CALLBACK;
+ WARN_ON_ONCE(base->running != timer);
+ base->running = NULL;
}

static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now)
@@ -1523,11 +1520,10 @@ static void migrate_hrtimer_list(struct
* hrtimer_interrupt after we migrated everything to
* sort out already expired timers and reprogram the
* event device.
+ *
+ * Sets timer->state = HRTIMER_STATE_ENQUEUED.
*/
enqueue_hrtimer(timer, new_base);
-
- /* Clear the migration state bit */
- timer->state &= ~HRTIMER_STATE_MIGRATE;
}
}



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/