[PATCH RT 08/16] timers: Redo the notification of canceling timers on -RT
From: Steven Rostedt
Date: Fri Jul 19 2019 - 17:50:53 EST
4.19.59-rt24-rc1 stable review patch.
If anyone has any objections, please let me know.
------------------
From: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx>
[ Upstream commit c71273154c2ad12e13333aada340ff30e826a11b ]
Rework of the hrtimer, timer and posix-timer cancelation interface
on -RT. Instead of the swait/schedule interface we now have locks
which are taken while timer is active. During the cancellation of an
active timer the lock is acquired. The lock will then either
PI-boost the timer or block and wait until the timer completed.
The new code looks simpler and does not trigger a warning from
rcu_note_context_switch() anymore like reported by Grygorii Strashko
and Daniel Wagner.
The patches were contributed by Anna-Maria Gleixner.
This is an all in one commit of the following patches:
| [PATCH] timers: Introduce expiry spin lock
| [PATCH] timers: Drop expiry lock after each timer invocation
| [PATCH] hrtimer: Introduce expiry spin lock
| [PATCH] posix-timers: move rcu out of union
| [PATCH] posix-timers: Add expiry lock
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx>
Signed-off-by: Steven Rostedt (VMware) <rostedt@xxxxxxxxxxx>
---
fs/timerfd.c | 5 +-
include/linux/hrtimer.h | 17 ++----
include/linux/posix-timers.h | 1 +
kernel/time/alarmtimer.c | 2 +-
kernel/time/hrtimer.c | 36 ++++---------
kernel/time/itimer.c | 2 +-
kernel/time/posix-cpu-timers.c | 23 ++++++++
kernel/time/posix-timers.c | 69 ++++++++++--------------
kernel/time/posix-timers.h | 2 +
kernel/time/timer.c | 96 ++++++++++++++++------------------
10 files changed, 118 insertions(+), 135 deletions(-)
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 82d0f52414a6..f845093466be 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -471,10 +471,11 @@ static int do_timerfd_settime(int ufd, int flags,
break;
}
spin_unlock_irq(&ctx->wqh.lock);
+
if (isalarm(ctx))
- hrtimer_wait_for_timer(&ctx->t.alarm.timer);
+ hrtimer_grab_expiry_lock(&ctx->t.alarm.timer);
else
- hrtimer_wait_for_timer(&ctx->t.tmr);
+ hrtimer_grab_expiry_lock(&ctx->t.tmr);
}
/*
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 2bdb047c7656..6c4c38186c99 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -22,7 +22,6 @@
#include <linux/percpu.h>
#include <linux/timer.h>
#include <linux/timerqueue.h>
-#include <linux/wait.h>
struct hrtimer_clock_base;
struct hrtimer_cpu_base;
@@ -193,6 +192,8 @@ enum hrtimer_base_type {
* @nr_retries: Total number of hrtimer interrupt retries
* @nr_hangs: Total number of hrtimer interrupt hangs
* @max_hang_time: Maximum time spent in hrtimer_interrupt
+ * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are
+ * expired
* @expires_next: absolute time of the next event, is required for remote
* hrtimer enqueue; it is the total first expiry time (hard
* and soft hrtimer are taken into account)
@@ -220,12 +221,10 @@ struct hrtimer_cpu_base {
unsigned short nr_hangs;
unsigned int max_hang_time;
#endif
+ spinlock_t softirq_expiry_lock;
ktime_t expires_next;
struct hrtimer *next_timer;
ktime_t softirq_expires_next;
-#ifdef CONFIG_PREEMPT_RT_BASE
- wait_queue_head_t wait;
-#endif
struct hrtimer *softirq_next_timer;
struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES];
} ____cacheline_aligned;
@@ -426,6 +425,7 @@ static inline void hrtimer_start(struct hrtimer *timer, ktime_t tim,
extern int hrtimer_cancel(struct hrtimer *timer);
extern int hrtimer_try_to_cancel(struct hrtimer *timer);
+extern void hrtimer_grab_expiry_lock(const struct hrtimer *timer);
static inline void hrtimer_start_expires(struct hrtimer *timer,
enum hrtimer_mode mode)
@@ -443,13 +443,6 @@ static inline void hrtimer_restart(struct hrtimer *timer)
hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
}
-/* Softirq preemption could deadlock timer removal */
-#ifdef CONFIG_PREEMPT_RT_BASE
- extern void hrtimer_wait_for_timer(const struct hrtimer *timer);
-#else
-# define hrtimer_wait_for_timer(timer) do { cpu_relax(); } while (0)
-#endif
-
/* Query timers: */
extern ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust);
@@ -475,7 +468,7 @@ static inline int hrtimer_is_queued(struct hrtimer *timer)
* Helper function to check, whether the timer is running the callback
* function
*/
-static inline int hrtimer_callback_running(const struct hrtimer *timer)
+static inline int hrtimer_callback_running(struct hrtimer *timer)
{
return timer->base->running == timer;
}
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 0571b498db73..3e6c91bdf2ef 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -15,6 +15,7 @@ struct cpu_timer_list {
u64 expires, incr;
struct task_struct *task;
int firing;
+ int firing_cpu;
};
/*
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 966708e8ce14..efa1e433974b 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -436,7 +436,7 @@ int alarm_cancel(struct alarm *alarm)
int ret = alarm_try_to_cancel(alarm);
if (ret >= 0)
return ret;
- hrtimer_wait_for_timer(&alarm->timer);
+ hrtimer_grab_expiry_lock(&alarm->timer);
}
}
EXPORT_SYMBOL_GPL(alarm_cancel);
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index e1040b80362c..4534e7871c8c 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -963,33 +963,16 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
}
EXPORT_SYMBOL_GPL(hrtimer_forward);
-#ifdef CONFIG_PREEMPT_RT_BASE
-# define wake_up_timer_waiters(b) wake_up(&(b)->wait)
-
-/**
- * hrtimer_wait_for_timer - Wait for a running timer
- *
- * @timer: timer to wait for
- *
- * The function waits in case the timers callback function is
- * currently executed on the waitqueue of the timer base. The
- * waitqueue is woken up after the timer callback function has
- * finished execution.
- */
-void hrtimer_wait_for_timer(const struct hrtimer *timer)
+void hrtimer_grab_expiry_lock(const struct hrtimer *timer)
{
struct hrtimer_clock_base *base = timer->base;
- if (base && base->cpu_base &&
- base->index >= HRTIMER_BASE_MONOTONIC_SOFT)
- wait_event(base->cpu_base->wait,
- !(hrtimer_callback_running(timer)));
+ if (base && base->cpu_base) {
+ spin_lock(&base->cpu_base->softirq_expiry_lock);
+ spin_unlock(&base->cpu_base->softirq_expiry_lock);
+ }
}
-#else
-# define wake_up_timer_waiters(b) do { } while (0)
-#endif
-
/*
* enqueue_hrtimer - internal function to (re)start a timer
*
@@ -1224,7 +1207,7 @@ int hrtimer_cancel(struct hrtimer *timer)
if (ret >= 0)
return ret;
- hrtimer_wait_for_timer(timer);
+ hrtimer_grab_expiry_lock(timer);
}
}
EXPORT_SYMBOL_GPL(hrtimer_cancel);
@@ -1528,6 +1511,7 @@ static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h)
unsigned long flags;
ktime_t now;
+ spin_lock(&cpu_base->softirq_expiry_lock);
raw_spin_lock_irqsave(&cpu_base->lock, flags);
now = hrtimer_update_base(cpu_base);
@@ -1537,7 +1521,7 @@ static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h)
hrtimer_update_softirq_timer(cpu_base, true);
raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
- wake_up_timer_waiters(cpu_base);
+ spin_unlock(&cpu_base->softirq_expiry_lock);
}
#ifdef CONFIG_HIGH_RES_TIMERS
@@ -1947,9 +1931,7 @@ int hrtimers_prepare_cpu(unsigned int cpu)
cpu_base->softirq_next_timer = NULL;
cpu_base->expires_next = KTIME_MAX;
cpu_base->softirq_expires_next = KTIME_MAX;
-#ifdef CONFIG_PREEMPT_RT_BASE
- init_waitqueue_head(&cpu_base->wait);
-#endif
+ spin_lock_init(&cpu_base->softirq_expiry_lock);
return 0;
}
diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c
index 55b0e58368bf..a5ff222df4c7 100644
--- a/kernel/time/itimer.c
+++ b/kernel/time/itimer.c
@@ -215,7 +215,7 @@ int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
/* We are sharing ->siglock with it_real_fn() */
if (hrtimer_try_to_cancel(timer) < 0) {
spin_unlock_irq(&tsk->sighand->siglock);
- hrtimer_wait_for_timer(&tsk->signal->real_timer);
+ hrtimer_grab_expiry_lock(timer);
goto again;
}
expires = timeval_to_ktime(value->it_value);
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index baeeaef3b721..59ceedbb03f0 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -789,6 +789,7 @@ check_timers_list(struct list_head *timers,
return t->expires;
t->firing = 1;
+ t->firing_cpu = smp_processor_id();
list_move_tail(&t->entry, firing);
}
@@ -1134,6 +1135,20 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
return 0;
}
+static DEFINE_PER_CPU(spinlock_t, cpu_timer_expiry_lock) = __SPIN_LOCK_UNLOCKED(cpu_timer_expiry_lock);
+
+void cpu_timers_grab_expiry_lock(struct k_itimer *timer)
+{
+ int cpu = timer->it.cpu.firing_cpu;
+
+ if (cpu >= 0) {
+ spinlock_t *expiry_lock = per_cpu_ptr(&cpu_timer_expiry_lock, cpu);
+
+ spin_lock_irq(expiry_lock);
+ spin_unlock_irq(expiry_lock);
+ }
+}
+
/*
* This is called from the timer interrupt handler. The irq handler has
* already updated our counts. We need to check if any timers fire now.
@@ -1144,6 +1159,7 @@ static void __run_posix_cpu_timers(struct task_struct *tsk)
LIST_HEAD(firing);
struct k_itimer *timer, *next;
unsigned long flags;
+ spinlock_t *expiry_lock;
/*
* The fast path checks that there are no expired thread or thread
@@ -1152,6 +1168,9 @@ static void __run_posix_cpu_timers(struct task_struct *tsk)
if (!fastpath_timer_check(tsk))
return;
+ expiry_lock = this_cpu_ptr(&cpu_timer_expiry_lock);
+ spin_lock(expiry_lock);
+
if (!lock_task_sighand(tsk, &flags))
return;
/*
@@ -1186,6 +1205,7 @@ static void __run_posix_cpu_timers(struct task_struct *tsk)
list_del_init(&timer->it.cpu.entry);
cpu_firing = timer->it.cpu.firing;
timer->it.cpu.firing = 0;
+ timer->it.cpu.firing_cpu = -1;
/*
* The firing flag is -1 if we collided with a reset
* of the timer, which already reported this
@@ -1195,6 +1215,7 @@ static void __run_posix_cpu_timers(struct task_struct *tsk)
cpu_timer_fire(timer);
spin_unlock(&timer->it_lock);
}
+ spin_unlock(expiry_lock);
}
#ifdef CONFIG_PREEMPT_RT_BASE
@@ -1460,6 +1481,8 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
spin_unlock_irq(&timer.it_lock);
while (error == TIMER_RETRY) {
+
+ cpu_timers_grab_expiry_lock(&timer);
/*
* We need to handle case when timer was or is in the
* middle of firing. In other cases we already freed
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index a5ec421e3437..c7e97d421590 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -821,25 +821,20 @@ static void common_hrtimer_arm(struct k_itimer *timr, ktime_t expires,
hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
}
-/*
- * Protected by RCU!
- */
-static void timer_wait_for_callback(const struct k_clock *kc, struct k_itimer *timr)
+static int common_hrtimer_try_to_cancel(struct k_itimer *timr)
{
-#ifdef CONFIG_PREEMPT_RT_FULL
- if (kc->timer_arm == common_hrtimer_arm)
- hrtimer_wait_for_timer(&timr->it.real.timer);
- else if (kc == &alarm_clock)
- hrtimer_wait_for_timer(&timr->it.alarm.alarmtimer.timer);
- else
- /* FIXME: Whacky hack for posix-cpu-timers */
- schedule_timeout(1);
-#endif
+ return hrtimer_try_to_cancel(&timr->it.real.timer);
}
-static int common_hrtimer_try_to_cancel(struct k_itimer *timr)
+static void timer_wait_for_callback(const struct k_clock *kc, struct k_itimer *timer)
{
- return hrtimer_try_to_cancel(&timr->it.real.timer);
+ if (kc->timer_arm == common_hrtimer_arm)
+ hrtimer_grab_expiry_lock(&timer->it.real.timer);
+ else if (kc == &alarm_clock)
+ hrtimer_grab_expiry_lock(&timer->it.alarm.alarmtimer.timer);
+ else
+ /* posix-cpu-timers */
+ cpu_timers_grab_expiry_lock(timer);
}
/* Set a POSIX.1b interval timer. */
@@ -901,21 +896,21 @@ static int do_timer_settime(timer_t timer_id, int flags,
if (!timr)
return -EINVAL;
- rcu_read_lock();
kc = timr->kclock;
if (WARN_ON_ONCE(!kc || !kc->timer_set))
error = -EINVAL;
else
error = kc->timer_set(timr, flags, new_spec64, old_spec64);
- unlock_timer(timr, flag);
if (error == TIMER_RETRY) {
+ rcu_read_lock();
+ unlock_timer(timr, flag);
timer_wait_for_callback(kc, timr);
- old_spec64 = NULL; // We already got the old time...
rcu_read_unlock();
+ old_spec64 = NULL; // We already got the old time...
goto retry;
}
- rcu_read_unlock();
+ unlock_timer(timr, flag);
return error;
}
@@ -977,13 +972,21 @@ int common_timer_del(struct k_itimer *timer)
return 0;
}
-static inline int timer_delete_hook(struct k_itimer *timer)
+static int timer_delete_hook(struct k_itimer *timer)
{
const struct k_clock *kc = timer->kclock;
+ int ret;
if (WARN_ON_ONCE(!kc || !kc->timer_del))
return -EINVAL;
- return kc->timer_del(timer);
+ ret = kc->timer_del(timer);
+ if (ret == TIMER_RETRY) {
+ rcu_read_lock();
+ spin_unlock_irq(&timer->it_lock);
+ timer_wait_for_callback(kc, timer);
+ rcu_read_unlock();
+ }
+ return ret;
}
/* Delete a POSIX.1b interval timer. */
@@ -997,15 +1000,8 @@ SYSCALL_DEFINE1(timer_delete, timer_t, timer_id)
if (!timer)
return -EINVAL;
- rcu_read_lock();
- if (timer_delete_hook(timer) == TIMER_RETRY) {
- unlock_timer(timer, flags);
- timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
- timer);
- rcu_read_unlock();
+ if (timer_delete_hook(timer) == TIMER_RETRY)
goto retry_delete;
- }
- rcu_read_unlock();
spin_lock(¤t->sighand->siglock);
list_del(&timer->list);
@@ -1031,20 +1027,9 @@ static void itimer_delete(struct k_itimer *timer)
retry_delete:
spin_lock_irqsave(&timer->it_lock, flags);
- /* On RT we can race with a deletion */
- if (!timer->it_signal) {
- unlock_timer(timer, flags);
- return;
- }
-
- if (timer_delete_hook(timer) == TIMER_RETRY) {
- rcu_read_lock();
- unlock_timer(timer, flags);
- timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
- timer);
- rcu_read_unlock();
+ if (timer_delete_hook(timer) == TIMER_RETRY)
goto retry_delete;
- }
+
list_del(&timer->list);
/*
* This keeps any tasks waiting on the spin lock from thinking
diff --git a/kernel/time/posix-timers.h b/kernel/time/posix-timers.h
index ddb21145211a..725bd230a8db 100644
--- a/kernel/time/posix-timers.h
+++ b/kernel/time/posix-timers.h
@@ -32,6 +32,8 @@ extern const struct k_clock clock_process;
extern const struct k_clock clock_thread;
extern const struct k_clock alarm_clock;
+extern void cpu_timers_grab_expiry_lock(struct k_itimer *timer);
+
int posix_timer_event(struct k_itimer *timr, int si_private);
void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting);
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 781483c76b17..d6289d8df06b 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -44,7 +44,6 @@
#include <linux/sched/debug.h>
#include <linux/slab.h>
#include <linux/compat.h>
-#include <linux/swait.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
@@ -198,9 +197,7 @@ EXPORT_SYMBOL(jiffies_64);
struct timer_base {
raw_spinlock_t lock;
struct timer_list *running_timer;
-#ifdef CONFIG_PREEMPT_RT_FULL
- struct swait_queue_head wait_for_running_timer;
-#endif
+ spinlock_t expiry_lock;
unsigned long clk;
unsigned long next_expiry;
unsigned int cpu;
@@ -1189,33 +1186,6 @@ void add_timer_on(struct timer_list *timer, int cpu)
}
EXPORT_SYMBOL_GPL(add_timer_on);
-#ifdef CONFIG_PREEMPT_RT_FULL
-/*
- * Wait for a running timer
- */
-static void wait_for_running_timer(struct timer_list *timer)
-{
- struct timer_base *base;
- u32 tf = timer->flags;
-
- if (tf & TIMER_MIGRATING)
- return;
-
- base = get_timer_base(tf);
- swait_event_exclusive(base->wait_for_running_timer,
- base->running_timer != timer);
-}
-
-# define wakeup_timer_waiters(b) swake_up_all(&(b)->wait_for_running_timer)
-#else
-static inline void wait_for_running_timer(struct timer_list *timer)
-{
- cpu_relax();
-}
-
-# define wakeup_timer_waiters(b) do { } while (0)
-#endif
-
/**
* del_timer - deactivate a timer.
* @timer: the timer to be deactivated
@@ -1245,14 +1215,8 @@ int del_timer(struct timer_list *timer)
}
EXPORT_SYMBOL(del_timer);
-/**
- * try_to_del_timer_sync - Try to deactivate a timer
- * @timer: timer to delete
- *
- * This function tries to deactivate a timer. Upon successful (ret >= 0)
- * exit the timer is not queued and the handler is not running on any CPU.
- */
-int try_to_del_timer_sync(struct timer_list *timer)
+static int __try_to_del_timer_sync(struct timer_list *timer,
+ struct timer_base **basep)
{
struct timer_base *base;
unsigned long flags;
@@ -1260,7 +1224,7 @@ int try_to_del_timer_sync(struct timer_list *timer)
debug_assert_init(timer);
- base = lock_timer_base(timer, &flags);
+ *basep = base = lock_timer_base(timer, &flags);
if (base->running_timer != timer)
ret = detach_if_pending(timer, base, true);
@@ -1269,9 +1233,42 @@ int try_to_del_timer_sync(struct timer_list *timer)
return ret;
}
+
+/**
+ * try_to_del_timer_sync - Try to deactivate a timer
+ * @timer: timer to delete
+ *
+ * This function tries to deactivate a timer. Upon successful (ret >= 0)
+ * exit the timer is not queued and the handler is not running on any CPU.
+ */
+int try_to_del_timer_sync(struct timer_list *timer)
+{
+ struct timer_base *base;
+
+ return __try_to_del_timer_sync(timer, &base);
+}
EXPORT_SYMBOL(try_to_del_timer_sync);
#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
+static int __del_timer_sync(struct timer_list *timer)
+{
+ struct timer_base *base;
+ int ret;
+
+ for (;;) {
+ ret = __try_to_del_timer_sync(timer, &base);
+ if (ret >= 0)
+ return ret;
+
+ /*
+ * When accessing the lock, timers of base are no longer expired
+ * and so timer is no longer running.
+ */
+ spin_lock(&base->expiry_lock);
+ spin_unlock(&base->expiry_lock);
+ }
+}
+
/**
* del_timer_sync - deactivate a timer and wait for the handler to finish.
* @timer: the timer to be deactivated
@@ -1327,12 +1324,8 @@ int del_timer_sync(struct timer_list *timer)
* could lead to deadlock.
*/
WARN_ON(in_irq() && !(timer->flags & TIMER_IRQSAFE));
- for (;;) {
- int ret = try_to_del_timer_sync(timer);
- if (ret >= 0)
- return ret;
- wait_for_running_timer(timer);
- }
+
+ return __del_timer_sync(timer);
}
EXPORT_SYMBOL(del_timer_sync);
#endif
@@ -1397,11 +1390,15 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head)
raw_spin_unlock(&base->lock);
call_timer_fn(timer, fn);
base->running_timer = NULL;
+ spin_unlock(&base->expiry_lock);
+ spin_lock(&base->expiry_lock);
raw_spin_lock(&base->lock);
} else {
raw_spin_unlock_irq(&base->lock);
call_timer_fn(timer, fn);
base->running_timer = NULL;
+ spin_unlock(&base->expiry_lock);
+ spin_lock(&base->expiry_lock);
raw_spin_lock_irq(&base->lock);
}
}
@@ -1696,6 +1693,7 @@ static inline void __run_timers(struct timer_base *base)
if (!time_after_eq(jiffies, base->clk))
return;
+ spin_lock(&base->expiry_lock);
raw_spin_lock_irq(&base->lock);
/*
@@ -1723,7 +1721,7 @@ static inline void __run_timers(struct timer_base *base)
expire_timers(base, heads + levels);
}
raw_spin_unlock_irq(&base->lock);
- wakeup_timer_waiters(base);
+ spin_unlock(&base->expiry_lock);
}
/*
@@ -1970,9 +1968,7 @@ static void __init init_timer_cpu(int cpu)
base->cpu = cpu;
raw_spin_lock_init(&base->lock);
base->clk = jiffies;
-#ifdef CONFIG_PREEMPT_RT_FULL
- init_swait_queue_head(&base->wait_for_running_timer);
-#endif
+ spin_lock_init(&base->expiry_lock);
}
}
--
2.20.1