I fixed the timer code to be robust against the bad scenario I discovered
in the last few days. The bad scenario consists of a timer that reinserts
itself with an expires <= jiffies (or, more precisely, < timer_jiffies).
In the current 2.2.x and 2.3.x kernels this scenario leads to a plain
deadlock.
I verified the correctness of the patch in a userspace simulation.
Ursus, could you please check whether you can still deadlock your machine
with this patch against 2.2.14 applied on top of your current tree?
--- 2.2.14/kernel/sched.c Wed Jan 5 14:16:56 2000
+++ /tmp/sched.c Wed Jan 12 00:45:15 2000
@@ -535,6 +535,15 @@
/* can happen if you add a timer with expires == jiffies,
* or you set a timer to go off in the past
*/
+ if ((signed long) idx < -50)
+ /* Nobody should set a timer so insanely in the past or
+ waiting so many timer interrupts between reading
+ jiffies and calling the timer code. The timer code
+ is completly robust against this condition but
+ a printk may let us know about bugs in the
+ caller we might not notice otherwise. */
+ printk(KERN_WARNING
+ "timer inserted in the past, idx = %ld\n", idx);
insert_timer(timer, tv1.vec, tv1.index);
} else if (idx <= 0xffffffffUL) {
int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
@@ -1124,10 +1133,31 @@
tv->index = (tv->index + 1) & TVN_MASK;
}
+/* defer current timers to the next pass */
+static void cascade_current_timers(void)
+{
+ struct timer_list * timer;
+ int index = tv1.index;
+
+ timer = tv1.vec[index];
+ tv1.index = (tv1.index + 1) & TVR_MASK;
+
+ while (timer)
+ {
+ struct timer_list *tmp = timer;
+ timer = timer->next;
+ insert_timer(tmp, tv1.vec, tv1.index);
+ }
+ tv1.vec[index] = NULL;
+}
+
static inline void run_timer_list(void)
{
+ long passes;
+
spin_lock_irq(&timerlist_lock);
- while ((long)(jiffies - timer_jiffies) >= 0) {
+ passes = jiffies - timer_jiffies;
+ while (passes-- >= 0) {
struct timer_list *timer;
if (!tv1.index) {
int n = 1;
@@ -1135,17 +1165,21 @@
cascade_timers(tvecs[n]);
} while (tvecs[n]->index == 1 && ++n < NOOF_TVECS);
}
- while ((timer = tv1.vec[tv1.index])) {
+ timer = tv1.vec[tv1.index];
+ tv1.vec[tv1.index] = 0;
+ while (timer) {
void (*fn)(unsigned long) = timer->function;
unsigned long data = timer->data;
- detach_timer(timer);
- timer->next = timer->prev = NULL;
+ struct timer_list * tmp = timer;
+ timer = timer->next;
+ detach_timer(tmp);
+ tmp->next = tmp->prev = NULL;
spin_unlock_irq(&timerlist_lock);
fn(data);
spin_lock_irq(&timerlist_lock);
}
++timer_jiffies;
- tv1.index = (tv1.index + 1) & TVR_MASK;
+ cascade_current_timers();
}
spin_unlock_irq(&timerlist_lock);
}
The same patch will also apply cleanly to 2.3.38 if you specify
linux/kernel/timer.c as the file to patch.
Or you can download the patch from here:
ftp://ftp.*.kernel.org/pub/linux/kernel/people/andrea/patches/v2.2/2.2.14/timer_bh-deadlock-1.gz
ftp://ftp.*.kernel.org/pub/linux/kernel/people/andrea/patches/v2.3/2.3.38/timer_bh-deadlock-1.gz
Andrea
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/
This archive was generated by hypermail 2b29 : Sat Jan 15 2000 - 21:00:19 EST