[PATCH rc1-mm3] timers: kill timer_list->lock

From: Oleg Nesterov
Date: Sun Mar 27 2005 - 09:18:51 EST


The timer->lock is needed in __mod_timer() because the freshly
inited timer has base == NULL, so __mod_timer() can't use old_base
for synchronization.

With this patch init_timer() sets ->_base = per_cpu(tvec_bases).
This simplifies the code because we don't need to check that
base != NULL, and slightly speedups __mod_timer(). And reduces
the timer_list's size.

One problem. TIMER_INITIALIZER can't use per_cpu(tvec_bases).
So this patch adds global
struct {
spinlock_t lock;
struct timer_list *running_timer;
} fake_timer_base;
which is used by TIMER_INITIALIZER.

It is indeed ugly. But this can't have scalability problems.
The global fake_timer_base.lock is used only when __mod_timer()
is called for the first time and the timer was compile time
initialized. After that the timer migrates to the local CPU.

Signed-off-by: Oleg Nesterov <oleg@xxxxxxxxxx>

--- rc1-mm3/include/linux/timer.h~ 2005-03-27 21:05:51.000000000 +0400
+++ rc1-mm3/include/linux/timer.h 2005-03-27 21:07:29.000000000 +0400
@@ -12,7 +12,6 @@ struct timer_list {
struct list_head entry;
unsigned long expires;

- spinlock_t lock;
unsigned long magic;

void (*function)(unsigned long);
@@ -25,15 +24,11 @@ struct timer_list {
* To save space, we play games with the least significant bit
* of timer_list->_base.
*
- * If (_base & __TIMER_PENDING), the timer is pending. Implies
- * (_base & ~__TIMER_PENDING) != NULL.
+ * If (_base & __TIMER_PENDING), the timer is pending.
*
- * (_base & ~__TIMER_PENDING), if != NULL, is the address of the
- * timer's per-cpu tvec_t_base_s where it was last scheduled and
- * where it may still be running.
- *
- * If (_base == NULL), the timer was not scheduled yet or it was
- * cancelled by del_timer_sync(), so it is not running on any CPU.
+ * (_base & ~__TIMER_PENDING) is the address of the timer's
+ * per-cpu tvec_t_base_s where it was last registered and where
+ * it may still be running.
*/

#define __TIMER_PENDING 1
@@ -45,28 +40,17 @@ static inline int __timer_pending(struct

#define TIMER_MAGIC 0x4b87ad6e

+extern struct fake_timer_base fake_timer_base;
+
#define TIMER_INITIALIZER(_function, _expires, _data) { \
.function = (_function), \
.expires = (_expires), \
.data = (_data), \
- ._base = NULL, \
+ ._base = (void*)&fake_timer_base, \
.magic = TIMER_MAGIC, \
- .lock = SPIN_LOCK_UNLOCKED, \
}

-/***
- * init_timer - initialize a timer.
- * @timer: the timer to be initialized
- *
- * init_timer() must be done to a timer prior calling *any* of the
- * other timer functions.
- */
-static inline void init_timer(struct timer_list * timer)
-{
- timer->_base = NULL;
- timer->magic = TIMER_MAGIC;
- spin_lock_init(&timer->lock);
-}
+void fastcall init_timer(struct timer_list * timer);

/***
* timer_pending - is a timer pending?
--- rc1-mm3/kernel/timer.c~ 2005-03-27 21:06:19.000000000 +0400
+++ rc1-mm3/kernel/timer.c 2005-03-27 21:07:29.000000000 +0400
@@ -68,8 +68,8 @@ typedef struct tvec_root_s {

struct tvec_t_base_s {
spinlock_t lock;
- unsigned long timer_jiffies;
struct timer_list *running_timer;
+ unsigned long timer_jiffies;
tvec_root_t tv1;
tvec_t tv2;
tvec_t tv3;
@@ -78,6 +78,16 @@ struct tvec_t_base_s {
} ____cacheline_aligned_in_smp;

typedef struct tvec_t_base_s tvec_base_t;
+static DEFINE_PER_CPU(tvec_base_t, tvec_bases);
+
+struct fake_timer_base {
+ spinlock_t lock;
+ struct timer_list *running_timer;
+} ____cacheline_aligned_in_smp;
+
+struct fake_timer_base fake_timer_base =
+ { .lock = SPIN_LOCK_UNLOCKED };
+EXPORT_SYMBOL(fake_timer_base);

static inline void set_running_timer(tvec_base_t *base,
struct timer_list *timer)
@@ -87,6 +97,28 @@ static inline void set_running_timer(tve
#endif
}

+/***
+ * init_timer - initialize a timer.
+ * @timer: the timer to be initialized
+ *
+ * init_timer() must be done to a timer prior calling *any* of the
+ * other timer functions.
+ */
+void fastcall init_timer(struct timer_list * timer)
+{
+ BUILD_BUG_ON(
+ offsetof(tvec_base_t, lock) !=
+ offsetof(struct fake_timer_base, lock)
+ ||
+ offsetof(tvec_base_t, running_timer) !=
+ offsetof(struct fake_timer_base, running_timer)
+ );
+
+ timer->_base = &per_cpu(tvec_bases, __smp_processor_id());
+ timer->magic = TIMER_MAGIC;
+}
+EXPORT_SYMBOL(init_timer);
+
static inline void __set_base(struct timer_list *timer,
tvec_base_t *base, int pending)
{
@@ -101,9 +133,6 @@ static inline tvec_base_t *timer_base(st
return (tvec_base_t*)((unsigned long)timer->_base & ~__TIMER_PENDING);
}

-/* Fake initialization */
-static DEFINE_PER_CPU(tvec_base_t, tvec_bases) = { SPIN_LOCK_UNLOCKED };
-
static void check_timer_failed(struct timer_list *timer)
{
static int whine_count;
@@ -118,7 +147,6 @@ static void check_timer_failed(struct ti
/*
* Now fix it up
*/
- spin_lock_init(&timer->lock);
timer->magic = TIMER_MAGIC;
}

@@ -182,39 +210,33 @@ int __mod_timer(struct timer_list *timer
check_timer(timer);

do {
- spin_lock_irqsave(&timer->lock, flags);
+ local_irq_save(flags);
new_base = &__get_cpu_var(tvec_bases);
old_base = timer_base(timer);

- /* Prevent AB-BA deadlocks. */
+ /* Prevent AB-BA deadlocks.
+ * NOTE: (old_base == new_base) => (new_lock == 0)
+ */
new_lock = old_base < new_base;
if (new_lock)
spin_lock(&new_base->lock);
+ spin_lock(&old_base->lock);

- /* Note:
- * (old_base == NULL) => (new_lock == 1)
- * (old_base == new_base) => (new_lock == 0)
- */
- if (old_base) {
- spin_lock(&old_base->lock);
+ if (timer_base(timer) != old_base)
+ goto unlock;

- if (timer_base(timer) != old_base)
+ if (old_base != new_base) {
+ /* Ensure the timer is serialized. */
+ if (old_base->running_timer == timer)
goto unlock;

- if (old_base != new_base) {
- /* Ensure the timer is serialized. */
- if (old_base->running_timer == timer)
- goto unlock;
-
- if (!new_lock) {
- spin_lock(&new_base->lock);
- new_lock = 1;
- }
+ if (!new_lock) {
+ spin_lock(&new_base->lock);
+ new_lock = 1;
}
}

ret = 0;
- /* We hold timer->lock, no need to check old_base != 0. */
if (timer_pending(timer)) {
list_del(&timer->entry);
ret = 1;
@@ -224,11 +246,9 @@ int __mod_timer(struct timer_list *timer
internal_add_timer(new_base, timer);
__set_base(timer, new_base, 1);
unlock:
- if (old_base)
- spin_unlock(&old_base->lock);
if (new_lock)
spin_unlock(&new_base->lock);
- spin_unlock_irqrestore(&timer->lock, flags);
+ spin_unlock_irqrestore(&old_base->lock, flags);
} while (ret == -1);

return ret;
@@ -355,19 +375,14 @@ EXPORT_SYMBOL(del_timer);
*/
int del_timer_sync(struct timer_list *timer)
{
- int ret;
+ unsigned long flags;
+ tvec_base_t *base;
+ int ret = -1;

check_timer(timer);

- ret = 0;
- for (;;) {
- unsigned long flags;
- tvec_base_t *base;
-
+ do {
base = timer_base(timer);
- if (!base)
- return ret;
-
spin_lock_irqsave(&base->lock, flags);

if (base->running_timer == timer)
@@ -376,18 +391,19 @@ int del_timer_sync(struct timer_list *ti
if (timer_base(timer) != base)
goto unlock;

+ ret = 0;
if (timer_pending(timer)) {
list_del(&timer->entry);
+ __set_base(timer, base, 0);
ret = 1;
}
- /* Need to make sure that anybody who sees a NULL base
- * also sees the list ops */
- smp_wmb();
- timer->_base = NULL;
unlock:
spin_unlock_irqrestore(&base->lock, flags);
- }
+ } while (ret == -1);
+
+ return ret;
}
+
EXPORT_SYMBOL(del_timer_sync);

/***
@@ -1323,22 +1339,16 @@ static void __devinit init_timers_cpu(in
}

#ifdef CONFIG_HOTPLUG_CPU
-static int migrate_timer_list(tvec_base_t *new_base, struct list_head *head)
+static void migrate_timer_list(tvec_base_t *new_base, struct list_head *head)
{
struct timer_list *timer;

while (!list_empty(head)) {
timer = list_entry(head->next, struct timer_list, entry);
- /* We're locking backwards from __mod_timer order here,
- beware deadlock. */
- if (!spin_trylock(&timer->lock))
- return 0;
list_del(&timer->entry);
internal_add_timer(new_base, timer);
__set_base(timer, new_base, 1);
- spin_unlock(&timer->lock);
}
- return 1;
}

static void __devinit migrate_timers(int cpu)
@@ -1352,7 +1362,6 @@ static void __devinit migrate_timers(int
new_base = &get_cpu_var(tvec_bases);

local_irq_disable();
-again:
/* Prevent deadlocks via ordering by old_base < new_base. */
if (old_base < new_base) {
spin_lock(&new_base->lock);
@@ -1365,26 +1374,17 @@ again:
if (old_base->running_timer)
BUG();
for (i = 0; i < TVR_SIZE; i++)
- if (!migrate_timer_list(new_base, old_base->tv1.vec + i))
- goto unlock_again;
- for (i = 0; i < TVN_SIZE; i++)
- if (!migrate_timer_list(new_base, old_base->tv2.vec + i)
- || !migrate_timer_list(new_base, old_base->tv3.vec + i)
- || !migrate_timer_list(new_base, old_base->tv4.vec + i)
- || !migrate_timer_list(new_base, old_base->tv5.vec + i))
- goto unlock_again;
+ migrate_timer_list(new_base, old_base->tv1.vec + i);
+ for (i = 0; i < TVN_SIZE; i++) {
+ migrate_timer_list(new_base, old_base->tv2.vec + i);
+ migrate_timer_list(new_base, old_base->tv3.vec + i);
+ migrate_timer_list(new_base, old_base->tv4.vec + i);
+ migrate_timer_list(new_base, old_base->tv5.vec + i);
+ }
spin_unlock(&old_base->lock);
spin_unlock(&new_base->lock);
local_irq_enable();
put_cpu_var(tvec_bases);
- return;
-
-unlock_again:
- /* Avoid deadlock with __mod_timer, by backing off. */
- spin_unlock(&old_base->lock);
- spin_unlock(&new_base->lock);
- cpu_relax();
- goto again;
}
#endif /* CONFIG_HOTPLUG_CPU */
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/