[GIT PULL] timers for 3.7

From: Thomas Gleixner
Date: Fri Oct 12 2012 - 06:41:50 EST


Linus,

please pull the latest timers-core-for-linus git tree from:

git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers-core-for-linus

Thanks,

tglx

------------------>
Arnd Bergmann (1):
time/jiffies: bring back unconditional LATCH definition

Dan Carpenter (1):
timekeeping: Cast raw_interval to u64 to avoid shift overflow

Hildner, Christian (1):
timers: Fix endless looping between cascade() and internal_add_timer()

John Stultz (11):
alarmtimer: Use hrtimer per-alarm instead of per-base
alarmtimer: Remove unused helpers & defines
alarmtimer: Rename alarmtimer_remove to alarmtimer_dequeue
jiffies: Kill unused TICK_USEC_TO_NSEC
jiffies: Remove compile time assumptions about CLOCK_TICK_RATE
time: Move timekeeper structure to timekeeper_internal.h for vsyscall changes
time: Move update_vsyscall definitions to timekeeper_internal.h
time: Convert CONFIG_GENERIC_TIME_VSYSCALL to CONFIG_GENERIC_TIME_VSYSCALL_OLD
time: Introduce new GENERIC_TIME_VSYSCALL
time: Only do nanosecond rounding on GENERIC_TIME_VSYSCALL_OLD systems
time: Convert x86_64 to using new update_vsyscall

Tejun Heo (4):
timer: Generalize timer->base flags handling
timer: Relocate declarations of init_timer_on_stack_key()
timer: Clean up timer initializers
timer: Implement TIMER_IRQSAFE

Todd Poynor (1):
alarmtimer: Implement minimum alarm interval for allowing suspend


arch/ia64/Kconfig | 2 +-
arch/ia64/kernel/time.c | 4 +-
arch/powerpc/Kconfig | 2 +-
arch/powerpc/kernel/time.c | 4 +-
arch/s390/Kconfig | 2 +-
arch/s390/kernel/time.c | 4 +-
arch/x86/include/asm/vgtod.h | 4 +-
arch/x86/kernel/setup.c | 3 +
arch/x86/kernel/vsyscall_64.c | 49 ++++++----
arch/x86/vdso/vclock_gettime.c | 22 +++--
include/linux/alarmtimer.h | 31 +------
include/linux/clocksource.h | 16 ----
include/linux/jiffies.h | 20 +----
include/linux/timekeeper_internal.h | 108 +++++++++++++++++++++++
include/linux/timer.h | 165 ++++++++++++++---------------------
kernel/time.c | 2 +-
kernel/time/Kconfig | 4 +
kernel/time/alarmtimer.c | 118 +++++++++----------------
kernel/time/jiffies.c | 32 +++++++-
kernel/time/timekeeping.c | 117 +++++++------------------
kernel/timer.c | 118 ++++++++++++-------------
21 files changed, 403 insertions(+), 424 deletions(-)
create mode 100644 include/linux/timekeeper_internal.h

diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 310cf57..f9e673c 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -38,7 +38,7 @@ config IA64
select ARCH_TASK_STRUCT_ALLOCATOR
select ARCH_THREAD_INFO_ALLOCATOR
select ARCH_CLOCKSOURCE_DATA
- select GENERIC_TIME_VSYSCALL
+ select GENERIC_TIME_VSYSCALL_OLD
default y
help
The Itanium Processor Family is Intel's 64-bit successor to
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index ecc904b..d2f4e26 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -19,7 +19,7 @@
#include <linux/interrupt.h>
#include <linux/efi.h>
#include <linux/timex.h>
-#include <linux/clocksource.h>
+#include <linux/timekeeper_internal.h>
#include <linux/platform_device.h>

#include <asm/machvec.h>
@@ -454,7 +454,7 @@ void update_vsyscall_tz(void)
{
}

-void update_vsyscall(struct timespec *wall, struct timespec *wtm,
+void update_vsyscall_old(struct timespec *wall, struct timespec *wtm,
struct clocksource *c, u32 mult)
{
write_seqcount_begin(&fsyscall_gtod_data.seq);
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 352f416..0881660 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -135,7 +135,7 @@ config PPC
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select GENERIC_SMP_IDLE_THREAD
select GENERIC_CMOS_UPDATE
- select GENERIC_TIME_VSYSCALL
+ select GENERIC_TIME_VSYSCALL_OLD
select GENERIC_CLOCKEVENTS
select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index e49e931..c825809 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -73,7 +73,7 @@
/* powerpc clocksource/clockevent code */

#include <linux/clockchips.h>
-#include <linux/clocksource.h>
+#include <linux/timekeeper_internal.h>

static cycle_t rtc_read(struct clocksource *);
static struct clocksource clocksource_rtc = {
@@ -712,7 +712,7 @@ static cycle_t timebase_read(struct clocksource *cs)
return (cycle_t)get_tb();
}

-void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
+void update_vsyscall_old(struct timespec *wall_time, struct timespec *wtm,
struct clocksource *clock, u32 mult)
{
u64 new_tb_to_xs, new_stamp_xsec;
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 107610e..ba488aa 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -121,7 +121,7 @@ config S390
select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
select ARCH_WANT_IPC_PARSE_VERSION
select GENERIC_SMP_IDLE_THREAD
- select GENERIC_TIME_VSYSCALL
+ select GENERIC_TIME_VSYSCALL_OLD
select GENERIC_CLOCKEVENTS
select KTIME_SCALAR if 32BIT
select HAVE_ARCH_SECCOMP_FILTER
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index dcec960..c5430bf 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -34,7 +34,7 @@
#include <linux/profile.h>
#include <linux/timex.h>
#include <linux/notifier.h>
-#include <linux/clocksource.h>
+#include <linux/timekeeper_internal.h>
#include <linux/clockchips.h>
#include <linux/gfp.h>
#include <linux/kprobes.h>
@@ -219,7 +219,7 @@ struct clocksource * __init clocksource_default_clock(void)
return &clocksource_tod;
}

-void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
+void update_vsyscall_old(struct timespec *wall_time, struct timespec *wtm,
struct clocksource *clock, u32 mult)
{
if (clock != &clocksource_tod)
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 8b38be2..46e24d3 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -17,8 +17,8 @@ struct vsyscall_gtod_data {

/* open coded 'struct timespec' */
time_t wall_time_sec;
- u32 wall_time_nsec;
- u32 monotonic_time_nsec;
+ u64 wall_time_snsec;
+ u64 monotonic_time_snsec;
time_t monotonic_time_sec;

struct timezone sys_tz;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index f4b9b80..4062f15 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -68,6 +68,7 @@
#include <linux/percpu.h>
#include <linux/crash_dump.h>
#include <linux/tboot.h>
+#include <linux/jiffies.h>

#include <video/edid.h>

@@ -1034,6 +1035,8 @@ void __init setup_arch(char **cmdline_p)
mcheck_init();

arch_init_ideal_nops();
+
+ register_refined_jiffies(CLOCK_TICK_RATE);
}

#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 8d141b3..3a3e8c9 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -28,7 +28,7 @@
#include <linux/jiffies.h>
#include <linux/sysctl.h>
#include <linux/topology.h>
-#include <linux/clocksource.h>
+#include <linux/timekeeper_internal.h>
#include <linux/getcpu.h>
#include <linux/cpu.h>
#include <linux/smp.h>
@@ -82,32 +82,41 @@ void update_vsyscall_tz(void)
vsyscall_gtod_data.sys_tz = sys_tz;
}

-void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
- struct clocksource *clock, u32 mult)
+void update_vsyscall(struct timekeeper *tk)
{
- struct timespec monotonic;
+ struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data;

- write_seqcount_begin(&vsyscall_gtod_data.seq);
+ write_seqcount_begin(&vdata->seq);

/* copy vsyscall data */
- vsyscall_gtod_data.clock.vclock_mode = clock->archdata.vclock_mode;
- vsyscall_gtod_data.clock.cycle_last = clock->cycle_last;
- vsyscall_gtod_data.clock.mask = clock->mask;
- vsyscall_gtod_data.clock.mult = mult;
- vsyscall_gtod_data.clock.shift = clock->shift;
-
- vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
- vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
+ vdata->clock.vclock_mode = tk->clock->archdata.vclock_mode;
+ vdata->clock.cycle_last = tk->clock->cycle_last;
+ vdata->clock.mask = tk->clock->mask;
+ vdata->clock.mult = tk->mult;
+ vdata->clock.shift = tk->shift;
+
+ vdata->wall_time_sec = tk->xtime_sec;
+ vdata->wall_time_snsec = tk->xtime_nsec;
+
+ vdata->monotonic_time_sec = tk->xtime_sec
+ + tk->wall_to_monotonic.tv_sec;
+ vdata->monotonic_time_snsec = tk->xtime_nsec
+ + (tk->wall_to_monotonic.tv_nsec
+ << tk->shift);
+ while (vdata->monotonic_time_snsec >=
+ (((u64)NSEC_PER_SEC) << tk->shift)) {
+ vdata->monotonic_time_snsec -=
+ ((u64)NSEC_PER_SEC) << tk->shift;
+ vdata->monotonic_time_sec++;
+ }

- monotonic = timespec_add(*wall_time, *wtm);
- vsyscall_gtod_data.monotonic_time_sec = monotonic.tv_sec;
- vsyscall_gtod_data.monotonic_time_nsec = monotonic.tv_nsec;
+ vdata->wall_time_coarse.tv_sec = tk->xtime_sec;
+ vdata->wall_time_coarse.tv_nsec = (long)(tk->xtime_nsec >> tk->shift);

- vsyscall_gtod_data.wall_time_coarse = __current_kernel_time();
- vsyscall_gtod_data.monotonic_time_coarse =
- timespec_add(vsyscall_gtod_data.wall_time_coarse, *wtm);
+ vdata->monotonic_time_coarse = timespec_add(vdata->wall_time_coarse,
+ tk->wall_to_monotonic);

- write_seqcount_end(&vsyscall_gtod_data.seq);
+ write_seqcount_end(&vdata->seq);
}

static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 885eff4..4df6c37 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -80,7 +80,7 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
}


-notrace static inline long vgetns(void)
+notrace static inline u64 vgetsns(void)
{
long v;
cycles_t cycles;
@@ -91,21 +91,24 @@ notrace static inline long vgetns(void)
else
return 0;
v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask;
- return (v * gtod->clock.mult) >> gtod->clock.shift;
+ return v * gtod->clock.mult;
}

/* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
notrace static int __always_inline do_realtime(struct timespec *ts)
{
- unsigned long seq, ns;
+ unsigned long seq;
+ u64 ns;
int mode;

+ ts->tv_nsec = 0;
do {
seq = read_seqcount_begin(&gtod->seq);
mode = gtod->clock.vclock_mode;
ts->tv_sec = gtod->wall_time_sec;
- ts->tv_nsec = gtod->wall_time_nsec;
- ns = vgetns();
+ ns = gtod->wall_time_snsec;
+ ns += vgetsns();
+ ns >>= gtod->clock.shift;
} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));

timespec_add_ns(ts, ns);
@@ -114,15 +117,18 @@ notrace static int __always_inline do_realtime(struct timespec *ts)

notrace static int do_monotonic(struct timespec *ts)
{
- unsigned long seq, ns;
+ unsigned long seq;
+ u64 ns;
int mode;

+ ts->tv_nsec = 0;
do {
seq = read_seqcount_begin(&gtod->seq);
mode = gtod->clock.vclock_mode;
ts->tv_sec = gtod->monotonic_time_sec;
- ts->tv_nsec = gtod->monotonic_time_nsec;
- ns = vgetns();
+ ns = gtod->monotonic_time_snsec;
+ ns += vgetsns();
+ ns >>= gtod->clock.shift;
} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
timespec_add_ns(ts, ns);

diff --git a/include/linux/alarmtimer.h b/include/linux/alarmtimer.h
index 96c5c24..9069694 100644
--- a/include/linux/alarmtimer.h
+++ b/include/linux/alarmtimer.h
@@ -21,7 +21,6 @@ enum alarmtimer_restart {

#define ALARMTIMER_STATE_INACTIVE 0x00
#define ALARMTIMER_STATE_ENQUEUED 0x01
-#define ALARMTIMER_STATE_CALLBACK 0x02

/**
* struct alarm - Alarm timer structure
@@ -35,6 +34,7 @@ enum alarmtimer_restart {
*/
struct alarm {
struct timerqueue_node node;
+ struct hrtimer timer;
enum alarmtimer_restart (*function)(struct alarm *, ktime_t now);
enum alarmtimer_type type;
int state;
@@ -43,39 +43,12 @@ struct alarm {

void alarm_init(struct alarm *alarm, enum alarmtimer_type type,
enum alarmtimer_restart (*function)(struct alarm *, ktime_t));
-void alarm_start(struct alarm *alarm, ktime_t start);
+int alarm_start(struct alarm *alarm, ktime_t start);
int alarm_try_to_cancel(struct alarm *alarm);
int alarm_cancel(struct alarm *alarm);

u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval);

-/*
- * A alarmtimer is active, when it is enqueued into timerqueue or the
- * callback function is running.
- */
-static inline int alarmtimer_active(const struct alarm *timer)
-{
- return timer->state != ALARMTIMER_STATE_INACTIVE;
-}
-
-/*
- * Helper function to check, whether the timer is on one of the queues
- */
-static inline int alarmtimer_is_queued(struct alarm *timer)
-{
- return timer->state & ALARMTIMER_STATE_ENQUEUED;
-}
-
-/*
- * Helper function to check, whether the timer is running the callback
- * function
- */
-static inline int alarmtimer_callback_running(struct alarm *timer)
-{
- return timer->state & ALARMTIMER_STATE_CALLBACK;
-}
-
-
/* Provide way to access the rtc device being used by alarmtimers */
struct rtc_device *alarmtimer_get_rtcdev(void);

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index fbe89e1..4dceaf8 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -319,22 +319,6 @@ static inline void __clocksource_updatefreq_khz(struct clocksource *cs, u32 khz)
__clocksource_updatefreq_scale(cs, 1000, khz);
}

-#ifdef CONFIG_GENERIC_TIME_VSYSCALL
-extern void
-update_vsyscall(struct timespec *ts, struct timespec *wtm,
- struct clocksource *c, u32 mult);
-extern void update_vsyscall_tz(void);
-#else
-static inline void
-update_vsyscall(struct timespec *ts, struct timespec *wtm,
- struct clocksource *c, u32 mult)
-{
-}
-
-static inline void update_vsyscall_tz(void)
-{
-}
-#endif

extern void timekeeping_notify(struct clocksource *clock);

diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 8268054..c6d5b2a 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -51,31 +51,17 @@
#define SH_DIV(NOM,DEN,LSH) ( (((NOM) / (DEN)) << (LSH)) \
+ ((((NOM) % (DEN)) << (LSH)) + (DEN) / 2) / (DEN))

-#ifdef CLOCK_TICK_RATE
/* LATCH is used in the interval timer and ftape setup. */
-# define LATCH ((CLOCK_TICK_RATE + HZ/2) / HZ) /* For divider */
+#define LATCH ((CLOCK_TICK_RATE + HZ/2) / HZ) /* For divider */

-/*
- * HZ is the requested value. However the CLOCK_TICK_RATE may not allow
- * for exactly HZ. So SHIFTED_HZ is high res HZ ("<< 8" is for accuracy)
- */
-# define SHIFTED_HZ (SH_DIV(CLOCK_TICK_RATE, LATCH, 8))
-#else
-# define SHIFTED_HZ (HZ << 8)
-#endif
+extern int register_refined_jiffies(long clock_tick_rate);

/* TICK_NSEC is the time between ticks in nsec assuming SHIFTED_HZ */
-#define TICK_NSEC (SH_DIV(1000000UL * 1000, SHIFTED_HZ, 8))
+#define TICK_NSEC ((NSEC_PER_SEC+HZ/2)/HZ)

/* TICK_USEC is the time between ticks in usec assuming fake USER_HZ */
#define TICK_USEC ((1000000UL + USER_HZ/2) / USER_HZ)

-/*
- * TICK_USEC_TO_NSEC is the time between ticks in nsec assuming SHIFTED_HZ and
- * a value TUSEC for TICK_USEC (can be set bij adjtimex)
- */
-#define TICK_USEC_TO_NSEC(TUSEC) (SH_DIV(TUSEC * USER_HZ * 1000, SHIFTED_HZ, 8))
-
/* some arch's have a small-data section that can be accessed register-relative
* but that can only take up to, say, 4-byte variables. jiffies being part of
* an 8-byte variable may not be correctly accessed unless we force the issue
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
new file mode 100644
index 0000000..e1d558e
--- /dev/null
+++ b/include/linux/timekeeper_internal.h
@@ -0,0 +1,108 @@
+/*
+ * You SHOULD NOT be including this unless you're vsyscall
+ * handling code or timekeeping internal code!
+ */
+
+#ifndef _LINUX_TIMEKEEPER_INTERNAL_H
+#define _LINUX_TIMEKEEPER_INTERNAL_H
+
+#include <linux/clocksource.h>
+#include <linux/jiffies.h>
+#include <linux/time.h>
+
+/* Structure holding internal timekeeping values. */
+struct timekeeper {
+ /* Current clocksource used for timekeeping. */
+ struct clocksource *clock;
+ /* NTP adjusted clock multiplier */
+ u32 mult;
+ /* The shift value of the current clocksource. */
+ u32 shift;
+ /* Number of clock cycles in one NTP interval. */
+ cycle_t cycle_interval;
+ /* Number of clock shifted nano seconds in one NTP interval. */
+ u64 xtime_interval;
+ /* shifted nano seconds left over when rounding cycle_interval */
+ s64 xtime_remainder;
+ /* Raw nano seconds accumulated per NTP interval. */
+ u32 raw_interval;
+
+ /* Current CLOCK_REALTIME time in seconds */
+ u64 xtime_sec;
+ /* Clock shifted nano seconds */
+ u64 xtime_nsec;
+
+ /* Difference between accumulated time and NTP time in ntp
+ * shifted nano seconds. */
+ s64 ntp_error;
+ /* Shift conversion between clock shifted nano seconds and
+ * ntp shifted nano seconds. */
+ u32 ntp_error_shift;
+
+ /*
+ * wall_to_monotonic is what we need to add to xtime (or xtime corrected
+ * for sub jiffie times) to get to monotonic time. Monotonic is pegged
+ * at zero at system boot time, so wall_to_monotonic will be negative,
+ * however, we will ALWAYS keep the tv_nsec part positive so we can use
+ * the usual normalization.
+ *
+ * wall_to_monotonic is moved after resume from suspend for the
+ * monotonic time not to jump. We need to add total_sleep_time to
+ * wall_to_monotonic to get the real boot based time offset.
+ *
+ * - wall_to_monotonic is no longer the boot time, getboottime must be
+ * used instead.
+ */
+ struct timespec wall_to_monotonic;
+ /* Offset clock monotonic -> clock realtime */
+ ktime_t offs_real;
+ /* time spent in suspend */
+ struct timespec total_sleep_time;
+ /* Offset clock monotonic -> clock boottime */
+ ktime_t offs_boot;
+ /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */
+ struct timespec raw_time;
+ /* Seqlock for all timekeeper values */
+ seqlock_t lock;
+};
+
+static inline struct timespec tk_xtime(struct timekeeper *tk)
+{
+ struct timespec ts;
+
+ ts.tv_sec = tk->xtime_sec;
+ ts.tv_nsec = (long)(tk->xtime_nsec >> tk->shift);
+ return ts;
+}
+
+
+#ifdef CONFIG_GENERIC_TIME_VSYSCALL
+
+extern void update_vsyscall(struct timekeeper *tk);
+extern void update_vsyscall_tz(void);
+
+#elif defined(CONFIG_GENERIC_TIME_VSYSCALL_OLD)
+
+extern void update_vsyscall_old(struct timespec *ts, struct timespec *wtm,
+ struct clocksource *c, u32 mult);
+extern void update_vsyscall_tz(void);
+
+static inline void update_vsyscall(struct timekeeper *tk)
+{
+ struct timespec xt;
+
+ xt = tk_xtime(tk);
+ update_vsyscall_old(&xt, &tk->wall_to_monotonic, tk->clock, tk->mult);
+}
+
+#else
+
+static inline void update_vsyscall(struct timekeeper *tk)
+{
+}
+static inline void update_vsyscall_tz(void)
+{
+}
+#endif
+
+#endif /* _LINUX_TIMEKEEPER_INTERNAL_H */
diff --git a/include/linux/timer.h b/include/linux/timer.h
index 6abd913..8c5a197 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -49,147 +49,112 @@ extern struct tvec_base boot_tvec_bases;
#endif

/*
- * Note that all tvec_bases are 2 byte aligned and lower bit of
- * base in timer_list is guaranteed to be zero. Use the LSB to
- * indicate whether the timer is deferrable.
+ * Note that all tvec_bases are at least 4 byte aligned and lower two bits
+ * of base in timer_list are guaranteed to be zero. Use them for flags.
*
* A deferrable timer will work normally when the system is busy, but
* will not cause a CPU to come out of idle just to service it; instead,
* the timer will be serviced when the CPU eventually wakes up with a
* subsequent non-deferrable timer.
+ *
+ * An irqsafe timer is executed with IRQ disabled and it's safe to wait for
+ * the completion of the running instance from IRQ handlers, for example,
+ * by calling del_timer_sync().
+ *
+ * Note: The irq disabled callback execution is a special case for
+ * workqueue locking issues. It's not meant for executing random crap
+ * with interrupts disabled. Abuse is monitored!
*/
-#define TBASE_DEFERRABLE_FLAG (0x1)
+#define TIMER_DEFERRABLE 0x1LU
+#define TIMER_IRQSAFE 0x2LU

-#define TIMER_INITIALIZER(_function, _expires, _data) { \
+#define TIMER_FLAG_MASK 0x3LU
+
+#define __TIMER_INITIALIZER(_function, _expires, _data, _flags) { \
.entry = { .prev = TIMER_ENTRY_STATIC }, \
.function = (_function), \
.expires = (_expires), \
.data = (_data), \
- .base = &boot_tvec_bases, \
+ .base = (void *)((unsigned long)&boot_tvec_bases + (_flags)), \
.slack = -1, \
__TIMER_LOCKDEP_MAP_INITIALIZER( \
__FILE__ ":" __stringify(__LINE__)) \
}

-#define TBASE_MAKE_DEFERRED(ptr) ((struct tvec_base *) \
- ((unsigned char *)(ptr) + TBASE_DEFERRABLE_FLAG))
+#define TIMER_INITIALIZER(_function, _expires, _data) \
+ __TIMER_INITIALIZER((_function), (_expires), (_data), 0)

-#define TIMER_DEFERRED_INITIALIZER(_function, _expires, _data) {\
- .entry = { .prev = TIMER_ENTRY_STATIC }, \
- .function = (_function), \
- .expires = (_expires), \
- .data = (_data), \
- .base = TBASE_MAKE_DEFERRED(&boot_tvec_bases), \
- __TIMER_LOCKDEP_MAP_INITIALIZER( \
- __FILE__ ":" __stringify(__LINE__)) \
- }
+#define TIMER_DEFERRED_INITIALIZER(_function, _expires, _data) \
+ __TIMER_INITIALIZER((_function), (_expires), (_data), TIMER_DEFERRABLE)

#define DEFINE_TIMER(_name, _function, _expires, _data) \
struct timer_list _name = \
TIMER_INITIALIZER(_function, _expires, _data)

-void init_timer_key(struct timer_list *timer,
- const char *name,
- struct lock_class_key *key);
-void init_timer_deferrable_key(struct timer_list *timer,
- const char *name,
- struct lock_class_key *key);
+void init_timer_key(struct timer_list *timer, unsigned int flags,
+ const char *name, struct lock_class_key *key);
+
+#ifdef CONFIG_DEBUG_OBJECTS_TIMERS
+extern void init_timer_on_stack_key(struct timer_list *timer,
+ unsigned int flags, const char *name,
+ struct lock_class_key *key);
+extern void destroy_timer_on_stack(struct timer_list *timer);
+#else
+static inline void destroy_timer_on_stack(struct timer_list *timer) { }
+static inline void init_timer_on_stack_key(struct timer_list *timer,
+ unsigned int flags, const char *name,
+ struct lock_class_key *key)
+{
+ init_timer_key(timer, flags, name, key);
+}
+#endif

#ifdef CONFIG_LOCKDEP
-#define init_timer(timer) \
+#define __init_timer(_timer, _flags) \
do { \
static struct lock_class_key __key; \
- init_timer_key((timer), #timer, &__key); \
+ init_timer_key((_timer), (_flags), #_timer, &__key); \
} while (0)

-#define init_timer_deferrable(timer) \
+#define __init_timer_on_stack(_timer, _flags) \
do { \
static struct lock_class_key __key; \
- init_timer_deferrable_key((timer), #timer, &__key); \
+ init_timer_on_stack_key((_timer), (_flags), #_timer, &__key); \
} while (0)
+#else
+#define __init_timer(_timer, _flags) \
+ init_timer_key((_timer), (_flags), NULL, NULL)
+#define __init_timer_on_stack(_timer, _flags) \
+ init_timer_on_stack_key((_timer), (_flags), NULL, NULL)
+#endif

+#define init_timer(timer) \
+ __init_timer((timer), 0)
+#define init_timer_deferrable(timer) \
+ __init_timer((timer), TIMER_DEFERRABLE)
#define init_timer_on_stack(timer) \
+ __init_timer_on_stack((timer), 0)
+
+#define __setup_timer(_timer, _fn, _data, _flags) \
do { \
- static struct lock_class_key __key; \
- init_timer_on_stack_key((timer), #timer, &__key); \
+ __init_timer((_timer), (_flags)); \
+ (_timer)->function = (_fn); \
+ (_timer)->data = (_data); \
} while (0)

-#define setup_timer(timer, fn, data) \
+#define __setup_timer_on_stack(_timer, _fn, _data, _flags) \
do { \
- static struct lock_class_key __key; \
- setup_timer_key((timer), #timer, &__key, (fn), (data));\
+ __init_timer_on_stack((_timer), (_flags)); \
+ (_timer)->function = (_fn); \
+ (_timer)->data = (_data); \
} while (0)

+#define setup_timer(timer, fn, data) \
+ __setup_timer((timer), (fn), (data), 0)
#define setup_timer_on_stack(timer, fn, data) \
- do { \
- static struct lock_class_key __key; \
- setup_timer_on_stack_key((timer), #timer, &__key, \
- (fn), (data)); \
- } while (0)
+ __setup_timer_on_stack((timer), (fn), (data), 0)
#define setup_deferrable_timer_on_stack(timer, fn, data) \
- do { \
- static struct lock_class_key __key; \
- setup_deferrable_timer_on_stack_key((timer), #timer, \
- &__key, (fn), \
- (data)); \
- } while (0)
-#else
-#define init_timer(timer)\
- init_timer_key((timer), NULL, NULL)
-#define init_timer_deferrable(timer)\
- init_timer_deferrable_key((timer), NULL, NULL)
-#define init_timer_on_stack(timer)\
- init_timer_on_stack_key((timer), NULL, NULL)
-#define setup_timer(timer, fn, data)\
- setup_timer_key((timer), NULL, NULL, (fn), (data))
-#define setup_timer_on_stack(timer, fn, data)\
- setup_timer_on_stack_key((timer), NULL, NULL, (fn), (data))
-#define setup_deferrable_timer_on_stack(timer, fn, data)\
- setup_deferrable_timer_on_stack_key((timer), NULL, NULL, (fn), (data))
-#endif
-
-#ifdef CONFIG_DEBUG_OBJECTS_TIMERS
-extern void init_timer_on_stack_key(struct timer_list *timer,
- const char *name,
- struct lock_class_key *key);
-extern void destroy_timer_on_stack(struct timer_list *timer);
-#else
-static inline void destroy_timer_on_stack(struct timer_list *timer) { }
-static inline void init_timer_on_stack_key(struct timer_list *timer,
- const char *name,
- struct lock_class_key *key)
-{
- init_timer_key(timer, name, key);
-}
-#endif
-
-static inline void setup_timer_key(struct timer_list * timer,
- const char *name,
- struct lock_class_key *key,
- void (*function)(unsigned long),
- unsigned long data)
-{
- timer->function = function;
- timer->data = data;
- init_timer_key(timer, name, key);
-}
-
-static inline void setup_timer_on_stack_key(struct timer_list *timer,
- const char *name,
- struct lock_class_key *key,
- void (*function)(unsigned long),
- unsigned long data)
-{
- timer->function = function;
- timer->data = data;
- init_timer_on_stack_key(timer, name, key);
-}
-
-extern void setup_deferrable_timer_on_stack_key(struct timer_list *timer,
- const char *name,
- struct lock_class_key *key,
- void (*function)(unsigned long),
- unsigned long data);
+ __setup_timer_on_stack((timer), (fn), (data), TIMER_DEFERRABLE)

/**
* timer_pending - is a timer pending?
diff --git a/kernel/time.c b/kernel/time.c
index ba744cf..d226c6a 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -30,7 +30,7 @@
#include <linux/export.h>
#include <linux/timex.h>
#include <linux/capability.h>
-#include <linux/clocksource.h>
+#include <linux/timekeeper_internal.h>
#include <linux/errno.h>
#include <linux/syscalls.h>
#include <linux/security.h>
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index fd42bd4..8601f0d 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -16,6 +16,10 @@ config ARCH_CLOCKSOURCE_DATA
config GENERIC_TIME_VSYSCALL
bool

+# Timekeeping vsyscall support
+config GENERIC_TIME_VSYSCALL_OLD
+ bool
+
# ktime_t scalar 64bit nsec representation
config KTIME_SCALAR
bool
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index aa27d39..f11d83b 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -37,7 +37,6 @@
static struct alarm_base {
spinlock_t lock;
struct timerqueue_head timerqueue;
- struct hrtimer timer;
ktime_t (*gettime)(void);
clockid_t base_clockid;
} alarm_bases[ALARM_NUMTYPE];
@@ -46,6 +45,8 @@ static struct alarm_base {
static ktime_t freezer_delta;
static DEFINE_SPINLOCK(freezer_delta_lock);

+static struct wakeup_source *ws;
+
#ifdef CONFIG_RTC_CLASS
/* rtc timer and device for setting alarm wakeups at suspend */
static struct rtc_timer rtctimer;
@@ -130,50 +131,35 @@ static inline void alarmtimer_rtc_timer_init(void) { }
* @base: pointer to the base where the timer is being run
* @alarm: pointer to alarm being enqueued.
*
- * Adds alarm to a alarm_base timerqueue and if necessary sets
- * an hrtimer to run.
+ * Adds alarm to an alarm_base timerqueue
*
* Must hold base->lock when calling.
*/
static void alarmtimer_enqueue(struct alarm_base *base, struct alarm *alarm)
{
+ if (alarm->state & ALARMTIMER_STATE_ENQUEUED)
+ timerqueue_del(&base->timerqueue, &alarm->node);
+
timerqueue_add(&base->timerqueue, &alarm->node);
alarm->state |= ALARMTIMER_STATE_ENQUEUED;
-
- if (&alarm->node == timerqueue_getnext(&base->timerqueue)) {
- hrtimer_try_to_cancel(&base->timer);
- hrtimer_start(&base->timer, alarm->node.expires,
- HRTIMER_MODE_ABS);
- }
}

/**
- * alarmtimer_remove - Removes an alarm timer from an alarm_base timerqueue
+ * alarmtimer_dequeue - Removes an alarm timer from an alarm_base timerqueue
* @base: pointer to the base where the timer is running
* @alarm: pointer to alarm being removed
*
- * Removes alarm to a alarm_base timerqueue and if necessary sets
- * a new timer to run.
+ * Removes alarm from an alarm_base timerqueue
*
* Must hold base->lock when calling.
*/
-static void alarmtimer_remove(struct alarm_base *base, struct alarm *alarm)
+static void alarmtimer_dequeue(struct alarm_base *base, struct alarm *alarm)
{
- struct timerqueue_node *next = timerqueue_getnext(&base->timerqueue);
-
if (!(alarm->state & ALARMTIMER_STATE_ENQUEUED))
return;

timerqueue_del(&base->timerqueue, &alarm->node);
alarm->state &= ~ALARMTIMER_STATE_ENQUEUED;
-
- if (next == &alarm->node) {
- hrtimer_try_to_cancel(&base->timer);
- next = timerqueue_getnext(&base->timerqueue);
- if (!next)
- return;
- hrtimer_start(&base->timer, next->expires, HRTIMER_MODE_ABS);
- }
}


@@ -188,42 +174,23 @@ static void alarmtimer_remove(struct alarm_base *base, struct alarm *alarm)
*/
static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer)
{
- struct alarm_base *base = container_of(timer, struct alarm_base, timer);
- struct timerqueue_node *next;
+ struct alarm *alarm = container_of(timer, struct alarm, timer);
+ struct alarm_base *base = &alarm_bases[alarm->type];
unsigned long flags;
- ktime_t now;
int ret = HRTIMER_NORESTART;
int restart = ALARMTIMER_NORESTART;

spin_lock_irqsave(&base->lock, flags);
- now = base->gettime();
- while ((next = timerqueue_getnext(&base->timerqueue))) {
- struct alarm *alarm;
- ktime_t expired = next->expires;
-
- if (expired.tv64 > now.tv64)
- break;
-
- alarm = container_of(next, struct alarm, node);
-
- timerqueue_del(&base->timerqueue, &alarm->node);
- alarm->state &= ~ALARMTIMER_STATE_ENQUEUED;
-
- alarm->state |= ALARMTIMER_STATE_CALLBACK;
- spin_unlock_irqrestore(&base->lock, flags);
- if (alarm->function)
- restart = alarm->function(alarm, now);
- spin_lock_irqsave(&base->lock, flags);
- alarm->state &= ~ALARMTIMER_STATE_CALLBACK;
+ alarmtimer_dequeue(base, alarm);
+ spin_unlock_irqrestore(&base->lock, flags);

- if (restart != ALARMTIMER_NORESTART) {
- timerqueue_add(&base->timerqueue, &alarm->node);
- alarm->state |= ALARMTIMER_STATE_ENQUEUED;
- }
- }
+ if (alarm->function)
+ restart = alarm->function(alarm, base->gettime());

- if (next) {
- hrtimer_set_expires(&base->timer, next->expires);
+ spin_lock_irqsave(&base->lock, flags);
+ if (restart != ALARMTIMER_NORESTART) {
+ hrtimer_set_expires(&alarm->timer, alarm->node.expires);
+ alarmtimer_enqueue(base, alarm);
ret = HRTIMER_RESTART;
}
spin_unlock_irqrestore(&base->lock, flags);
@@ -250,6 +217,7 @@ static int alarmtimer_suspend(struct device *dev)
unsigned long flags;
struct rtc_device *rtc;
int i;
+ int ret;

spin_lock_irqsave(&freezer_delta_lock, flags);
min = freezer_delta;
@@ -279,8 +247,10 @@ static int alarmtimer_suspend(struct device *dev)
if (min.tv64 == 0)
return 0;

- /* XXX - Should we enforce a minimum sleep time? */
- WARN_ON(min.tv64 < NSEC_PER_SEC);
+ if (ktime_to_ns(min) < 2 * NSEC_PER_SEC) {
+ __pm_wakeup_event(ws, 2 * MSEC_PER_SEC);
+ return -EBUSY;
+ }

/* Setup an rtc timer to fire that far in the future */
rtc_timer_cancel(rtc, &rtctimer);
@@ -288,9 +258,11 @@ static int alarmtimer_suspend(struct device *dev)
now = rtc_tm_to_ktime(tm);
now = ktime_add(now, min);

- rtc_timer_start(rtc, &rtctimer, now, ktime_set(0, 0));
-
- return 0;
+ /* Set alarm, if in the past reject suspend briefly to handle */
+ ret = rtc_timer_start(rtc, &rtctimer, now, ktime_set(0, 0));
+ if (ret < 0)
+ __pm_wakeup_event(ws, MSEC_PER_SEC);
+ return ret;
}
#else
static int alarmtimer_suspend(struct device *dev)
@@ -324,6 +296,9 @@ void alarm_init(struct alarm *alarm, enum alarmtimer_type type,
enum alarmtimer_restart (*function)(struct alarm *, ktime_t))
{
timerqueue_init(&alarm->node);
+ hrtimer_init(&alarm->timer, alarm_bases[type].base_clockid,
+ HRTIMER_MODE_ABS);
+ alarm->timer.function = alarmtimer_fired;
alarm->function = function;
alarm->type = type;
alarm->state = ALARMTIMER_STATE_INACTIVE;
@@ -334,17 +309,19 @@ void alarm_init(struct alarm *alarm, enum alarmtimer_type type,
* @alarm: ptr to alarm to set
* @start: time to run the alarm
*/
-void alarm_start(struct alarm *alarm, ktime_t start)
+int alarm_start(struct alarm *alarm, ktime_t start)
{
struct alarm_base *base = &alarm_bases[alarm->type];
unsigned long flags;
+ int ret;

spin_lock_irqsave(&base->lock, flags);
- if (alarmtimer_active(alarm))
- alarmtimer_remove(base, alarm);
alarm->node.expires = start;
alarmtimer_enqueue(base, alarm);
+ ret = hrtimer_start(&alarm->timer, alarm->node.expires,
+ HRTIMER_MODE_ABS);
spin_unlock_irqrestore(&base->lock, flags);
+ return ret;
}

/**
@@ -358,18 +335,12 @@ int alarm_try_to_cancel(struct alarm *alarm)
{
struct alarm_base *base = &alarm_bases[alarm->type];
unsigned long flags;
- int ret = -1;
- spin_lock_irqsave(&base->lock, flags);
-
- if (alarmtimer_callback_running(alarm))
- goto out;
+ int ret;

- if (alarmtimer_is_queued(alarm)) {
- alarmtimer_remove(base, alarm);
- ret = 1;
- } else
- ret = 0;
-out:
+ spin_lock_irqsave(&base->lock, flags);
+ ret = hrtimer_try_to_cancel(&alarm->timer);
+ if (ret >= 0)
+ alarmtimer_dequeue(base, alarm);
spin_unlock_irqrestore(&base->lock, flags);
return ret;
}
@@ -802,10 +773,6 @@ static int __init alarmtimer_init(void)
for (i = 0; i < ALARM_NUMTYPE; i++) {
timerqueue_init_head(&alarm_bases[i].timerqueue);
spin_lock_init(&alarm_bases[i].lock);
- hrtimer_init(&alarm_bases[i].timer,
- alarm_bases[i].base_clockid,
- HRTIMER_MODE_ABS);
- alarm_bases[i].timer.function = alarmtimer_fired;
}

error = alarmtimer_rtc_interface_setup();
@@ -821,6 +788,7 @@ static int __init alarmtimer_init(void)
error = PTR_ERR(pdev);
goto out_drv;
}
+ ws = wakeup_source_register("alarmtimer");
return 0;

out_drv:
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index 46da053..6629bf7 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -37,7 +37,7 @@
* requested HZ value. It is also not recommended
* for "tick-less" systems.
*/
-#define NSEC_PER_JIFFY ((u32)((((u64)NSEC_PER_SEC)<<8)/SHIFTED_HZ))
+#define NSEC_PER_JIFFY ((NSEC_PER_SEC+HZ/2)/HZ)

/* Since jiffies uses a simple NSEC_PER_JIFFY multiplier
* conversion, the .shift value could be zero. However
@@ -95,3 +95,33 @@ struct clocksource * __init __weak clocksource_default_clock(void)
{
return &clocksource_jiffies;
}
+
+struct clocksource refined_jiffies;
+
+int register_refined_jiffies(long cycles_per_second)
+{
+ u64 nsec_per_tick, shift_hz;
+ long cycles_per_tick;
+
+
+
+ refined_jiffies = clocksource_jiffies;
+ refined_jiffies.name = "refined-jiffies";
+ refined_jiffies.rating++;
+
+ /* Calc cycles per tick */
+ cycles_per_tick = (cycles_per_second + HZ/2)/HZ;
+ /* shift_hz stores hz<<8 for extra accuracy */
+ shift_hz = (u64)cycles_per_second << 8;
+ shift_hz += cycles_per_tick/2;
+ do_div(shift_hz, cycles_per_tick);
+ /* Calculate nsec_per_tick using shift_hz */
+ nsec_per_tick = (u64)NSEC_PER_SEC << 8;
+ nsec_per_tick += (u32)shift_hz/2;
+ do_div(nsec_per_tick, (u32)shift_hz);
+
+ refined_jiffies.mult = ((u32)nsec_per_tick) << JIFFIES_SHIFT;
+
+ clocksource_register(&refined_jiffies);
+ return 0;
+}
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index d3b91e7..3eb3fc7c 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -8,6 +8,7 @@
*
*/

+#include <linux/timekeeper_internal.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
@@ -21,61 +22,6 @@
#include <linux/tick.h>
#include <linux/stop_machine.h>

-/* Structure holding internal timekeeping values. */
-struct timekeeper {
- /* Current clocksource used for timekeeping. */
- struct clocksource *clock;
- /* NTP adjusted clock multiplier */
- u32 mult;
- /* The shift value of the current clocksource. */
- u32 shift;
- /* Number of clock cycles in one NTP interval. */
- cycle_t cycle_interval;
- /* Number of clock shifted nano seconds in one NTP interval. */
- u64 xtime_interval;
- /* shifted nano seconds left over when rounding cycle_interval */
- s64 xtime_remainder;
- /* Raw nano seconds accumulated per NTP interval. */
- u32 raw_interval;
-
- /* Current CLOCK_REALTIME time in seconds */
- u64 xtime_sec;
- /* Clock shifted nano seconds */
- u64 xtime_nsec;
-
- /* Difference between accumulated time and NTP time in ntp
- * shifted nano seconds. */
- s64 ntp_error;
- /* Shift conversion between clock shifted nano seconds and
- * ntp shifted nano seconds. */
- u32 ntp_error_shift;
-
- /*
- * wall_to_monotonic is what we need to add to xtime (or xtime corrected
- * for sub jiffie times) to get to monotonic time. Monotonic is pegged
- * at zero at system boot time, so wall_to_monotonic will be negative,
- * however, we will ALWAYS keep the tv_nsec part positive so we can use
- * the usual normalization.
- *
- * wall_to_monotonic is moved after resume from suspend for the
- * monotonic time not to jump. We need to add total_sleep_time to
- * wall_to_monotonic to get the real boot based time offset.
- *
- * - wall_to_monotonic is no longer the boot time, getboottime must be
- * used instead.
- */
- struct timespec wall_to_monotonic;
- /* Offset clock monotonic -> clock realtime */
- ktime_t offs_real;
- /* time spent in suspend */
- struct timespec total_sleep_time;
- /* Offset clock monotonic -> clock boottime */
- ktime_t offs_boot;
- /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */
- struct timespec raw_time;
- /* Seqlock for all timekeeper values */
- seqlock_t lock;
-};

static struct timekeeper timekeeper;

@@ -96,15 +42,6 @@ static inline void tk_normalize_xtime(struct timekeeper *tk)
}
}

-static struct timespec tk_xtime(struct timekeeper *tk)
-{
- struct timespec ts;
-
- ts.tv_sec = tk->xtime_sec;
- ts.tv_nsec = (long)(tk->xtime_nsec >> tk->shift);
- return ts;
-}
-
static void tk_set_xtime(struct timekeeper *tk, const struct timespec *ts)
{
tk->xtime_sec = ts->tv_sec;
@@ -246,14 +183,11 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
/* must hold write on timekeeper.lock */
static void timekeeping_update(struct timekeeper *tk, bool clearntp)
{
- struct timespec xt;
-
if (clearntp) {
tk->ntp_error = 0;
ntp_clear();
}
- xt = tk_xtime(tk);
- update_vsyscall(&xt, &tk->wall_to_monotonic, tk->clock, tk->mult);
+ update_vsyscall(tk);
}

/**
@@ -1111,7 +1045,7 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
accumulate_nsecs_to_secs(tk);

/* Accumulate raw time */
- raw_nsecs = tk->raw_interval << shift;
+ raw_nsecs = (u64)tk->raw_interval << shift;
raw_nsecs += tk->raw_time.tv_nsec;
if (raw_nsecs >= NSEC_PER_SEC) {
u64 raw_secs = raw_nsecs;
@@ -1128,6 +1062,33 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
return offset;
}

+#ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD
+static inline void old_vsyscall_fixup(struct timekeeper *tk)
+{
+ s64 remainder;
+
+ /*
+ * Store only full nanoseconds into xtime_nsec after rounding
+ * it up and add the remainder to the error difference.
+ * XXX - This is necessary to avoid small 1ns inconsistencies caused
+ * by truncating the remainder in vsyscalls. However, it causes
+ * additional work to be done in timekeeping_adjust(). Once
+ * the vsyscall implementations are converted to use xtime_nsec
+ * (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD
+ * users are removed, this can be killed.
+ */
+ remainder = tk->xtime_nsec & ((1ULL << tk->shift) - 1);
+ tk->xtime_nsec -= remainder;
+ tk->xtime_nsec += 1ULL << tk->shift;
+ tk->ntp_error += remainder << tk->ntp_error_shift;
+
+}
+#else
+#define old_vsyscall_fixup(tk)
+#endif
+
+
+
/**
* update_wall_time - Uses the current clocksource to increment the wall time
*
@@ -1139,7 +1100,6 @@ static void update_wall_time(void)
cycle_t offset;
int shift = 0, maxshift;
unsigned long flags;
- s64 remainder;

write_seqlock_irqsave(&tk->lock, flags);

@@ -1181,20 +1141,11 @@ static void update_wall_time(void)
/* correct the clock when NTP error is too big */
timekeeping_adjust(tk, offset);

-
/*
- * Store only full nanoseconds into xtime_nsec after rounding
- * it up and add the remainder to the error difference.
- * XXX - This is necessary to avoid small 1ns inconsistnecies caused
- * by truncating the remainder in vsyscalls. However, it causes
- * additional work to be done in timekeeping_adjust(). Once
- * the vsyscall implementations are converted to use xtime_nsec
- * (shifted nanoseconds), this can be killed.
- */
- remainder = tk->xtime_nsec & ((1ULL << tk->shift) - 1);
- tk->xtime_nsec -= remainder;
- tk->xtime_nsec += 1ULL << tk->shift;
- tk->ntp_error += remainder << tk->ntp_error_shift;
+ * XXX This can be killed once everyone converts
+ * to the new update_vsyscall.
+ */
+ old_vsyscall_fixup(tk);

/*
* Finally, make sure that after the rounding
diff --git a/kernel/timer.c b/kernel/timer.c
index 8c5e7b9..367d008 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -63,6 +63,7 @@ EXPORT_SYMBOL(jiffies_64);
#define TVR_SIZE (1 << TVR_BITS)
#define TVN_MASK (TVN_SIZE - 1)
#define TVR_MASK (TVR_SIZE - 1)
+#define MAX_TVAL ((unsigned long)((1ULL << (TVR_BITS + 4*TVN_BITS)) - 1))

struct tvec {
struct list_head vec[TVN_SIZE];
@@ -92,24 +93,25 @@ static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
/* Functions below help us manage 'deferrable' flag */
static inline unsigned int tbase_get_deferrable(struct tvec_base *base)
{
- return ((unsigned int)(unsigned long)base & TBASE_DEFERRABLE_FLAG);
+ return ((unsigned int)(unsigned long)base & TIMER_DEFERRABLE);
}

-static inline struct tvec_base *tbase_get_base(struct tvec_base *base)
+static inline unsigned int tbase_get_irqsafe(struct tvec_base *base)
{
- return ((struct tvec_base *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG));
+ return ((unsigned int)(unsigned long)base & TIMER_IRQSAFE);
}

-static inline void timer_set_deferrable(struct timer_list *timer)
+static inline struct tvec_base *tbase_get_base(struct tvec_base *base)
{
- timer->base = TBASE_MAKE_DEFERRED(timer->base);
+ return ((struct tvec_base *)((unsigned long)base & ~TIMER_FLAG_MASK));
}

static inline void
timer_set_base(struct timer_list *timer, struct tvec_base *new_base)
{
- timer->base = (struct tvec_base *)((unsigned long)(new_base) |
- tbase_get_deferrable(timer->base));
+ unsigned long flags = (unsigned long)timer->base & TIMER_FLAG_MASK;
+
+ timer->base = (struct tvec_base *)((unsigned long)(new_base) | flags);
}

static unsigned long round_jiffies_common(unsigned long j, int cpu,
@@ -358,11 +360,12 @@ __internal_add_timer(struct tvec_base *base, struct timer_list *timer)
vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK);
} else {
int i;
- /* If the timeout is larger than 0xffffffff on 64-bit
- * architectures then we use the maximum timeout:
+ /* If the timeout is larger than MAX_TVAL (on 64-bit
+ * architectures or with CONFIG_BASE_SMALL=1) then we
+ * use the maximum timeout.
*/
- if (idx > 0xffffffffUL) {
- idx = 0xffffffffUL;
+ if (idx > MAX_TVAL) {
+ idx = MAX_TVAL;
expires = idx + base->timer_jiffies;
}
i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
@@ -563,16 +566,14 @@ static inline void debug_timer_assert_init(struct timer_list *timer)
debug_object_assert_init(timer, &timer_debug_descr);
}

-static void __init_timer(struct timer_list *timer,
- const char *name,
- struct lock_class_key *key);
+static void do_init_timer(struct timer_list *timer, unsigned int flags,
+ const char *name, struct lock_class_key *key);

-void init_timer_on_stack_key(struct timer_list *timer,
- const char *name,
- struct lock_class_key *key)
+void init_timer_on_stack_key(struct timer_list *timer, unsigned int flags,
+ const char *name, struct lock_class_key *key)
{
debug_object_init_on_stack(timer, &timer_debug_descr);
- __init_timer(timer, name, key);
+ do_init_timer(timer, flags, name, key);
}
EXPORT_SYMBOL_GPL(init_timer_on_stack_key);

@@ -613,12 +614,13 @@ static inline void debug_assert_init(struct timer_list *timer)
debug_timer_assert_init(timer);
}

-static void __init_timer(struct timer_list *timer,
- const char *name,
- struct lock_class_key *key)
+static void do_init_timer(struct timer_list *timer, unsigned int flags,
+ const char *name, struct lock_class_key *key)
{
+ struct tvec_base *base = __raw_get_cpu_var(tvec_bases);
+
timer->entry.next = NULL;
- timer->base = __raw_get_cpu_var(tvec_bases);
+ timer->base = (void *)((unsigned long)base | flags);
timer->slack = -1;
#ifdef CONFIG_TIMER_STATS
timer->start_site = NULL;
@@ -628,22 +630,10 @@ static void __init_timer(struct timer_list *timer,
lockdep_init_map(&timer->lockdep_map, name, key, 0);
}

-void setup_deferrable_timer_on_stack_key(struct timer_list *timer,
- const char *name,
- struct lock_class_key *key,
- void (*function)(unsigned long),
- unsigned long data)
-{
- timer->function = function;
- timer->data = data;
- init_timer_on_stack_key(timer, name, key);
- timer_set_deferrable(timer);
-}
-EXPORT_SYMBOL_GPL(setup_deferrable_timer_on_stack_key);
-
/**
* init_timer_key - initialize a timer
* @timer: the timer to be initialized
+ * @flags: timer flags
* @name: name of the timer
* @key: lockdep class key of the fake lock used for tracking timer
* sync lock dependencies
@@ -651,24 +641,14 @@ EXPORT_SYMBOL_GPL(setup_deferrable_timer_on_stack_key);
* init_timer_key() must be done to a timer prior calling *any* of the
* other timer functions.
*/
-void init_timer_key(struct timer_list *timer,
- const char *name,
- struct lock_class_key *key)
+void init_timer_key(struct timer_list *timer, unsigned int flags,
+ const char *name, struct lock_class_key *key)
{
debug_init(timer);
- __init_timer(timer, name, key);
+ do_init_timer(timer, flags, name, key);
}
EXPORT_SYMBOL(init_timer_key);

-void init_timer_deferrable_key(struct timer_list *timer,
- const char *name,
- struct lock_class_key *key)
-{
- init_timer_key(timer, name, key);
- timer_set_deferrable(timer);
-}
-EXPORT_SYMBOL(init_timer_deferrable_key);
-
static inline void detach_timer(struct timer_list *timer, bool clear_pending)
{
struct list_head *entry = &timer->entry;
@@ -686,7 +666,7 @@ detach_expired_timer(struct timer_list *timer, struct tvec_base *base)
{
detach_timer(timer, true);
if (!tbase_get_deferrable(timer->base))
- timer->base->active_timers--;
+ base->active_timers--;
}

static int detach_if_pending(struct timer_list *timer, struct tvec_base *base,
@@ -697,7 +677,7 @@ static int detach_if_pending(struct timer_list *timer, struct tvec_base *base,

detach_timer(timer, clear_pending);
if (!tbase_get_deferrable(timer->base)) {
- timer->base->active_timers--;
+ base->active_timers--;
if (timer->expires == base->next_timer)
base->next_timer = base->timer_jiffies;
}
@@ -1029,14 +1009,14 @@ EXPORT_SYMBOL(try_to_del_timer_sync);
*
* Synchronization rules: Callers must prevent restarting of the timer,
* otherwise this function is meaningless. It must not be called from
- * interrupt contexts. The caller must not hold locks which would prevent
- * completion of the timer's handler. The timer's handler must not call
- * add_timer_on(). Upon exit the timer is not queued and the handler is
- * not running on any CPU.
+ * interrupt contexts unless the timer is an irqsafe one. The caller must
+ * not hold locks which would prevent completion of the timer's
+ * handler. The timer's handler must not call add_timer_on(). Upon exit the
+ * timer is not queued and the handler is not running on any CPU.
*
- * Note: You must not hold locks that are held in interrupt context
- * while calling this function. Even if the lock has nothing to do
- * with the timer in question. Here's why:
+ * Note: For !irqsafe timers, you must not hold locks that are held in
+ * interrupt context while calling this function. Even if the lock has
+ * nothing to do with the timer in question. Here's why:
*
* CPU0 CPU1
* ---- ----
@@ -1073,7 +1053,7 @@ int del_timer_sync(struct timer_list *timer)
* don't use it in hardirq context, because it
* could lead to deadlock.
*/
- WARN_ON(in_irq());
+ WARN_ON(in_irq() && !tbase_get_irqsafe(timer->base));
for (;;) {
int ret = try_to_del_timer_sync(timer);
if (ret >= 0)
@@ -1180,19 +1160,27 @@ static inline void __run_timers(struct tvec_base *base)
while (!list_empty(head)) {
void (*fn)(unsigned long);
unsigned long data;
+ bool irqsafe;

timer = list_first_entry(head, struct timer_list,entry);
fn = timer->function;
data = timer->data;
+ irqsafe = tbase_get_irqsafe(timer->base);

timer_stats_account_timer(timer);

base->running_timer = timer;
detach_expired_timer(timer, base);

- spin_unlock_irq(&base->lock);
- call_timer_fn(timer, fn, data);
- spin_lock_irq(&base->lock);
+ if (irqsafe) {
+ spin_unlock(&base->lock);
+ call_timer_fn(timer, fn, data);
+ spin_lock(&base->lock);
+ } else {
+ spin_unlock_irq(&base->lock);
+ call_timer_fn(timer, fn, data);
+ spin_lock_irq(&base->lock);
+ }
}
}
base->running_timer = NULL;
@@ -1791,9 +1779,13 @@ static struct notifier_block __cpuinitdata timers_nb = {

void __init init_timers(void)
{
- int err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,
- (void *)(long)smp_processor_id());
+ int err;
+
+ /* ensure there are enough low bits for flags in timer->base pointer */
+ BUILD_BUG_ON(__alignof__(struct tvec_base) & TIMER_FLAG_MASK);

+ err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,
+ (void *)(long)smp_processor_id());
init_timer_stats();

BUG_ON(err != NOTIFY_OK);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/