[PATCH] Fix POSIX timers to give CLOCK_MONOTONIC full resolution and tie it to xtime instead of jiffies

From: george anzinger (george@mvista.com)
Date: Mon Apr 14 2003 - 19:09:49 EST


The POSIX CLOCK_MONOTONIC currently has only 1/HZ resolution.
Further, it is tied to jiffies (i.e. is a restatment of jiffies)
rather than "xtime" or the gettimeofday() clock.

This patch changes CLOCK_MONOTONIC to be a restatment of
gettimeofday() plus an offset to remove any clock setting activity
from CLOCK_MONOTONIC. An offset is kept that represents the
difference between CLOCK_MONOTONIC and gettimeofday(). This offset is
updated when ever the gettimeofday() clock is set to back the clock
setting change out of CLOCK_MONOTONIC (which by the standard, can not
be set).

With this change CLOCK_REALTIME (a direct restatement of
gettimeofday()), CLOCK_MONOTONIC and gettimeofday() will all tick at
the same time and with the same rate. And all will be affected by NTP
adjustments (save those which actually set the time).

-- 
George Anzinger   george@mvista.com
High-res-timers:  http://sourceforge.net/projects/high-res-timers/
Preemption patch: http://www.kernel.org/pub/linux/kernel/people/rml

diff -urP -I '\$Id:.*Exp \$' -X /usr/src/patch.exclude linux-2.5.67-bk3-fix/arch/i386/kernel/time.c linux/arch/i386/kernel/time.c --- linux-2.5.67-bk3-fix/arch/i386/kernel/time.c 2003-04-09 18:32:56.000000000 -0700 +++ linux/arch/i386/kernel/time.c 2003-04-12 02:49:36.000000000 -0700 @@ -121,15 +121,28 @@ * made, and then undo it! */ tv->tv_usec -= timer->get_offset(); - tv->tv_usec -= (jiffies - wall_jiffies) * (1000000 / HZ); + tv->tv_usec -= (jiffies - wall_jiffies) * (USEC_PER_SEC / HZ); while (tv->tv_usec < 0) { - tv->tv_usec += 1000000; + tv->tv_usec += USEC_PER_SEC; tv->tv_sec--; } + tv->tv_usec *= NSEC_PER_USEC; + + wall_to_monotonic.tv_sec += xtime.tv_sec - tv->tv_sec; + wall_to_monotonic.tv_nsec += xtime.tv_nsec - tv->tv_usec; + + if(wall_to_monotonic.tv_nsec > NSEC_PER_SEC){ + wall_to_monotonic.tv_nsec -= NSEC_PER_SEC; + wall_to_monotonic.tv_sec++; + } + if(wall_to_monotonic.tv_nsec < 0){ + wall_to_monotonic.tv_nsec += NSEC_PER_SEC; + wall_to_monotonic.tv_sec--; + } xtime.tv_sec = tv->tv_sec; - xtime.tv_nsec = (tv->tv_usec * 1000); + xtime.tv_nsec = tv->tv_usec; time_adjust = 0; /* stop active adjtime() */ time_status |= STA_UNSYNC; time_maxerror = NTP_PHASE_LIMIT; @@ -401,7 +414,9 @@ { xtime.tv_sec = get_cmos_time(); - xtime.tv_nsec = 0; + wall_to_monotonic.tv_sec = -xtime.tv_sec + INITIAL_JIFFIES / HZ; + xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); + wall_to_monotonic.tv_nsec = 0; timer = select_timer(); diff -urP -I '\$Id:.*Exp \$' -X /usr/src/patch.exclude linux-2.5.67-bk3-fix/include/linux/time.h linux/include/linux/time.h --- linux-2.5.67-bk3-fix/include/linux/time.h 2003-04-12 02:16:29.000000000 -0700 +++ linux/include/linux/time.h 2003-04-12 02:49:36.000000000 -0700 @@ -140,6 +140,7 @@ } extern struct timespec xtime; +extern struct timespec wall_to_monotonic; extern seqlock_t xtime_lock; static inline unsigned long get_seconds(void) @@ -200,6 +201,9 @@ #define CLOCK_MONOTONIC_HR 5 #define MAX_CLOCKS 6 +#define CLOCKS_MASK (CLOCK_REALTIME | CLOCK_MONOTONIC | \ + CLOCK_REALTIME_HR | CLOCK_MONOTONIC_HR) +#define CLOCKS_MONO (CLOCK_MONOTONIC & CLOCK_MONOTONIC_HR) /* * The various flags for setting POSIX.1b interval timers. diff -urP -I '\$Id:.*Exp \$' -X /usr/src/patch.exclude linux-2.5.67-bk3-fix/kernel/posix-timers.c linux/kernel/posix-timers.c --- linux-2.5.67-bk3-fix/kernel/posix-timers.c 2003-04-12 02:34:42.000000000 -0700 +++ linux/kernel/posix-timers.c 2003-04-14 16:10:43.000000000 -0700 @@ -48,7 +48,7 @@ * The idr_get_new *may* call slab for more memory so it must not be * called under a spin lock. Likewise idr_remore may release memory * (but it may be ok to do this under a lock...). - * idr_find is just a memory look up and is quite fast. A zero return + * idr_find is just a memory look up and is quite fast. A -1 return * indicates that the requested id does not exist. */ @@ -82,6 +82,7 @@ * For some reason mips/mips64 define the SIGEV constants plus 128. * Here we define a mask to get rid of the common bits. The * optimizer should make this costless to all but mips. + * Note that no common bits (the non-mips case) will give 0xffffffff. */ #define MIPS_SIGEV ~(SIGEV_NONE & \ SIGEV_SIGNAL & \ @@ -91,7 +92,7 @@ * The timer ID is turned into a timer address by idr_find(). * Verifying a valid ID consists of: * - * a) checking that idr_find() returns other than zero. + * a) checking that idr_find() returns other than -1. * b) checking that the timer id matches the one in the timer itself. * c) that the timer owner is in the callers thread group. */ @@ -160,6 +161,8 @@ void register_posix_clock(int clock_id, struct k_clock *new_clock); static int do_posix_gettime(struct k_clock *clock, struct timespec *tp); +static u64 do_posix_clock_monotonic_gettime_parts( + struct timespec *tp, struct timespec *mo); int do_posix_clock_monotonic_gettime(struct timespec *tp); int do_posix_clock_monotonic_settime(struct timespec *tp); static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags); @@ -190,7 +193,7 @@ static void tstojiffie(struct timespec *tp, int res, u64 *jiff) { - unsigned long sec = tp->tv_sec; + long sec = tp->tv_sec; long nsec = tp->tv_nsec + res - 1; if (nsec > NSEC_PER_SEC) { @@ -208,7 +211,7 @@ * below. Here it is enough to just discard the high order * bits. */ - *jiff = (u64)sec * HZ; + *jiff = (s64)sec * HZ; /* * Do the res thing. (Don't forget the add in the declaration of nsec) */ @@ -219,17 +222,6 @@ *jiff += nsec / (NSEC_PER_SEC / HZ); } -static void tstotimer(struct itimerspec *time, struct k_itimer *timer) -{ - u64 result; - int res = posix_clocks[timer->it_clock].res; - - tstojiffie(&time->it_value, res, &result); - timer->it_timer.expires = (unsigned long)result; - tstojiffie(&time->it_interval, res, &result); - timer->it_incr = (unsigned long)result; -} - static void schedule_next_timer(struct k_itimer *timr) { struct now_struct now; @@ -693,57 +685,81 @@ * If it is relative time, we need to add the current (CLOCK_MONOTONIC) * time to it to get the proper time for the timer. */ -static int adjust_abs_time(struct k_clock *clock, struct timespec *tp, int abs) +static int adjust_abs_time(struct k_clock *clock, struct timespec *tp, + int abs, u64 *exp) { struct timespec now; - struct timespec oc; - do_posix_clock_monotonic_gettime(&now); - - if (!abs || (posix_clocks[CLOCK_MONOTONIC].clock_get != - clock->clock_get)) { - if (abs) - do_posix_gettime(clock, &oc); - else - oc.tv_nsec = oc.tv_sec = 0; - - tp->tv_sec += now.tv_sec - oc.tv_sec; - tp->tv_nsec += now.tv_nsec - oc.tv_nsec; + struct timespec oc = *tp; + struct timespec wall_to_mono; + u64 jiffies_64_f; + int rtn =0; + if(abs){ + /* + * The mask pick up the 4 basic clocks + */ + if(! (clock - &posix_clocks[0]) & ~CLOCKS_MASK){ + jiffies_64_f = do_posix_clock_monotonic_gettime_parts( + &now, &wall_to_mono); + /* + * If we are doing a MONOTONIC clock + */ + if((clock - &posix_clocks[0]) & CLOCKS_MONO){ + now.tv_sec += wall_to_mono.tv_sec; + now.tv_nsec += wall_to_mono.tv_nsec; + } + }else{ + /* + * Not one of the basic clocks + */ + do_posix_gettime(clock, &now); + jiffies_64_f = get_jiffies_64(); + } + /* + * Take away now to get delta + */ + oc.tv_sec -= now.tv_sec; + oc.tv_nsec -= now.tv_nsec; /* * Normalize... */ - if ((tp->tv_nsec - NSEC_PER_SEC) >= 0) { - tp->tv_nsec -= NSEC_PER_SEC; - tp->tv_sec++; + while ((oc.tv_nsec - NSEC_PER_SEC) >= 0) { + oc.tv_nsec -= NSEC_PER_SEC; + oc.tv_sec++; } - if ((tp->tv_nsec) < 0) { - tp->tv_nsec += NSEC_PER_SEC; - tp->tv_sec--; + while ((oc.tv_nsec) < 0) { + oc.tv_nsec += NSEC_PER_SEC; + oc.tv_sec--; } + }else{ + jiffies_64_f = get_jiffies_64(); } /* - * Check if the requested time is prior to now (if so set now) or - * is more than the timer code can handle (if so we error out). - * The (unsigned) catches the case of prior to "now" with the same - * test. Only on failure do we sort out what happened, and then - * we use the (unsigned) to error out negative seconds. + * Check if the requested time is prior to now (if so set now) */ - if ((unsigned) (tp->tv_sec - now.tv_sec) > (MAX_JIFFY_OFFSET / HZ)) { - if ((unsigned) tp->tv_sec < now.tv_sec) { - tp->tv_sec = now.tv_sec; - tp->tv_nsec = now.tv_nsec; - } else + if( oc.tv_sec < 0) + oc.tv_sec = oc.tv_nsec = 0; + tstojiffie(&oc, clock->res, exp); + + /* + * Check if the requested time is more than the timer code + * can handle (if so we error out but return the value too). + */ + if ( *exp > ((u64)MAX_JIFFY_OFFSET )) /* * This is a considered response, not exactly in * line with the standard (in fact it is silent on - * possible overflows). We assume such a large + * possible overflows). We assume such a large * value is ALMOST always a programming error and * try not to compound it by setting a really dumb * value. */ - return -EINVAL; - } - return 0; + rtn = -EINVAL; + /* + * return the actual jiffies expire time, full 64 bits + */ + *exp += jiffies_64_f; + return rtn; } /* Set a POSIX.1b interval timer. */ @@ -753,6 +769,7 @@ struct itimerspec *new_setting, struct itimerspec *old_setting) { struct k_clock *clock = &posix_clocks[timr->it_clock]; + u64 expire_64; if (old_setting) do_timer_gettime(timr, old_setting); @@ -790,14 +807,15 @@ return 0; } - if ((flags & TIMER_ABSTIME) && - (clock->clock_get != do_posix_clock_monotonic_gettime)) - // FIXME: what is this? - ; if (adjust_abs_time(clock, - &new_setting->it_value, flags & TIMER_ABSTIME)) + &new_setting->it_value, flags & TIMER_ABSTIME, + &expire_64)) { return -EINVAL; - tstotimer(new_setting, timr); + } + timr->it_timer.expires = (unsigned long)expire_64; + tstojiffie(&new_setting->it_interval, clock->res, &expire_64); + timr->it_incr = (unsigned long)expire_64; + /* * For some reason the timer does not fire immediately if expires is @@ -966,30 +984,46 @@ * Note also that the while loop assures that the sub_jiff_offset * will be less than a jiffie, thus no need to normalize the result. * Well, not really, if called with ints off :( - * - * HELP, this code should make an attempt at resolution beyond the - * jiffie. Trouble is this is "arch" dependent... */ -int do_posix_clock_monotonic_gettime(struct timespec *tp) +static u64 do_posix_clock_monotonic_gettime_parts( + struct timespec *tp, struct timespec *mo) { - long sub_sec; - u64 jiffies_64_f; - -#if (BITS_PER_LONG > 32) - jiffies_64_f = jiffies_64; -#else + u64 jiff; + struct timeval tpv; unsigned int seq; do { seq = read_seqbegin(&xtime_lock); - jiffies_64_f = jiffies_64; + do_gettimeofday(&tpv); + *mo = wall_to_monotonic; + jiff = jiffies_64; - } while (read_seqretry(&xtime_lock, seq)); -#endif - tp->tv_sec = div_long_long_rem(jiffies_64_f, HZ, &sub_sec); - tp->tv_nsec = sub_sec * (NSEC_PER_SEC / HZ); + }while( read_seqretry(&xtime_lock, seq)); + /* + * Love to get this before it is converted to usec. + * It would save a div AND a mpy. + */ + tp->tv_sec = tpv.tv_sec; + tp->tv_nsec = tpv.tv_usec * NSEC_PER_USEC; + + return jiff; +} + +int do_posix_clock_monotonic_gettime(struct timespec *tp) +{ + struct timespec wall_to_mono; + + do_posix_clock_monotonic_gettime_parts(tp, &wall_to_mono); + + tp->tv_sec += wall_to_mono.tv_sec; + tp->tv_nsec += wall_to_mono.tv_nsec; + + if( (tp->tv_nsec - NSEC_PER_SEC) > 0){ + tp->tv_nsec -= NSEC_PER_SEC; + tp->tv_sec++; + } return 0; } @@ -1140,7 +1174,7 @@ struct timespec t; struct timer_list new_timer; DECLARE_WAITQUEUE(abs_wqueue, current); - u64 rq_time = 0; + u64 rq_time = (u64)0; s64 left; int abs; struct restart_block *restart_block = @@ -1165,7 +1199,7 @@ if (!rq_time) return -EINTR; left = rq_time - get_jiffies_64(); - if (left <= 0LL) + if (left <= (s64)0) return 0; /* Already passed */ } @@ -1176,14 +1210,14 @@ do { t = *tsave; if (abs || !rq_time) { - adjust_abs_time(&posix_clocks[which_clock], &t, abs); - tstojiffie(&t, posix_clocks[which_clock].res, &rq_time); + adjust_abs_time(&posix_clocks[which_clock], &t, abs, + &rq_time); } left = rq_time - get_jiffies_64(); - if (left >= MAX_JIFFY_OFFSET) - left = MAX_JIFFY_OFFSET; - if (left < 0) + if (left >= (s64)MAX_JIFFY_OFFSET) + left = (s64)MAX_JIFFY_OFFSET; + if (left < (s64)0) break; new_timer.expires = jiffies + left; @@ -1194,12 +1228,12 @@ del_timer_sync(&new_timer); left = rq_time - get_jiffies_64(); - } while (left > 0 && !test_thread_flag(TIF_SIGPENDING)); + } while (left > (s64)0 && !test_thread_flag(TIF_SIGPENDING)); if (abs_wqueue.task_list.next) finish_wait(&nanosleep_abs_wqueue, &abs_wqueue); - if (left > 0) { + if (left > (s64)0) { unsigned long rmd; /* diff -urP -I '\$Id:.*Exp \$' -X /usr/src/patch.exclude linux-2.5.67-bk3-fix/kernel/timer.c linux/kernel/timer.c --- linux-2.5.67-bk3-fix/kernel/timer.c 2003-04-12 02:16:30.000000000 -0700 +++ linux/kernel/timer.c 2003-04-12 02:49:36.000000000 -0700 @@ -440,8 +440,16 @@ unsigned long tick_usec = TICK_USEC; /* ACTHZ period (usec) */ unsigned long tick_nsec = TICK_NSEC(TICK_USEC); /* USER_HZ period (nsec) */ -/* The current time */ +/* + * The current time + * wall_to_monotonic is what we need to add to xtime (or xtime corrected + * for sub jiffie times) to get to monotonic time. Monotonic is pegged at zero + * at zero at system boot time, so wall_to_monotonic will be negative, + * however, we will ALWAYS keep the tv_nsec part positive so we can use + * the usual normalization. + */ struct timespec xtime __attribute__ ((aligned (16))); +struct timespec wall_to_monotonic __attribute__ ((aligned (16))); /* Don't completely fail for HZ > 500. */ int tickadj = 500/HZ ? : 1; /* microsecs */ @@ -507,6 +515,7 @@ case TIME_INS: if (xtime.tv_sec % 86400 == 0) { xtime.tv_sec--; + wall_to_monotonic.tv_sec++; time_state = TIME_OOP; clock_was_set(); printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n"); @@ -516,6 +525,7 @@ case TIME_DEL: if ((xtime.tv_sec + 1) % 86400 == 0) { xtime.tv_sec++; + wall_to_monotonic.tv_sec--; time_state = TIME_WAIT; clock_was_set(); printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n"); Binary files linux-2.5.67-bk3-fix/usr/initramfs_data.cpio and linux/usr/initramfs_data.cpio differ Binary files linux-2.5.67-bk3-fix/usr/initramfs_data.cpio.gz and linux/usr/initramfs_data.cpio.gz differ

- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Tue Apr 15 2003 - 22:00:34 EST