further xtime race fixes [was Re: [patch] fix for xtime SMP race]

Andrea Arcangeli (andrea@e-mind.com)
Fri, 25 Dec 1998 18:36:04 +0100 (CET)


On Thu, 24 Dec 1998, Andrea Arcangeli wrote:

> Here the patch against 2.1.132 to make get_time_fast() SMP safe:

This patch also fixes all the other raw accesses to xtime. I think it's
really needed because there are syscalls in kernel/time.c that read and
write xtime after doing nothing more than a cli() (while another of these
syscalls could be running on the other CPU, or similar bad things could
happen with bh handlers run by schedule(), if I remember correctly). The
s/xtime/__xtime/ rename is just there so that the compiler helps us find
every place that needs the s/xtime/get_xtime/ conversion...

The patch should fix everything on i386; architectures other than i386 are
not covered. It is incremental against the last patch in this thread, the
one that added the xtime_lock rwlock to sched.c:

Index: arch/i386/kernel/time.c
===================================================================
RCS file: /var/cvs/linux/arch/i386/kernel/time.c,v
retrieving revision 1.1.1.1.2.12
diff -u -r1.1.1.1.2.12 time.c
--- time.c 1998/12/24 17:19:01 1.1.1.1.2.12
+++ linux/arch/i386/kernel/time.c 1998/12/25 15:48:46
@@ -84,8 +84,6 @@
*/
static unsigned long fast_gettimeoffset_quotient=0;

-extern rwlock_t xtime_lock;
-
static unsigned long do_fast_gettimeoffset(void)
{
register unsigned long eax asm("ax");
@@ -242,7 +240,7 @@
unsigned long flags;

read_lock_irqsave(&xtime_lock, flags);
- *tv = xtime;
+ *tv = __xtime;
tv->tv_usec += do_gettimeoffset();
if (lost_ticks)
tv->tv_usec += lost_ticks * (1000000/HZ);
@@ -269,7 +267,7 @@
tv->tv_sec--;
}

- xtime = *tv;
+ __xtime = *tv;
time_state = TIME_BAD;
time_maxerror = MAXPHASE;
time_esterror = MAXPHASE;
@@ -369,13 +367,13 @@
* CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
* called as close as possible to 500 ms before the new second starts.
*/
- if (time_state != TIME_BAD && xtime.tv_sec > last_rtc_update + 660 &&
- xtime.tv_usec > 500000 - (tick >> 1) &&
- xtime.tv_usec < 500000 + (tick >> 1)) {
- if (set_rtc_mmss(xtime.tv_sec) == 0)
- last_rtc_update = xtime.tv_sec;
+ if (time_state != TIME_BAD && __xtime.tv_sec > last_rtc_update + 660 &&
+ __xtime.tv_usec > 500000 - (tick >> 1) &&
+ __xtime.tv_usec < 500000 + (tick >> 1)) {
+ if (set_rtc_mmss(__xtime.tv_sec) == 0)
+ last_rtc_update = __xtime.tv_sec;
else
- last_rtc_update = xtime.tv_sec - 600; /* do it again in 60 s */
+ last_rtc_update = __xtime.tv_sec - 600; /* do it again in 60 s */
}
#if 0
/* As we return to user mode fire off the other CPU schedulers.. this is
@@ -604,8 +602,8 @@

__initfunc(void time_init(void))
{
- xtime.tv_sec = get_cmos_time();
- xtime.tv_usec = 0;
+ __xtime.tv_sec = get_cmos_time();
+ __xtime.tv_usec = 0;

/*
* If we have APM enabled or the CPU clock speed is variable
Index: fs/affs/file.c
===================================================================
RCS file: /var/cvs/linux/fs/affs/file.c,v
retrieving revision 1.1.1.1
diff -u -r1.1.1.1 file.c
--- file.c 1998/11/19 23:01:10 1.1.1.1
+++ linux/fs/affs/file.c 1998/12/25 15:59:30
@@ -280,7 +280,7 @@
}
}
kc = NULL;
- tv = xtime;
+ tv = get_xtime();
for (i = 0; i < 4; i++) {
tkc = &inode->u.affs_i.i_ec->kc[i];
if (tkc->kc_lru_time.tv_sec > tv.tv_sec)
@@ -293,7 +293,7 @@
}
if (!kc) /* Really shouldn't happen */
kc = tkc;
- kc->kc_lru_time = xtime;
+ kc->kc_lru_time = get_xtime();
keyp = kc->kc_keys;
kc->kc_first = block;
kc->kc_last = -1;
Index: fs/nfsd/nfssvc.c
===================================================================
RCS file: /var/cvs/linux/fs/nfsd/nfssvc.c,v
retrieving revision 1.1.1.1.2.3
diff -u -r1.1.1.1.2.3 nfssvc.c
--- nfssvc.c 1998/12/17 14:44:02 1.1.1.1.2.3
+++ linux/fs/nfsd/nfssvc.c 1998/12/25 15:58:50
@@ -98,7 +98,7 @@
oldumask = current->fs->umask; /* Set umask to 0. */
current->fs->umask = 0;
if (!nfsd_active++)
- nfssvc_boot = xtime; /* record boot time */
+ nfssvc_boot = get_xtime(); /* record boot time */
lockd_up(); /* start lockd */

/*
Index: fs/proc/array.c
===================================================================
RCS file: /var/cvs/linux/fs/proc/array.c,v
retrieving revision 1.1.1.1.2.6
diff -u -r1.1.1.1.2.6 array.c
--- array.c 1998/12/20 15:51:27 1.1.1.1.2.6
+++ linux/fs/proc/array.c 1998/12/25 15:59:15
@@ -302,7 +302,7 @@
"btime %lu\n"
"processes %lu\n",
kstat.context_swtch,
- xtime.tv_sec - jiffies / HZ,
+ get_xtime().tv_sec - jiffies / HZ,
total_forks);
return len;
}
Index: include/linux/sched.h
===================================================================
RCS file: /var/cvs/linux/include/linux/sched.h,v
retrieving revision 1.1.1.1.2.3
diff -u -r1.1.1.1.2.3 sched.h
--- sched.h 1998/12/17 14:44:38 1.1.1.1.2.3
+++ linux/include/linux/sched.h 1998/12/25 16:23:23
@@ -118,6 +118,11 @@
extern rwlock_t tasklist_lock;
extern spinlock_t scheduler_lock;

+/*
+ * This serialize xtime read/write accesses to avoid SMP races. -arca
+ */
+extern rwlock_t xtime_lock;
+
extern void sched_init(void);
extern void show_state(void);
extern void trap_init(void);
@@ -450,14 +455,14 @@
extern unsigned long volatile jiffies;
extern unsigned long itimer_ticks;
extern unsigned long itimer_next;
-extern struct timeval xtime;
+extern struct timeval __xtime;
extern void do_timer(struct pt_regs *);

extern unsigned int * prof_buffer;
extern unsigned long prof_len;
extern unsigned long prof_shift;

-#define CURRENT_TIME (xtime.tv_sec)
+#define CURRENT_TIME (get_xtime().tv_sec)

extern void FASTCALL(__wake_up(struct wait_queue ** p, unsigned int mode));
extern void FASTCALL(sleep_on(struct wait_queue ** p));
@@ -662,6 +667,21 @@
write_lock_irqsave(&waitqueue_lock, flags);
__remove_wait_queue(p, wait);
write_unlock_irqrestore(&waitqueue_lock, flags);
+}
+
+/*
+ * This function returns us an always correct xtime value avoiding races. -arca
+ */
+extern inline struct timeval get_xtime(void)
+{
+ struct timeval tv;
+ unsigned long flags;
+
+ read_lock_irqsave(&xtime_lock, flags);
+ tv = __xtime;
+ read_unlock_irqrestore(&xtime_lock, flags);
+
+ return tv;
}

#define REMOVE_LINKS(p) do { \
Index: kernel/acct.c
===================================================================
RCS file: /var/cvs/linux/kernel/acct.c,v
retrieving revision 1.1.1.1.2.1
diff -u -r1.1.1.1.2.1 acct.c
--- acct.c 1998/11/27 10:41:40 1.1.1.1.2.1
+++ linux/kernel/acct.c 1998/12/25 16:00:02
@@ -291,7 +291,7 @@
strncpy(ac.ac_comm, current->comm, ACCT_COMM);
ac.ac_comm[ACCT_COMM - 1] = '\0';

- ac.ac_btime = CT_TO_SECS(current->start_time) + (xtime.tv_sec - (jiffies / HZ));
+ ac.ac_btime = CT_TO_SECS(current->start_time) + (get_xtime().tv_sec - (jiffies / HZ));
ac.ac_etime = encode_comp_t(jiffies - current->start_time);
ac.ac_utime = encode_comp_t(current->times.tms_utime);
ac.ac_stime = encode_comp_t(current->times.tms_stime);
Index: kernel/ksyms.c
===================================================================
RCS file: /var/cvs/linux/kernel/ksyms.c,v
retrieving revision 1.1.1.1.2.8
diff -u -r1.1.1.1.2.8 ksyms.c
--- ksyms.c 1998/12/20 15:51:31 1.1.1.1.2.8
+++ linux/kernel/ksyms.c 1998/12/25 16:00:31
@@ -302,7 +302,8 @@
EXPORT_SYMBOL(schedule);
EXPORT_SYMBOL(schedule_timeout);
EXPORT_SYMBOL(jiffies);
-EXPORT_SYMBOL(xtime);
+EXPORT_SYMBOL(__xtime);
+EXPORT_SYMBOL(xtime_lock);
EXPORT_SYMBOL(do_gettimeofday);
EXPORT_SYMBOL(loops_per_sec);
EXPORT_SYMBOL(kstat);
Index: kernel/sched.c
===================================================================
RCS file: /var/cvs/linux/kernel/sched.c,v
retrieving revision 1.1.1.1.2.31
diff -u -r1.1.1.1.2.31 sched.c
--- sched.c 1998/12/24 17:19:00 1.1.1.1.2.31
+++ linux/kernel/sched.c 1998/12/25 15:48:13
@@ -46,7 +46,7 @@
long tick = (1000000 + HZ/2) / HZ; /* timer interrupt period */

/* The current time */
-volatile struct timeval xtime __attribute__ ((aligned (16)));
+volatile struct timeval __xtime __attribute__ ((aligned (16)));

/* Don't completely fail for HZ > 500. */
int tickadj = 500/HZ ? : 1; /* microsecs */
@@ -958,16 +958,16 @@
break;

case TIME_INS:
- if (xtime.tv_sec % 86400 == 0) {
- xtime.tv_sec--;
+ if (__xtime.tv_sec % 86400 == 0) {
+ __xtime.tv_sec--;
time_state = TIME_OOP;
printk("Clock: inserting leap second 23:59:60 UTC\n");
}
break;

case TIME_DEL:
- if ((xtime.tv_sec + 1) % 86400 == 0) {
- xtime.tv_sec++;
+ if ((__xtime.tv_sec + 1) % 86400 == 0) {
+ __xtime.tv_sec++;
time_state = TIME_WAIT;
printk("Clock: deleting leap second 23:59:59 UTC\n");
}
@@ -1051,14 +1051,14 @@
if (time_phase <= -FINEUSEC) {
long ltemp = -time_phase >> SHIFT_SCALE;
time_phase += ltemp << SHIFT_SCALE;
- xtime.tv_usec += tick + time_adjust_step - ltemp;
+ __xtime.tv_usec += tick + time_adjust_step - ltemp;
}
else if (time_phase >= FINEUSEC) {
long ltemp = time_phase >> SHIFT_SCALE;
time_phase -= ltemp << SHIFT_SCALE;
- xtime.tv_usec += tick + time_adjust_step + ltemp;
+ __xtime.tv_usec += tick + time_adjust_step + ltemp;
} else
- xtime.tv_usec += tick + time_adjust_step;
+ __xtime.tv_usec += tick + time_adjust_step;

if (time_adjust) {
/* We are doing an adjtime thing.
@@ -1098,9 +1098,9 @@
update_wall_time_one_tick();
} while (ticks);

- if (xtime.tv_usec >= 1000000) {
- xtime.tv_usec -= 1000000;
- xtime.tv_sec++;
+ if (__xtime.tv_usec >= 1000000) {
+ __xtime.tv_usec -= 1000000;
+ __xtime.tv_sec++;
second_overflow();
}
}
Index: kernel/time.c
===================================================================
RCS file: /var/cvs/linux/kernel/time.c,v
retrieving revision 1.1.1.1.2.1
diff -u -r1.1.1.1.2.1 time.c
--- time.c 1998/11/27 10:41:41 1.1.1.1.2.1
+++ linux/kernel/time.c 1998/12/25 16:15:34
@@ -32,7 +32,7 @@

static void do_normal_gettime(struct timeval * tm)
{
- *tm=xtime;
+ *tm=get_xtime();
}

void (*do_get_fast_time)(struct timeval *) = do_normal_gettime;
@@ -86,8 +86,11 @@
if (get_user(value, tptr))
return -EFAULT;
cli();
- xtime.tv_sec = value;
- xtime.tv_usec = 0;
+ /* we just have irqs locally disabled -arca */
+ write_lock(&xtime_lock);
+ __xtime.tv_sec = value;
+ __xtime.tv_usec = 0;
+ write_unlock(&xtime_lock);
time_state = TIME_ERROR;
time_maxerror = MAXPHASE;
time_esterror = MAXPHASE;
@@ -130,9 +133,9 @@
*/
inline static void warp_clock(void)
{
- cli();
- xtime.tv_sec += sys_tz.tz_minuteswest * 60;
- sti();
+ write_lock_irq(&xtime_lock);
+ __xtime.tv_sec += sys_tz.tz_minuteswest * 60;
+ write_unlock_irq(&xtime_lock);
}

/*
@@ -231,6 +234,7 @@
return -EINVAL;

cli(); /* SMP: global cli() is enough protection. */
+ read_lock(&xtime_lock);

/* Save for later - semantics of adjtime is to return old value */
save_adjust = time_adjust;
@@ -286,9 +290,9 @@
*/

if (time_status & STA_FREQHOLD || time_reftime == 0)
- time_reftime = xtime.tv_sec;
- mtemp = xtime.tv_sec - time_reftime;
- time_reftime = xtime.tv_sec;
+ time_reftime = __xtime.tv_sec;
+ mtemp = __xtime.tv_sec - time_reftime;
+ time_reftime = __xtime.tv_sec;
if (time_status & STA_FLL)
{
if (mtemp >= MINSEC)
@@ -334,7 +338,7 @@
txc->constant = time_constant;
txc->precision = time_precision;
txc->tolerance = time_tolerance;
- txc->time = xtime;
+ txc->time = __xtime;
txc->tick = tick;
txc->ppsfreq = pps_freq;
txc->jitter = pps_jitter;
@@ -345,6 +349,7 @@
txc->errcnt = pps_errcnt;
txc->stbcnt = pps_stbcnt;

+ read_unlock(&xtime_lock);
sti();
return 0;
}
Index: net/core/dev.c
===================================================================
RCS file: /var/cvs/linux/net/core/dev.c,v
retrieving revision 1.1.1.1.2.4
diff -u -r1.1.1.1.2.4 dev.c
--- dev.c 1998/12/20 15:51:33 1.1.1.1.2.4
+++ linux/net/core/dev.c 1998/12/25 15:45:55
@@ -752,7 +752,7 @@
if(skb->stamp.tv_sec==0)
get_fast_time(&skb->stamp);
#else
- skb->stamp = xtime;
+ skb->stamp = get_xtime();
#endif

/* The code is rearranged so that the path is the most
Index: net/ipv4/ip_forward.c
===================================================================
RCS file: /var/cvs/linux/net/ipv4/ip_forward.c,v
retrieving revision 1.1.1.1
diff -u -r1.1.1.1 ip_forward.c
--- ip_forward.c 1998/11/19 23:01:50 1.1.1.1
+++ linux/net/ipv4/ip_forward.c 1998/12/25 16:17:23
@@ -97,7 +97,7 @@
#ifdef CONFIG_CPU_IS_SLOW
if (net_cpu_congestion > 1 && !(iph->tos&IPTOS_RELIABILITY) &&
IPTOS_PREC(iph->tos) < IPTOS_PREC_INTERNETCONTROL) {
- if (((xtime.tv_usec&0xF)<<net_cpu_congestion) > 0x1C)
+ if (((get_xtime().tv_usec&0xF)<<net_cpu_congestion) > 0x1C)
goto drop;
}
#endif
Index: net/sunrpc/svcauth_des.c
===================================================================
RCS file: /var/cvs/linux/net/sunrpc/svcauth_des.c,v
retrieving revision 1.1.1.1
diff -u -r1.1.1.1 svcauth_des.c
--- svcauth_des.c 1998/11/19 23:01:57 1.1.1.1
+++ linux/net/sunrpc/svcauth_des.c 1998/12/25 16:17:32
@@ -175,7 +175,7 @@
#endif
}

- now = xtime;
+ now = get_xtime();
now.tv_secs -= data->dc_window;
if (now.tv_secs < cryptbuf[0] ||
(now.tv_secs == cryptbuf[0] && now.tv_usec < cryptbuf[1]))

Andrea Arcangeli

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/