On Tue, 2002-11-19 at 17:06, Michael Hohnbaum wrote:
> Running a large application that issues many gettimeofday()
> system calls on a kernel running with notsc, results in time
> slowing way down. I've seen the system time advance only
> three minutes over a 30 minute period. The following program,
> executed twice demonstrates the problem. Three instances running
> in parallel made a 16 processor machine completely unusable.
[snip]
> I've recreated on 2.5.30, 2.5.44, and 2.5.47. Running a system that
[snip]
> I assume there is a lock starvation problem happening here, correct?
Most likely xtime_lock starvation. I believe other folks were hitting
this earlier. What do folks think about the attached (and completely
untested, probably breaks x86-64) patch? I'm yanking the read_lock for a
vxtime_sequence lock, as implemented in the x86-64 vsyscall code. This
should alleviate the writer starvation, although no doubt it still has a
few gotchas in it. Also I still need to vxtime_lock all the other
instances of write locking the xtime_lock, but its a start.
comments, flames?
-john
diff -Nru a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
--- a/arch/i386/kernel/time.c Tue Nov 19 19:30:05 2002
+++ b/arch/i386/kernel/time.c Tue Nov 19 19:30:05 2002
@@ -85,19 +85,19 @@
*/
void do_gettimeofday(struct timeval *tv)
{
- unsigned long flags;
- unsigned long usec, sec;
+ unsigned long usec, sec, sequence;
- read_lock_irqsave(&xtime_lock, flags);
- usec = timer->get_offset();
- {
- unsigned long lost = jiffies - wall_jiffies;
- if (lost)
- usec += lost * (1000000 / HZ);
- }
- sec = xtime.tv_sec;
- usec += (xtime.tv_nsec / 1000);
- read_unlock_irqrestore(&xtime_lock, flags);
+ sequence = vxtime_sequence[1];
+ do {
+ usec = timer->get_offset();
+ {
+ unsigned long lost = jiffies - wall_jiffies;
+ if (lost)
+ usec += lost * (1000000 / HZ);
+ }
+ sec = xtime.tv_sec;
+ usec += (xtime.tv_nsec / 1000);
+ } while(sequence != vxtime_sequence[0]);
while (usec >= 1000000) {
usec -= 1000000;
@@ -111,6 +111,7 @@
void do_settimeofday(struct timeval *tv)
{
write_lock_irq(&xtime_lock);
+ vxtime_lock();
/*
* This is revolting. We need to set "xtime" correctly. However, the
* value in this location is the value at the most recent update of
@@ -127,6 +128,8 @@
xtime.tv_sec = tv->tv_sec;
xtime.tv_nsec = (tv->tv_usec * 1000);
+ vxtime_unlock();
+
time_adjust = 0; /* stop active adjtime() */
time_status |= STA_UNSYNC;
time_maxerror = NTP_PHASE_LIMIT;
@@ -278,11 +281,13 @@
* locally disabled. -arca
*/
write_lock(&xtime_lock);
-
+ vxtime_lock();
+
timer->mark_offset();
do_timer_interrupt(irq, NULL, regs);
+ vxtime_unlock();
write_unlock(&xtime_lock);
}
diff -Nru a/arch/i386/kernel/timers/timer_pit.c b/arch/i386/kernel/timers/timer_pit.c
--- a/arch/i386/kernel/timers/timer_pit.c Tue Nov 19 19:30:05 2002
+++ b/arch/i386/kernel/timers/timer_pit.c Tue Nov 19 19:30:05 2002
@@ -61,17 +61,17 @@
static unsigned long get_offset_pit(void)
{
int count;
-
+ unsigned long flags;
static int count_p = LATCH; /* for the first call after boot */
static unsigned long jiffies_p = 0;
/*
- * cache volatile jiffies temporarily; we have IRQs turned off.
+ * cache volatile jiffies temporarily;
+ * IRQs are not turned off, but we'll retry if something changes
*/
unsigned long jiffies_t;
- /* gets recalled with irq locally disabled */
- spin_lock(&i8253_lock);
+ spin_lock_irqsave(&i8253_lock,flags);
/* timer count may underflow right here */
outb_p(0x00, 0x43); /* latch the count ASAP */
@@ -93,7 +93,7 @@
count = LATCH - 1;
}
- spin_unlock(&i8253_lock);
+ spin_unlock_irqrestore(&i8253_lock,flags);
/*
* avoiding timer inconsistencies (they are rare, but they happen)...
diff -Nru a/include/linux/time.h b/include/linux/time.h
--- a/include/linux/time.h Tue Nov 19 19:30:05 2002
+++ b/include/linux/time.h Tue Nov 19 19:30:05 2002
@@ -122,6 +122,11 @@
extern struct timespec xtime;
extern rwlock_t xtime_lock;
+/*unstarvable xtime rwlock*/
+extern long vxtime_sequence[2];
+#define vxtime_lock() do { vxtime_sequence[0]++; wmb(); } while(0)
+#define vxtime_unlock() do { wmb(); vxtime_sequence[1]++; } while (0)
+
static inline unsigned long get_seconds(void)
{
return xtime.tv_sec;
diff -Nru a/kernel/timer.c b/kernel/timer.c
--- a/kernel/timer.c Tue Nov 19 19:30:05 2002
+++ b/kernel/timer.c Tue Nov 19 19:30:05 2002
@@ -761,6 +761,7 @@
*/
rwlock_t xtime_lock __cacheline_aligned_in_smp = RW_LOCK_UNLOCKED;
unsigned long last_time_offset;
+long vxtime_sequence[2]; /*unstarvable xtime rwlock*/
/*
* This function runs timers and the timer-tq in bottom half context.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
This archive was generated by hypermail 2b29 : Sat Nov 23 2002 - 22:00:30 EST