diff -urN linux-2.4.3/include/linux/sched.h linux-2.4.3-s390/include/linux/sched.h --- linux-2.4.3/include/linux/sched.h Tue Mar 27 01:48:11 2001 +++ linux-2.4.3-s390/include/linux/sched.h Thu Apr 5 10:23:48 2001 @@ -144,6 +144,7 @@ extern void cpu_init (void); extern void trap_init(void); extern void update_process_times(int user); +extern void update_process_times2(int user, int system); extern void update_one_process(struct task_struct *p, unsigned long user, unsigned long system, int cpu); @@ -534,7 +535,9 @@ #include +#ifndef CONFIG_NO_HZ_TIMER extern unsigned long volatile jiffies; +#endif extern unsigned long itimer_ticks; extern unsigned long itimer_next; extern struct timeval xtime; diff -urN linux-2.4.3/include/linux/time.h linux-2.4.3-s390/include/linux/time.h --- linux-2.4.3/include/linux/time.h Tue Mar 27 01:48:10 2001 +++ linux-2.4.3-s390/include/linux/time.h Thu Apr 5 10:23:48 2001 @@ -42,10 +42,10 @@ } static __inline__ void -jiffies_to_timespec(unsigned long jiffies, struct timespec *value) +jiffies_to_timespec(unsigned long _jiffies, struct timespec *value) { - value->tv_nsec = (jiffies % HZ) * (1000000000L / HZ); - value->tv_sec = jiffies / HZ; + value->tv_nsec = (_jiffies % HZ) * (1000000000L / HZ); + value->tv_sec = _jiffies / HZ; } diff -urN linux-2.4.3/include/linux/timer.h linux-2.4.3-s390/include/linux/timer.h --- linux-2.4.3/include/linux/timer.h Tue Mar 27 01:48:10 2001 +++ linux-2.4.3-s390/include/linux/timer.h Thu Apr 5 10:23:48 2001 @@ -35,6 +35,18 @@ #define sync_timers() do { } while (0) #endif +#ifdef CONFIG_NO_HZ_TIMER +/* + * Setting timer_notify to something != NULL will make + * the timer routines call the notification routine + * whenever a new add_timer/mod_timer has set a new + * soonest timer event. 
+ */ +extern void (*timer_notify)(unsigned long expires); +extern void (*itimer_notify)(void); +extern void update_times_irqsave(void); +#endif + /* * mod_timer is a more efficient way to update the expire field of an * active timer (if the timer is inactive it will be activated) diff -urN linux-2.4.3/kernel/itimer.c linux-2.4.3-s390/kernel/itimer.c --- linux-2.4.3/kernel/itimer.c Thu Jun 29 19:07:36 2000 +++ linux-2.4.3-s390/kernel/itimer.c Thu Apr 5 10:23:48 2001 @@ -34,10 +34,10 @@ return HZ*sec+usec; } -static void jiffiestotv(unsigned long jiffies, struct timeval *value) +static void jiffiestotv(unsigned long _jiffies, struct timeval *value) { - value->tv_usec = (jiffies % HZ) * (1000000 / HZ); - value->tv_sec = jiffies / HZ; + value->tv_usec = (_jiffies % HZ) * (1000000 / HZ); + value->tv_sec = _jiffies / HZ; } int do_getitimer(int which, struct itimerval *value) @@ -105,6 +105,16 @@ } } +#ifdef CONFIG_NO_HZ_TIMER +void (*itimer_notify)(void) = NULL; + +static inline void do_itimer_notify(void) +{ + if (itimer_notify != NULL) + (*itimer_notify)(); +} +#endif + int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue) { register unsigned long i, j; @@ -132,12 +142,18 @@ j++; current->it_virt_value = j; current->it_virt_incr = i; +#ifdef CONFIG_NO_HZ_TIMER + do_itimer_notify(); +#endif break; case ITIMER_PROF: if (j) j++; current->it_prof_value = j; current->it_prof_incr = i; +#ifdef CONFIG_NO_HZ_TIMER + do_itimer_notify(); +#endif break; default: return -EINVAL; diff -urN linux-2.4.3/kernel/ksyms.c linux-2.4.3-s390/kernel/ksyms.c --- linux-2.4.3/kernel/ksyms.c Thu Apr 5 10:23:36 2001 +++ linux-2.4.3-s390/kernel/ksyms.c Thu Apr 5 10:23:48 2001 @@ -429,7 +429,9 @@ EXPORT_SYMBOL(interruptible_sleep_on_timeout); EXPORT_SYMBOL(schedule); EXPORT_SYMBOL(schedule_timeout); +#ifndef CONFIG_NO_HZ_TIMER EXPORT_SYMBOL(jiffies); +#endif EXPORT_SYMBOL(xtime); EXPORT_SYMBOL(do_gettimeofday); EXPORT_SYMBOL(do_settimeofday); diff -urN 
linux-2.4.3/kernel/timer.c linux-2.4.3-s390/kernel/timer.c --- linux-2.4.3/kernel/timer.c Sun Dec 10 18:53:19 2000 +++ linux-2.4.3-s390/kernel/timer.c Thu Apr 5 10:23:48 2001 @@ -65,7 +65,9 @@ extern int do_setitimer(int, struct itimerval *, struct itimerval *); +#ifndef CONFIG_NO_HZ_TIMER unsigned long volatile jiffies; +#endif unsigned int * prof_buffer; unsigned long prof_len; @@ -173,6 +175,22 @@ #define timer_exit() do { } while (0) #endif +#ifdef CONFIG_NO_HZ_TIMER +void (*timer_notify)(unsigned long) = NULL; +unsigned long notify_jiffy = 0; + +static inline void do_timer_notify(struct timer_list *timer) +{ + if (timer_notify != NULL) { + if (notify_jiffy == 0 || + time_before(timer->expires, notify_jiffy)) { + (*timer_notify)(timer->expires); + notify_jiffy = timer->expires; + } + } +} +#endif + void add_timer(struct timer_list *timer) { unsigned long flags; @@ -181,6 +199,9 @@ if (timer_pending(timer)) goto bug; internal_add_timer(timer); +#ifdef CONFIG_NO_HZ_TIMER + do_timer_notify(timer); +#endif spin_unlock_irqrestore(&timerlist_lock, flags); return; bug: @@ -206,6 +227,9 @@ timer->expires = expires; ret = detach_timer(timer); internal_add_timer(timer); +#ifdef CONFIG_NO_HZ_TIMER + do_timer_notify(timer); +#endif spin_unlock_irqrestore(&timerlist_lock, flags); return ret; } @@ -323,6 +347,89 @@ spin_unlock_irq(&timerlist_lock); } +#ifdef CONFIG_NO_HZ_TIMER +/* + * Check timer list for earliest timer + */ +static inline struct timer_list * +earlier_timer_in_list(struct list_head *head, struct timer_list *event) +{ + struct list_head *curr; + + if (list_empty(head)) + return event; + curr = head->next; + if (event == NULL) { + event = list_entry(curr, struct timer_list, list); + curr = curr->next; + } + while (curr != head) { + struct timer_list * tmp; + + tmp = list_entry(curr, struct timer_list, list); + if (time_before(tmp->expires, event->expires)) + event = tmp; + curr = curr->next; + } + return event; +} + +/* + * Find out when the next timer event 
is due to happen. This + * is used on S/390 to be able to skip timer ticks. + * The timerlist_lock must be acquired before calling this function. + */ +struct timer_list *next_timer_event(void) +{ + struct timer_list *nte = NULL; + int i; + + /* Look for the next timer event in tv1. */ + i = tv1.index; + do { + struct list_head *head = tv1.vec + i; + if (!list_empty(head)) { + nte = list_entry(head->next, struct timer_list, list); + if (i < tv1.index) { + /* + * The search wrapped. We need to look + * at the next list from tvecs[1] that + * would cascade into tv1. + */ + head = tvecs[1]->vec + tvecs[1]->index; + nte = earlier_timer_in_list(head, nte); + } + goto out; + } + i = (i + 1) & TVR_MASK; + } while (i != tv1.index); + + /* No event found in tv1. Check tv2-tv5. */ + for (i = 1; i < NOOF_TVECS; i++) { + int j = tvecs[i]->index; + do { + struct list_head *head = tvecs[i]->vec + j; + nte = earlier_timer_in_list(head, NULL); + if (nte) { + if (j < tvecs[i]->index && i < NOOF_TVECS-1) { + /* + * The search wrapped. We need to look + * at the next list from tvecs[i+1] + * that would cascade into tvecs[i]. + */ + head = tvecs[i+1]->vec+tvecs[i+1]->index; + nte = earlier_timer_in_list(head, nte); + } + goto out; + } + j = (j + 1) & TVN_MASK; + } while (j != tvecs[i]->index); + } + out: + return nte; +} +#endif + spinlock_t tqueue_lock = SPIN_LOCK_UNLOCKED; void tqueue_bh(void) @@ -458,8 +565,13 @@ #endif } -/* in the NTP reference this is called "hardclock()" */ -static void update_wall_time_one_tick(void) +/* + * The ticks loop used in the past is gone because with + * the CONFIG_NO_HZ_TIMER config option on S/390 it is + * possible that ticks is a lot bigger than one. + * -- martin + */ +static void update_wall_time(unsigned long ticks) { if ( (time_adjust_step = time_adjust) != 0 ) { /* We are doing an adjtime thing. @@ -470,21 +582,22 @@ * * Limit the amount of the step to be in the range * -tickadj .. +tickadj + * per tick. 
*/ - if (time_adjust > tickadj) - time_adjust_step = tickadj; - else if (time_adjust < -tickadj) - time_adjust_step = -tickadj; + if (time_adjust > tickadj*(long)ticks) + time_adjust_step = tickadj*(long)ticks; + else if (time_adjust < -tickadj*(long)ticks) + time_adjust_step = -tickadj*(long)ticks; /* Reduce by this step the amount of time left */ time_adjust -= time_adjust_step; } - xtime.tv_usec += tick + time_adjust_step; + xtime.tv_usec += tick*ticks + time_adjust_step; /* * Advance the phase, once it gets to one microsecond, then * advance the tick more. */ - time_phase += time_adj; + time_phase += time_adj*ticks; if (time_phase <= -FINEUSEC) { long ltemp = -time_phase >> SHIFT_SCALE; time_phase += ltemp << SHIFT_SCALE; @@ -495,21 +608,6 @@ time_phase -= ltemp << SHIFT_SCALE; xtime.tv_usec += ltemp; } -} - -/* - * Using a loop looks inefficient, but "ticks" is - * usually just one (we shouldn't be losing ticks, - * we're doing this this way mainly for interrupt - * latency reasons, not because we think we'll - * have lots of lost timer ticks - */ -static void update_wall_time(unsigned long ticks) -{ - do { - ticks--; - update_wall_time_one_tick(); - } while (ticks); if (xtime.tv_usec >= 1000000) { xtime.tv_usec -= 1000000; @@ -527,7 +625,7 @@ psecs += (p->times.tms_stime += system); if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_cur) { /* Send SIGXCPU every second.. */ - if (!(psecs % HZ)) + if ((psecs % HZ) < user+system) send_sig(SIGXCPU, p, 1); /* and SIGKILL when we go over max.. 
*/ if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_max) @@ -540,24 +638,25 @@ unsigned long it_virt = p->it_virt_value; if (it_virt) { - it_virt -= ticks; - if (!it_virt) { + if (it_virt <= ticks) { it_virt = p->it_virt_incr; send_sig(SIGVTALRM, p, 1); - } + } else + it_virt -= ticks; p->it_virt_value = it_virt; } } -static inline void do_it_prof(struct task_struct *p) +static inline void do_it_prof(struct task_struct *p, unsigned long ticks) { unsigned long it_prof = p->it_prof_value; if (it_prof) { - if (--it_prof == 0) { + if (it_prof <= ticks) { it_prof = p->it_prof_incr; send_sig(SIGPROF, p, 1); - } + } else + it_prof -= ticks; p->it_prof_value = it_prof; } } @@ -569,7 +668,7 @@ p->per_cpu_stime[cpu] += system; do_process_times(p, user, system); do_it_virt(p, user); - do_it_prof(p); + do_it_prof(p, user + system); } /* @@ -597,6 +696,31 @@ } /* + * Called from the timer interrupt handler to charge a couple of + * system and user ticks. + */ +void update_process_times2(int user, int system) +{ + struct task_struct *p = current; + int cpu = smp_processor_id(); + + update_one_process(p, user, system, cpu); + if (p->pid) { + p->counter -= user + system; + if (p->counter <= 0) { + p->counter = 0; + p->need_resched = 1; + } + if (p->nice > 0) + kstat.per_cpu_nice[cpu] += user; + else + kstat.per_cpu_user[cpu] += user; + kstat.per_cpu_system[cpu] += system; + } else if (local_bh_count(cpu) || local_irq_count(cpu) > 1) + kstat.per_cpu_system[cpu] += system; +} + +/* * Nr of active tasks - counted in fixed-point numbers */ static unsigned long count_active_tasks(void) @@ -628,7 +752,7 @@ static int count = LOAD_FREQ; count -= ticks; - if (count < 0) { + while (count < 0) { count += LOAD_FREQ; active_tasks = count_active_tasks(); CALC_LOAD(avenrun[0], EXP_1, active_tasks); @@ -650,7 +774,7 @@ unsigned long ticks; /* - * update_times() is run from the raw timer_bh handler so we + * do_update_times() is run from the raw timer_bh handler so we * just know that the irqs are 
locally enabled and so we don't * need to save/restore the flags of the local CPU here. -arca */ @@ -665,12 +789,49 @@ calc_load(ticks); } +void update_times_irqsave(void) +{ + unsigned long ticks; + unsigned long flags; + + /* + * Variant of update_times() that does not assume irqs are locally + * enabled: the flags of the local CPU are saved when taking + * xtime_lock and restored when it is released. + */ + write_lock_irqsave(&xtime_lock, flags); + + ticks = jiffies - wall_jiffies; + if (ticks) { + wall_jiffies += ticks; + update_wall_time(ticks); + } + write_unlock_irqrestore(&xtime_lock, flags); + calc_load(ticks); +} + void timer_bh(void) { update_times(); run_timer_list(); +#ifdef CONFIG_NO_HZ_TIMER + if (timer_notify != NULL) { + struct timer_list *timer; + unsigned long flags; + + spin_lock_irqsave(&timerlist_lock, flags); + timer = next_timer_event(); + if (timer != NULL) { + (*timer_notify)(timer->expires); + notify_jiffy = timer->expires; + } else + notify_jiffy = 0; + spin_unlock_irqrestore(&timerlist_lock, flags); + } +#endif } +#ifndef CONFIG_NO_HZ_TIMER void do_timer(struct pt_regs *regs) { (*(unsigned long *)&jiffies)++; @@ -683,6 +844,7 @@ if (TQ_ACTIVE(tq_timer)) mark_bh(TQUEUE_BH); } +#endif #if !defined(__alpha__) && !defined(__ia64__)