diff -urN linux-2.5.65.virgin/include/linux/sched.h linux-2.5.65/include/linux/sched.h
--- linux-2.5.65.virgin/include/linux/sched.h	Thu Mar 20 22:11:52 2003
+++ linux-2.5.65/include/linux/sched.h	Tue Mar 18 19:19:38 2003
@@ -328,7 +328,8 @@
 	prio_array_t *array;
 
 	unsigned long sleep_avg;
-	unsigned long last_run;
+	unsigned long sleep_begin;
+	unsigned long sleep_end;
 
 	unsigned long policy;
 	unsigned long cpus_allowed;
diff -urN linux-2.5.65.virgin/kernel/fork.c linux-2.5.65/kernel/fork.c
--- linux-2.5.65.virgin/kernel/fork.c	Thu Mar 20 22:11:54 2003
+++ linux-2.5.65/kernel/fork.c	Tue Mar 18 19:23:00 2003
@@ -918,7 +918,7 @@
 	 */
 	p->first_time_slice = 1;
 	current->time_slice >>= 1;
-	p->last_run = jiffies;
+	p->sleep_begin = p->sleep_end = jiffies;
 	if (!current->time_slice) {
 		/*
 		 * This case is rare, it happens when the parent has only
diff -urN linux-2.5.65.virgin/kernel/printk.c linux-2.5.65/kernel/printk.c
--- linux-2.5.65.virgin/kernel/printk.c	Thu Mar 20 22:11:54 2003
+++ linux-2.5.65/kernel/printk.c	Wed Mar 19 06:37:45 2003
@@ -510,8 +510,10 @@
 	console_may_schedule = 0;
 	up(&console_sem);
 	spin_unlock_irqrestore(&logbuf_lock, flags);
+#if 0	// MIKEDIDIT
 	if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait))
 		wake_up_interruptible(&log_wait);
+#endif
 }
 
 /** console_conditional_schedule - yield the CPU if required
diff -urN linux-2.5.65.virgin/kernel/sched.c linux-2.5.65/kernel/sched.c
--- linux-2.5.65.virgin/kernel/sched.c	Thu Mar 20 22:11:54 2003
+++ linux-2.5.65/kernel/sched.c	Thu Mar 20 15:13:34 2003
@@ -67,12 +67,13 @@
 #define MIN_TIMESLICE		( 10 * HZ / 1000)
 #define MAX_TIMESLICE		(200 * HZ / 1000)
 #define CHILD_PENALTY		50
-#define PARENT_PENALTY		100
+#define PARENT_PENALTY		85
 #define EXIT_WEIGHT		3
 #define PRIO_BONUS_RATIO	25
 #define INTERACTIVE_DELTA	2
 #define MAX_SLEEP_AVG		(10*HZ)
-#define STARVATION_LIMIT	(10*HZ)
+#define STARVATION_LIMIT	(1*MAX_TIMESLICE)
+#define TIMESLICE_GRANULARITY	(HZ/20 ?: 1)
 #define NODE_THRESHOLD		125
 
 /*
@@ -332,59 +333,27 @@
 {
 	enqueue_task(p, rq->active);
 	nr_running_inc(rq);
+	p->sleep_end = jiffies;
 }
 
 /*
- * activate_task - move a task to the runqueue and do priority recalculation
- *
- * Update all the scheduling statistics stuff. (sleep average
- * calculation, priority modifiers, etc.)
+ * activate_task - move a task to the runqueue and do priority
+ * recalculation. If the waker is maximum-interactive, give an
+ * additional boost to the sleeper as well. This has the effect
+ * of boosting tasks which are related to an interactive task.
  */
static inline int activate_task(task_t *p, runqueue_t *rq)
 {
-	long sleep_time = jiffies - p->last_run - 1;
-	int requeue_waker = 0;
-
-	if (sleep_time > 0) {
-		int sleep_avg;
-
-		/*
-		 * This code gives a bonus to interactive tasks.
-		 *
-		 * The boost works by updating the 'average sleep time'
-		 * value here, based on ->last_run. The more time a task
-		 * spends sleeping, the higher the average gets - and the
-		 * higher the priority boost gets as well.
-		 */
-		sleep_avg = p->sleep_avg + sleep_time;
-
-		/*
-		 * 'Overflow' bonus ticks go to the waker as well, so the
-		 * ticks are not lost. This has the effect of further
-		 * boosting tasks that are related to maximum-interactive
-		 * tasks.
-		 */
-		if (sleep_avg > MAX_SLEEP_AVG) {
-			if (!in_interrupt()) {
-				sleep_avg += current->sleep_avg - MAX_SLEEP_AVG;
-				if (sleep_avg > MAX_SLEEP_AVG)
-					sleep_avg = MAX_SLEEP_AVG;
-
-				if (current->sleep_avg != sleep_avg) {
-					current->sleep_avg = sleep_avg;
-					requeue_waker = 1;
-				}
-			}
-			sleep_avg = MAX_SLEEP_AVG;
-		}
-		if (p->sleep_avg != sleep_avg) {
-			p->sleep_avg = sleep_avg;
-			p->prio = effective_prio(p);
-		}
+	int requeue_waker = in_interrupt();
+
+	if (!requeue_waker && current->sleep_avg == MAX_SLEEP_AVG) {
+		p->sleep_avg += TIMESLICE_GRANULARITY;
+		if (p->sleep_avg > MAX_SLEEP_AVG)
+			p->sleep_avg = MAX_SLEEP_AVG;
 	}
+	p->prio = effective_prio(p);
 	__activate_task(p, rq);
 
-	return requeue_waker;
+	return requeue_waker ? 0 : TASK_INTERACTIVE(p);
 }
 
 /*
@@ -397,6 +366,7 @@
 		rq->nr_uninterruptible++;
 	dequeue_task(p, p->array);
 	p->array = NULL;
+	p->sleep_begin = jiffies;
 }
 
 /*
@@ -1063,7 +1033,7 @@
  */
 
 #define CAN_MIGRATE_TASK(p,rq,this_cpu)				\
-	((jiffies - (p)->last_run > cache_decay_ticks) &&	\
+	((jiffies - (p)->sleep_end > cache_decay_ticks) &&	\
		!task_running(rq, p) &&				\
			((p)->cpus_allowed & (1UL << (this_cpu))))
 
@@ -1176,10 +1146,17 @@
  * load-dependent, as the frequency of array switched decreases with
  * increasing number of running tasks:
  */
+#if 0
 #define EXPIRED_STARVING(rq) \
		(STARVATION_LIMIT && ((rq)->expired_timestamp && \
			(jiffies - (rq)->expired_timestamp >= \
				STARVATION_LIMIT * ((rq)->nr_running) + 1)))
+#else
+#define EXPIRED_STARVING(rq) \
+		(STARVATION_LIMIT && ((rq)->expired_timestamp && \
+			(jiffies - (rq)->expired_timestamp >= \
+				STARVATION_LIMIT * ((rq)->active->nr_active) + 1)))
+#endif
 
 /*
  * This function gets called by the timer code, with HZ frequency.
@@ -1194,6 +1171,11 @@
 	runqueue_t *rq = this_rq();
 	task_t *p = current;
 
+	/* Update sleep average. */
+	if (p->sleep_avg)
+		p->sleep_avg--;
+	p->sleep_begin = p->sleep_end = jiffies;
+
 	if (rcu_pending(cpu))
 		rcu_check_callbacks(cpu, user_ticks);
@@ -1221,15 +1203,12 @@
 	}
 	spin_lock(&rq->lock);
 	/*
-	 * The task was running during this tick - update the
-	 * time slice counter and the sleep average. Note: we
-	 * do not update a thread's priority until it either
-	 * goes to sleep or uses up its timeslice. This makes
-	 * it possible for interactive tasks to use up their
-	 * timeslices at their highest priority levels.
+	 * The task was running during this tick - update the time
+	 * slice counter. Note: we do not update a thread's priority
+	 * until it either goes to sleep or uses up its timeslice.
+	 * This makes it possible for interactive tasks to use up
+	 * their timeslices at their highest priority levels.
	 */
-	if (p->sleep_avg)
-		p->sleep_avg--;
 	if (unlikely(rt_task(p))) {
		/*
		 * RR tasks need a special form of timeslice management.
@@ -1259,6 +1238,29 @@
			enqueue_task(p, rq->expired);
		} else
			enqueue_task(p, rq->active);
+	} else {
+		/*
+		 * Prevent a too long timeslice from monopolizing the CPU
+		 * by splitting it up into smaller pieces.
+		 *
+		 * Note: this does not mean the task's timeslices expire or
+		 * get lost in any way, they just might be preempted by
+		 * another task of equal priority. (one with higher
+		 * priority would have preempted this task already.) We
+		 * requeue this task to the end of the list on this priority
+		 * level, which is in essence a round-robin of tasks with
+		 * equal priority.
+		 */
+		if (!(p->time_slice % TIMESLICE_GRANULARITY) &&
+				(p->array == rq->active)) {
+			dequeue_task(p, rq->active);
+			set_tsk_need_resched(p);
+			p->prio = effective_prio(p);
+			enqueue_task(p, rq->active);
+#if 1	// MIKEDIDIT
+			p->sleep_begin += (TIMESLICE_GRANULARITY * rq->active->nr_active);
+#endif
+		}
 	}
out:
	spin_unlock(&rq->lock);
@@ -1297,7 +1299,6 @@
 	rq = this_rq();
 
 	release_kernel_lock(prev);
-	prev->last_run = jiffies;
 	spin_lock_irq(&rq->lock);
 
 	/*
@@ -1351,6 +1352,8 @@
		RCU_qsctr(prev->thread_info->cpu)++;
 
 	if (likely(prev != next)) {
+		static unsigned long time = INITIAL_JIFFIES;	// MIKEDIDIT
+		long slept = 0;
		rq->nr_switches++;
		rq->curr = next;
 
@@ -1359,6 +1362,30 @@
		barrier();
 
		finish_task_switch(prev);
+		/*
+		 * Update sleep_avg. Set a limit of MAX_TIMESLICE, and
+		 * try to detect CPU hogs which are doing round-robin.
+		 * No sleep bonus for them.
+		 */
+#if 0
+		next->sleep_end = prev->sleep_begin = jiffies;
+#endif
+		slept = next->sleep_end - next->sleep_begin - 1;
+		if (slept > 0) {
+			if (slept > MAX_TIMESLICE)
+				slept = MAX_TIMESLICE;
+			next->sleep_avg += slept;
+			if (next->sleep_avg > MAX_SLEEP_AVG)
+				next->sleep_avg = MAX_SLEEP_AVG;
+		}
+#if 0
+		next->sleep_begin = jiffies;
+#endif
+		if (time_after(jiffies, time)) {
+			time = jiffies + HZ + TIMESLICE_GRANULARITY;
+			printk(KERN_DEBUG "pid %d: slept: %ld avg: %lu\n",
+				next->pid, slept, next->sleep_avg);
+		}
 	} else
		spin_unlock_irq(&rq->lock);
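
For anyone who wants to poke at the arithmetic without booting a kernel, below
is a minimal userspace sketch of the new sleep_avg bookkeeping. It is an
illustration only, not kernel code: HZ, struct fake_task and the driver loop
in main() are made up for the example; only tick(), go_to_sleep() and
wake_up_task() mirror the scheduler_tick(), deactivate_task() and schedule()
arithmetic in the patch above.

/* Userspace sketch of the sleep_begin/sleep_end bookkeeping -- names and
 * the workload in main() are invented for illustration. */
#include <stdio.h>

#define HZ		1000
#define MAX_TIMESLICE	(200 * HZ / 1000)
#define MAX_SLEEP_AVG	(10 * HZ)

struct fake_task {
	unsigned long sleep_avg;
	unsigned long sleep_begin;	/* jiffies when the task left the runqueue */
	unsigned long sleep_end;	/* jiffies when it was reactivated */
};

static unsigned long jiffies;

/* scheduler_tick(): the running task pays one tick of its sleep average. */
static void tick(struct fake_task *p)
{
	jiffies++;
	if (p->sleep_avg)
		p->sleep_avg--;
	p->sleep_begin = p->sleep_end = jiffies;
}

/* deactivate_task(): remember when the sleep started. */
static void go_to_sleep(struct fake_task *p)
{
	p->sleep_begin = jiffies;
}

/* __activate_task() + schedule(): credit the sleep, clamped to
 * MAX_TIMESLICE per wakeup and to MAX_SLEEP_AVG overall. */
static void wake_up_task(struct fake_task *p)
{
	long slept;

	p->sleep_end = jiffies;
	slept = (long)(p->sleep_end - p->sleep_begin) - 1;
	if (slept > 0) {
		if (slept > MAX_TIMESLICE)
			slept = MAX_TIMESLICE;
		p->sleep_avg += slept;
		if (p->sleep_avg > MAX_SLEEP_AVG)
			p->sleep_avg = MAX_SLEEP_AVG;
	}
}

int main(void)
{
	struct fake_task p = { 0, 0, 0 };
	int cycle, t;

	/* An "interactive" pattern: run 5 ticks, sleep 300 jiffies.
	 * Each wakeup is credited at most MAX_TIMESLICE (200 here),
	 * so sleep_avg climbs gradually rather than pinning at the
	 * cap after a single long sleep. */
	for (cycle = 0; cycle < 60; cycle++) {
		for (t = 0; t < 5; t++)
			tick(&p);
		go_to_sleep(&p);
		jiffies += 300;
		wake_up_task(&p);
	}
	printf("sleep_avg after 60 cycles: %lu (cap %d)\n",
	       p.sleep_avg, MAX_SLEEP_AVG);
	return 0;
}

The point of the MAX_TIMESLICE clamp is visible here: the old code credited
the full sleep_time in one lump, while this version grants at most one
timeslice worth of bonus per wakeup, which is what the "detect CPU hogs which
are doing round-robin" comment in schedule() is about.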