diff -Naurp linux-2.5.72/include/linux/sched.h linux-2.5.72-test/include/linux/sched.h
--- linux-2.5.72/include/linux/sched.h	2003-06-18 22:47:19.000000000 +1000
+++ linux-2.5.72-test/include/linux/sched.h	2003-06-19 20:56:18.000000000 +1000
@@ -332,6 +332,7 @@ struct task_struct {
 
 	unsigned long sleep_avg;
 	unsigned long last_run;
+	unsigned long best_sleep_avg;
 
 	unsigned long policy;
 	unsigned long cpus_allowed;
--- linux-2.5.72/kernel/sched.c	2003-06-18 22:47:25.000000000 +1000
+++ linux-2.5.72-test/kernel/sched.c	2003-06-23 07:21:07.000000000 +1000
@@ -72,7 +72,8 @@
 #define EXIT_WEIGHT		3
 #define PRIO_BONUS_RATIO	25
 #define INTERACTIVE_DELTA	2
-#define MAX_SLEEP_AVG		(10*HZ)
+#define MAX_SLEEP_AVG		(2 * HZ)
+#define BEST_SLEEP_DECAY	(10)
 #define STARVATION_LIMIT	(10*HZ)
 #define NODE_THRESHOLD		125
 
@@ -313,12 +314,22 @@ static inline void enqueue_task(struct t
  */
 static int effective_prio(task_t *p)
 {
-	int bonus, prio;
+	int bonus, prio, neg_flag = 1, scale = MAX_SLEEP_AVG / 2;
 
 	if (rt_task(p))
 		return p->prio;
 
-	bonus = MAX_USER_PRIO*PRIO_BONUS_RATIO*p->sleep_avg/MAX_SLEEP_AVG/100 -
+	bonus = p->best_sleep_avg/BEST_SLEEP_DECAY;
+	if (bonus > MAX_SLEEP_AVG) bonus = MAX_SLEEP_AVG;
+
+	bonus -= scale;
+	if (bonus < 0) neg_flag = -1;
+	bonus *= bonus;
+	bonus /= scale;
+	bonus *= neg_flag;
+	bonus += scale;
+
+	bonus = MAX_USER_PRIO*PRIO_BONUS_RATIO*bonus/MAX_SLEEP_AVG/100 -
 			MAX_USER_PRIO*PRIO_BONUS_RATIO/100/2;
 
 	prio = p->static_prio - bonus;
@@ -371,6 +382,8 @@ static inline void activate_task(task_t
 			sleep_avg = MAX_SLEEP_AVG;
 		if (p->sleep_avg != sleep_avg) {
 			p->sleep_avg = sleep_avg;
+			if ((sleep_avg * BEST_SLEEP_DECAY) > p->best_sleep_avg)
+				p->best_sleep_avg = sleep_avg * (BEST_SLEEP_DECAY + 1) - 1;
 			p->prio = effective_prio(p);
 		}
 	}
@@ -551,6 +564,7 @@ void wake_up_forked_process(task_t * p)
 	 */
 	current->sleep_avg = current->sleep_avg * PARENT_PENALTY / 100;
 	p->sleep_avg = p->sleep_avg * CHILD_PENALTY / 100;
+	p->best_sleep_avg = p->sleep_avg * (BEST_SLEEP_DECAY + 1) - 1;
 	p->prio = effective_prio(p);
 	set_task_cpu(p, smp_processor_id());
 
@@ -1200,6 +1214,8 @@ void scheduler_tick(int user_ticks, int
 	 */
 	if (p->sleep_avg)
 		p->sleep_avg--;
+	if (p->best_sleep_avg)
+		p->best_sleep_avg--;
 	if (unlikely(rt_task(p))) {
 		/*
 		 * RR tasks need a special form of timeslice management.
@@ -1229,6 +1245,27 @@ void scheduler_tick(int user_ticks, int
 			enqueue_task(p, rq->expired);
 		} else
 			enqueue_task(p, rq->active);
+	} else {
+		/*
+		 * Prevent a too long timeslice allowing a task to monopolize
+		 * the CPU. We do this by splitting up the timeslice into
+		 * smaller pieces.
+		 *
+		 * Note: this does not mean the task's timeslices expire or
+		 * get lost in any way, they just might be preempted by
+		 * another task of equal priority. (one with higher
+		 * priority would have preempted this task already.) We
+		 * requeue this task to the end of the list on this priority
+		 * level, which is in essence a round-robin of tasks with
+		 * equal priority.
+		 */
+		if (!(p->time_slice % MIN_TIMESLICE) &&
+					(p->array == rq->active)) {
+			dequeue_task(p, rq->active);
+			set_tsk_need_resched(p);
+			p->prio = effective_prio(p);
+			enqueue_task(p, rq->active);
+		}
 	}
 out_unlock:
 	spin_unlock(&rq->lock);
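
For anyone who wants to eyeball the new priority curve without booting a
patched kernel, here is a small standalone userspace sketch of the
parabolic bonus remapping that effective_prio() does above. It is not
part of the patch; HZ=1000 and MAX_USER_PRIO=40 are assumed (the 2.5.72
defaults), and parabolic_bonus() is just a hypothetical helper name for
the inline arithmetic in the diff.

/*
 * Standalone sketch (not part of the patch) of the parabolic bonus
 * remapping in effective_prio(). Decayed sleep values 0..MAX_SLEEP_AVG
 * are bent around the midpoint "scale", so mid-range sleep averages
 * move the priority bonus less than the extremes do.
 */
#include <stdio.h>

#define HZ			1000	/* assumed, as in 2.5.x defaults */
#define MAX_SLEEP_AVG		(2 * HZ)
#define MAX_USER_PRIO		40	/* MAX_PRIO - MAX_RT_PRIO */
#define PRIO_BONUS_RATIO	25

static int parabolic_bonus(unsigned long decayed_sleep)
{
	int bonus = decayed_sleep, neg_flag = 1, scale = MAX_SLEEP_AVG / 2;

	if (bonus > MAX_SLEEP_AVG)
		bonus = MAX_SLEEP_AVG;

	bonus -= scale;		/* recentre around the midpoint */
	if (bonus < 0)
		neg_flag = -1;
	bonus *= bonus;		/* square: flattens the middle */
	bonus /= scale;
	bonus *= neg_flag;	/* restore the sign */
	bonus += scale;		/* shift back to 0..MAX_SLEEP_AVG */

	return MAX_USER_PRIO * PRIO_BONUS_RATIO * bonus / MAX_SLEEP_AVG / 100
		- MAX_USER_PRIO * PRIO_BONUS_RATIO / 100 / 2;
}

int main(void)
{
	unsigned long v;

	for (v = 0; v <= MAX_SLEEP_AVG; v += MAX_SLEEP_AVG / 8)
		printf("decayed sleep_avg %4lu -> bonus %+d\n",
		       v, parabolic_bonus(v));
	return 0;
}

Compiled and run, this shows the bonus staying near 0 for mid-range
sleep averages and only approaching the -5/+5 extremes for tasks that
are solidly CPU-bound or solidly interactive, which is the point of
squaring around the midpoint.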