--- include/linux/sched.h.org	Tue Jan 3 09:26:50 2006
+++ include/linux/sched.h	Sat Jan 7 14:45:37 2006
@@ -701,8 +701,8 @@
 
 	unsigned short ioprio;
 
-	unsigned long sleep_avg;
-	unsigned long long timestamp, last_ran;
+	unsigned long sleep_avg, slice_avg;
+	unsigned long long timestamp, last_ran, last_slice;
 	unsigned long long sched_time; /* sched_clock time spent running */
 
 	int activated;
--- linux-2.6.15/kernel/sched.c.org	Sat Jan 7 16:22:13 2006
+++ linux-2.6.15/kernel/sched.c	Mon Jan 9 11:50:40 2006
@@ -47,6 +47,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
@@ -1353,7 +1354,7 @@
 
 out_activate:
 #endif /* CONFIG_SMP */
-	if (old_state == TASK_UNINTERRUPTIBLE) {
+	if (old_state & TASK_UNINTERRUPTIBLE) {
 		rq->nr_uninterruptible--;
 		/*
 		 * Tasks on involuntary sleep don't earn
@@ -1492,6 +1493,8 @@
 	 */
 	p->sleep_avg = JIFFIES_TO_NS(CURRENT_BONUS(p) *
 		CHILD_PENALTY / 100 * MAX_SLEEP_AVG / MAX_BONUS);
+	p->slice_avg = NS_MAX_SLEEP_AVG;
+	p->last_slice = sched_clock();
 
 	p->prio = effective_prio(p);
 
@@ -2646,6 +2649,12 @@
 	runqueue_t *rq = this_rq();
 	task_t *p = current;
 	unsigned long long now = sched_clock();
+#if 1
+	static unsigned long printme = 0;
+
+	if (unlikely(!printme))
+		printme = jiffies;
+#endif
 
 	update_cpu_clock(p, rq, now);
 
@@ -2679,6 +2688,7 @@
 		if ((p->policy == SCHED_RR) && !--p->time_slice) {
 			p->time_slice = task_timeslice(p);
 			p->first_time_slice = 0;
+			p->last_slice = now;
 			set_tsk_need_resched(p);
 
 			/* put it at the end of the queue: */
@@ -2687,12 +2697,40 @@
 		goto out_unlock;
 	}
 	if (!--p->time_slice) {
+		unsigned long long nsecs = now - p->last_slice;
+		unsigned long idle, ticks;
+		int w = 10;
+
 		dequeue_task(p, rq->active);
 		set_tsk_need_resched(p);
 		p->prio = effective_prio(p);
 		p->time_slice = task_timeslice(p);
 		p->first_time_slice = 0;
+
+		if (nsecs > ~0UL)
+			nsecs = ~0UL;
+		ticks = NS_TO_JIFFIES((unsigned long) nsecs);
+		if (ticks < p->time_slice)
+			ticks = p->time_slice;
+		idle = 100 - (100 * p->time_slice / ticks);
+		p->slice_avg /= NS_MAX_SLEEP_AVG / 100;
+		/*
+		 * If we're improving our behaviour, speed up the improvement's
+		 * effect so we don't over throttle.
+		 */
+		if (idle > p->slice_avg + 10)
+			w -= (100 * p->slice_avg / idle) / 10;
+		p->slice_avg = (w * p->slice_avg + idle) / (w + 1);
+		p->slice_avg *= NS_MAX_SLEEP_AVG / 100;
+		p->last_slice = now;
+#if 1
+		if (p->mm && time_after(jiffies, printme + HZ)) {
+			printk(KERN_DEBUG "%s pid:%d sle:%ld sli:%ld tic:%ld idle:%ld w:%d\n",
+				p->comm, p->pid, p->sleep_avg, p->slice_avg, ticks, idle, w);
+			printme = jiffies + HZ;
+		}
+#endif
+
 		if (!rq->expired_timestamp)
 			rq->expired_timestamp = jiffies;
 		if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) {
@@ -3010,7 +3048,7 @@
 				unlikely(signal_pending(prev))))
 			prev->state = TASK_RUNNING;
 		else {
-			if (prev->state == TASK_UNINTERRUPTIBLE)
+			if (prev->state & TASK_UNINTERRUPTIBLE)
 				rq->nr_uninterruptible++;
 			deactivate_task(prev, rq);
 		}
@@ -3095,6 +3133,13 @@
 	prev->sleep_avg -= run_time;
 	if ((long)prev->sleep_avg <= 0)
 		prev->sleep_avg = 0;
+	if (prev->state & (TASK_INTERRUPTIBLE|TASK_UNINTERRUPTIBLE) &&
+	    prev->sleep_avg > prev->slice_avg + (NS_MAX_SLEEP_AVG/10) &&
+	    !rt_task(prev))
+		prev->state |= TASK_NONINTERACTIVE;
+	if (!rt_task(next) && !(next->time_slice % DEF_TIMESLICE))
+		next->last_slice = now;
+
 	prev->timestamp = prev->last_ran = now;
 
 	sched_info_switch(prev, next);
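
Two notes on the patch, followed by a standalone sketch of the new arithmetic.

First, the `old_state == TASK_UNINTERRUPTIBLE` tests become bitmask tests (`&`) because the patch now ORs TASK_NONINTERACTIVE into a sleeping task's state in schedule(); once state can carry that extra flag, an equality test would no longer match uninterruptible sleepers.

Second, the slice_avg bookkeeping in scheduler_tick() reduces to a weighted moving average of per-slice "idle" percentages: each time a timeslice expires, the patch measures the wall-clock time since the previous expiry (last_slice), computes the percentage of that interval the task did not run, and folds it into slice_avg with weight w = 10, shrinking w when behaviour is improving so a reformed task is not over-throttled. The program below is a minimal userspace model of just that arithmetic, not kernel code: the tick values in main() are made-up inputs, and slice_avg is kept as a plain 0..100 percentage instead of being scaled by NS_MAX_SLEEP_AVG as the kernel version is.

/*
 * Userspace model of the slice_avg update performed when a task's
 * timeslice expires.  Illustrative only; constants and inputs are
 * stand-ins, not the kernel's values.
 */
#include <stdio.h>

struct task_model {
	unsigned long slice_avg;	/* running idle%: 100 = fully idle */
};

/*
 * wall_ticks: wall-clock ticks since the previous slice expiry.
 * slice_ticks: ticks of CPU actually consumed (one full timeslice).
 */
static void slice_expired(struct task_model *p, unsigned long wall_ticks,
			  unsigned long slice_ticks)
{
	unsigned long idle;
	int w = 10;			/* default averaging weight */

	if (wall_ticks < slice_ticks)	/* clamp, as the patch does */
		wall_ticks = slice_ticks;
	/* percentage of the wall-clock interval this task did NOT run */
	idle = 100 - (100 * slice_ticks / wall_ticks);
	/*
	 * If behaviour is improving (this interval is much idler than
	 * the average), shrink the weight so the average reacts faster
	 * and we don't over throttle.
	 */
	if (idle > p->slice_avg + 10)
		w -= (100 * p->slice_avg / idle) / 10;
	p->slice_avg = (w * p->slice_avg + idle) / (w + 1);
}

int main(void)
{
	struct task_model p = { .slice_avg = 100 };	/* starts "interactive" */
	int i;

	/* a CPU hog: burns 100-tick slices back-to-back (idle = 0%) */
	for (i = 0; i < 10; i++) {
		slice_expired(&p, 100, 100);
		printf("hog slice %2d: slice_avg=%lu\n", i + 1, p.slice_avg);
	}
	/* the task reforms: 100 ticks of CPU per 1000 wall ticks (idle = 90%) */
	for (i = 0; i < 10; i++) {
		slice_expired(&p, 1000, 100);
		printf("idle slice %2d: slice_avg=%lu\n", i + 1, p.slice_avg);
	}
	return 0;
}

Run as-is, slice_avg decays toward 0 while the simulated hog burns full slices back-to-back, then climbs back noticeably faster once the task starts sleeping; that faster climb is the effect the "don't over throttle" comment in the patch refers to.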