[patch] fairsched for 2.2

From: Rik van Riel (riel@conectiva.com.br)
Date: Wed Apr 05 2000 - 14:27:39 EST


Hi,

here's the fair scheduler patch for 2.2. The difference from
yesterday's patches is that this version contains one extra
(subtle) safety feature that I forgot to backport from the
2.3 version. In other words: this version is verified to work
perfectly :)

cheers,

Rik

--
The Internet is not a network of computers. It is a network
of people. That is its real strength.

Wanna talk about the kernel?  irc.openprojects.net / #kernelnewbies
http://www.conectiva.com/		http://www.surriel.com/

--- linux-2.2-15pre16/kernel/sched.c.orig	Wed Apr  5 14:52:33 2000
+++ linux-2.2-15pre16/kernel/sched.c	Wed Apr  5 14:54:36 2000
@@ -40,6 +40,10 @@
 
 #include <linux/timex.h>
 
+#ifdef CONFIG_FAIRSCHED
+int fairsched = 0; /* toggle fair scheduler on/off */
+#endif /* CONFIG_FAIRSCHED */
+
 /*
  *	kernel variables
  */
@@ -432,6 +436,7 @@
  */
 spinlock_t runqueue_lock = SPIN_LOCK_UNLOCKED;  /* second */
 rwlock_t tasklist_lock = RW_LOCK_UNLOCKED;	/* third */
+spinlock_t recalc_lock = SPIN_LOCK_UNLOCKED;
 
 /*
  * Wake up a process. Put it on the run-queue if it's not
@@ -772,6 +777,7 @@
 	/* Do we need to re-calculate counters? */
 	if (!c)
 		goto recalculate;
+skipped_recalculate:
 	/*
 	 * from this point on nothing can prevent us from
 	 * switching to the next task, save this fact in
@@ -832,11 +838,71 @@
 	{
 		struct task_struct *p;
 		spin_unlock_irq(&runqueue_lock);
-		read_lock(&tasklist_lock);
-		for_each_task(p)
-			p->counter = (p->counter >> 1) + p->priority;
-		read_unlock(&tasklist_lock);
-		spin_lock_irq(&runqueue_lock);
+#ifdef CONFIG_FAIRSCHED
+	/*
+	 * Simple, low-overhead fair share scheduler. It works by handing
+	 * out CPU time like we do at the normal recalculation.
+	 * The catch is that we move the list head (where the for_each_task()
+	 * loop starts) to _after_ the first task where we ran out of quota.
+	 * This means that if a user has too many runnable processes, his
+	 * tasks will get extra CPU time here in turns.  --  Rik
+	 */
+	if (fairsched) {
+		struct user_struct *up;
+		long oldcounter;
+		if (!spin_trylock(&recalc_lock)) {
+			spin_lock_irq(&runqueue_lock);
+			goto skipped_recalculate;
+		}
+		read_lock(&uidhash_lock);
+		for_each_user_struct(up)
+			up->move_task = NULL;
+
+		write_lock_irq(&tasklist_lock);
+		for_each_task(p) {
+			up = p->user;
+			if (!up) {
+				p->counter = (p->counter >> 1) + p->priority;
+				continue;
+			}
+			if (!up->cpu_ticks)
+				continue;
+			/* never dirty a cacheline needlessly */
+			if (p->counter != (p->counter >> 1) + p->priority) {
+				oldcounter = p->counter;
+				p->counter = (p->counter >> 1) + p->priority;
+				up->cpu_ticks += oldcounter;
+				up->cpu_ticks -= p->counter;
+				if (!up->move_task)
+					up->move_task = p;
+				if (up->cpu_ticks < 0) {
+					p->counter += up->cpu_ticks;
+					up->cpu_ticks = 0;
+				}
+			}
+		}
+
+		for_each_user_struct(up) {
+			if (!up->cpu_ticks) {
+				move_task_last(up);
+			}
+			up->cpu_ticks = (up->cpu_ticks >> 1) + DEF_PRIORITY;
+		}
+		write_unlock_irq(&tasklist_lock);
+		read_unlock(&uidhash_lock);
+		spin_unlock(&recalc_lock);
+	} else
+#endif /* CONFIG_FAIRSCHED */
+	{
+		read_lock(&tasklist_lock);
+		for_each_task(p) {
+			/* never dirty a cache line needlessly */
+			if (p->counter != (p->counter >> 1) + p->priority)
+				p->counter = (p->counter >> 1) + p->priority;
+		}
+		read_unlock(&tasklist_lock);
+	}
+	spin_lock_irq(&runqueue_lock);
 		goto repeat_schedule;
 	}
 
--- linux-2.2-15pre16/kernel/fork.c.orig	Tue Oct 26 22:53:42 1999
+++ linux-2.2-15pre16/kernel/fork.c	Sun Apr  2 17:33:06 2000
@@ -17,6 +17,7 @@
 #include <linux/smp_lock.h>
 #include <linux/module.h>
 #include <linux/vmalloc.h>
+#include <linux/sched.h>
 
 #include <asm/pgtable.h>
 #include <asm/mmu_context.h>
@@ -45,13 +46,16 @@
  */
 #define UIDHASH_SZ	(PIDHASH_SZ >> 2)
 
-static struct user_struct {
-	atomic_t count;
-	struct user_struct *next, **pprev;
-	unsigned int uid;
-} *uidhash[UIDHASH_SZ];
+struct user_struct *uidhash[UIDHASH_SZ];
+struct user_struct uidhead = {
+	ATOMIC_INIT(0),
+	&uidhead, (struct user_struct **) &uidhead,
+	&uidhead, &uidhead,
+	NULL, 0,
+	DEF_PRIORITY,
+};
 
-spinlock_t uidhash_lock = SPIN_LOCK_UNLOCKED;
+rwlock_t uidhash_lock = RW_LOCK_UNLOCKED;
 
 kmem_cache_t *uid_cachep;
@@ -66,6 +70,10 @@
 		uidhash[hashent]->pprev = &up->next;
 	up->pprev = &uidhash[hashent];
 	uidhash[hashent] = up;
+	up->l_next = uidhead.l_next;
+	up->l_prev = &uidhead;
+	uidhead.l_next->l_prev = up;
+	uidhead.l_next = up;
 }
 
 static inline void uid_hash_remove(struct user_struct *up)
@@ -73,6 +81,8 @@
 	if(up->next)
 		up->next->pprev = up->pprev;
 	*up->pprev = up->next;
+	up->l_prev->l_next = up->l_next;
+	up->l_next->l_prev = up->l_prev;
 }
 
 static inline struct user_struct *uid_hash_find(unsigned short uid, unsigned int hashent)
@@ -112,12 +122,12 @@
 	if (up) {
 		p->user = NULL;
 		if (atomic_dec_and_test(&up->count)) {
-			spin_lock(&uidhash_lock);
+			write_lock(&uidhash_lock);
 			if (uid_hash_free(up)) {
 				uid_hash_remove(up);
 				kmem_cache_free(uid_cachep, up);
 			}
-			spin_unlock(&uidhash_lock);
+			write_unlock(&uidhash_lock);
 		}
 	}
 }
@@ -127,9 +137,9 @@
 	unsigned int hashent = uidhashfn(p->uid);
 	struct user_struct *up;
 
-	spin_lock(&uidhash_lock);
+	read_lock(&uidhash_lock);
 	up = uid_hash_find(p->uid, hashent);
-	spin_unlock(&uidhash_lock);
+	read_unlock(&uidhash_lock);
 
 	if (!up) {
 		struct user_struct *new;
@@ -139,12 +149,13 @@
 			return -EAGAIN;
 		new->uid = p->uid;
 		atomic_set(&new->count, 1);
+		new->cpu_ticks = DEF_PRIORITY;
 
 		/*
 		 * Before adding this, check whether we raced
 		 * on adding the same user already..
 		 */
-		spin_lock(&uidhash_lock);
+		write_lock(&uidhash_lock);
 		up = uid_hash_find(p->uid, hashent);
 		if (up) {
 			kmem_cache_free(uid_cachep, new);
@@ -152,7 +163,7 @@
 			uid_hash_insert(new, hashent);
 			up = new;
 		}
-		spin_unlock(&uidhash_lock);
+		write_unlock(&uidhash_lock);
 	}
 	p->user = up;
 
--- linux-2.2-15pre16/kernel/sysctl.c.orig	Wed Mar 29 15:40:09 2000
+++ linux-2.2-15pre16/kernel/sysctl.c	Thu Mar 30 15:23:04 2000
@@ -38,6 +38,7 @@
 extern char binfmt_java_interpreter[], binfmt_java_appletviewer[];
 extern int sysctl_overcommit_memory;
 extern int nr_queued_signals, max_queued_signals;
+extern int fairsched;
 
 #ifdef CONFIG_KMOD
 extern char modprobe_path[];
@@ -231,6 +232,10 @@
 	{KERN_SYSRQ, "sysrq", &sysrq_enabled, sizeof (int),
	 0644, NULL, &proc_dointvec},
 #endif
+#ifdef CONFIG_FAIRSCHED
+	{KERN_FAIRSCHED, "fairsched", &fairsched, sizeof(int),
+	 0644, NULL, &proc_dointvec},
+#endif
 	{0}
 };
--- linux-2.2-15pre16/include/linux/sched.h.orig	Wed Mar 29 15:40:09 2000
+++ linux-2.2-15pre16/include/linux/sched.h	Wed Apr  5 15:54:22 2000
@@ -225,7 +225,14 @@
  * Right now it is only used to track how many processes a
  * user has, but it has the potential to track memory usage etc.
  */
-struct user_struct;
+struct user_struct {
+	atomic_t count;
+	struct user_struct *next, **pprev;
+	struct user_struct *l_next, *l_prev;
+	struct task_struct *move_task;
+	unsigned int uid;
+	long cpu_ticks;
+};
 
 struct task_struct {
 /* these are hardcoded - don't touch */
@@ -438,6 +445,8 @@
 
 /* PID hashing. */
 #define PIDHASH_SZ (NR_TASKS >> 2)
 extern struct task_struct *pidhash[PIDHASH_SZ];
+extern struct user_struct uidhead;
+extern rwlock_t uidhash_lock;
 
 #define pid_hashfn(x)	((((x) >> 8) ^ (x)) & (PIDHASH_SZ - 1))
@@ -813,6 +822,27 @@
 #define for_each_task(p) \
 	for (p = &init_task ; (p = p->next_task) != &init_task ; )
 
+
+#define for_each_user_struct(up) \
+	for (up = &uidhead ; (up = up->l_next) != &uidhead ; )
+
+static inline void move_task_last(struct user_struct * up)
+{
+	struct task_struct *p = up->move_task;
+	struct task_struct *pn = p->next_task;
+	struct task_struct *pp = p->prev_task;
+	struct task_struct *ip = init_task.prev_task;
+
+	if (p != ip) {
+		pn->prev_task = pp;
+		pp->next_task = pn;
+		p->prev_task = ip;
+		p->next_task = &init_task;
+		init_task.prev_task = p;
+		ip->next_task = p;
+	}
+}
+
 #endif /* __KERNEL__ */
--- linux-2.2-15pre16/include/linux/sysctl.h.orig	Tue Jan  4 16:12:25 2000
+++ linux-2.2-15pre16/include/linux/sysctl.h	Thu Mar 30 15:44:57 2000
@@ -106,6 +106,7 @@
 	KERN_SYSRQ=38,		/* int: Sysreq enable */
 	KERN_SHMALL=41,		/* int: maximum size of shared memory */
 	KERN_SPARC_STOP_A=44,	/* int: Sparc Stop-A enable */
+	KERN_FAIRSCHED=49,	/* turn fair scheduler on/off */
 };
--- linux-2.2-15pre16/arch/i386/config.in.orig	Wed Mar 29 15:39:58 2000
+++ linux-2.2-15pre16/arch/i386/config.in	Thu Mar 30 15:17:27 2000
@@ -85,6 +85,9 @@
   fi
 fi
 
+if [ "$CONFIG_EXPERIMENTAL" = "y" ] ; then
+  bool 'Fair scheduler' CONFIG_FAIRSCHED
+fi
 bool 'System V IPC' CONFIG_SYSVIPC
 bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT
 bool 'Sysctl support' CONFIG_SYSCTL
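For completeness (this is not part of the patch itself): with the config
option enabled, the "fairsched" entry added to the sysctl kern_table above
shows up as /proc/sys/kernel/fairsched, so after rebuilding with
CONFIG_FAIRSCHED=y the scheduler can be toggled at run time with something
like "echo 1 > /proc/sys/kernel/fairsched". The small userspace sketch
below does the same thing through stdio; it is only an illustration of the
interface, and the proc path is inferred from the ctl_table entry rather
than taken from the patch.

/*
 * fairsched-toggle.c -- minimal illustration only, not part of the patch.
 * Assumes a kernel built with CONFIG_FAIRSCHED=y, which exposes the new
 * sysctl as /proc/sys/kernel/fairsched.
 */
#include <stdio.h>

int main(int argc, char **argv)
{
	const char *path = "/proc/sys/kernel/fairsched";
	const char *value = (argc > 1) ? argv[1] : "1";	/* "1" = on, "0" = off */
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);	/* kernel lacks CONFIG_FAIRSCHED, or not root */
		return 1;
	}
	fprintf(f, "%s\n", value);
	fclose(f);
	printf("kernel.fairsched set to %s\n", value);
	return 0;
}

Compile with "gcc -o fairsched-toggle fairsched-toggle.c" and run it as
root to switch the fair scheduler on or off.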
