[PATCH] Faster scheduler and separate RT run queue

Richard Gooch (rgooch@atnf.csiro.au)
Wed, 30 Sep 1998 12:09:05 +1000


Hi, all. Appended is a patch that does 3 things:

- maintains a separate run queue for RT processes (see the sketch
after this list)

- simplifies the scheduler code path and makes it more robust

- fixes a bug where sched_yield(2) lets an RT process yield to
lower-priority processes (goodness() honours SCHED_YIELD before the
RT case, so a yielding RT process competes with a weight of 0).
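
In outline, the first two points combine into a two-pass selection
step in schedule(). This is a sketch of the idea only, not the
literal patched code (the diff below has the real thing):

	/* Sketch of the new selection step in schedule(). */
	struct task_struct *p, *next = idle_task;
	int c = -1000;

	/* Pass 1: scan the RT ring, headed by rt_idle.  SCHED_OTHER
	   processes never appear on this ring, so the length of this
	   loop is independent of the general load. */
	for (p = rt_idle.next_run; p != &rt_idle; p = p->next_run)
		if (can_schedule(p) && p->rt_priority > c) {
			c = p->rt_priority;
			next = p;
		}

	/* Pass 2: only if no RT process was runnable do we pay for
	   the goodness() scan of the ordinary ring, headed by
	   init_task. */
	if (next == idle_task)
		for (p = init_task.next_run; p != &init_task;
		     p = p->next_run)
			if (can_schedule(p)) {
				int weight = goodness(p, prev, this_cpu);
				if (weight > c) {
					c = weight;
					next = p;
				}
			}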

I've posted some initial results at:
http://www.atnf.csiro.au/~rgooch/benchmarks/
and I'll fill this in as I benchmark more machines.

To give you an idea of the results: for a PPro 180, the basic RT
process switch time goes from 2.2 us to 1.9 us and the basic RT thread
switch time goes from 1.5 us to 1.0 us. This benefit is due to a
simplified scheduler code path.

In addition, maintaining the separate RT run queue means that the
switch times are the same irrespective of whether there are an extra
10 or 100 SCHED_OTHER processes.
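
For reference, making a process realtime for a test like this needs
nothing patch-specific in user space; it is the standard POSIX call
(make_realtime() is just an illustrative wrapper, the priority of 50
below is an arbitrary valid value, and the call requires root):

#include <sched.h>
#include <stdio.h>

/* Switch the calling process to SCHED_FIFO.  With this patch,
   sys_sched_setscheduler() then also points p->run_head at the
   rt_idle ring, so the process lives on the RT run queue. */
static int make_realtime(int prio)
{
	struct sched_param sp;

	sp.sched_priority = prio;	/* e.g. 50 */
	if (sched_setscheduler(0, SCHED_FIFO, &sp) == -1) {
		perror("sched_setscheduler");	/* usually: not root */
		return -1;
	}
	return 0;
}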

Linus: I'd like to get your opinion of this patch. It seems to me
that it provides the benefit of a separate RT run queue without
bloating the code, while also yielding an advantage in the general
case. Even if you take the view that a long run queue is unlikely,
separating the run queues allows other (general) optimisations to be
done, resulting in a faster scheduler anyway.

Regards,

Richard....

diff -urN linux-2.1.123/include/linux/sched.h linux/include/linux/sched.h
--- linux-2.1.123/include/linux/sched.h Thu Sep 10 02:10:16 1998
+++ linux/include/linux/sched.h Wed Sep 30 11:05:56 1998
@@ -227,6 +227,7 @@
int last_processor;
int lock_depth; /* Lock depth. We can context switch in and out of holding a syscall kernel lock... */
struct task_struct *next_task, *prev_task;
+ struct task_struct *run_head; /* The run queue I am destined for */
struct task_struct *next_run, *prev_run;

/* task state */
@@ -340,7 +341,7 @@
/* state etc */ { 0,0,0,KERNEL_DS,&default_exec_domain,0, \
/* counter */ DEF_PRIORITY,DEF_PRIORITY, \
/* SMP */ 0,0,0,-1, \
-/* schedlink */ &init_task,&init_task, &init_task, &init_task, \
+/* schedlink */ &init_task,&init_task, &init_task, &init_task, &init_task, \
/* binfmt */ NULL, \
/* ec,brk... */ 0,0,0,0,0,0, \
/* pid etc.. */ 0,0,0,0,0, \
diff -urN linux-2.1.123/kernel/sched.c linux/kernel/sched.c
--- linux-2.1.123/kernel/sched.c Sat Sep 5 02:13:29 1998
+++ linux/kernel/sched.c Wed Sep 30 11:18:45 1998
@@ -99,6 +99,17 @@

struct task_struct * task[NR_TASKS] = {&init_task, };

+/* Pseudo-idle task for RT run queue. I'm not entirely happy with allocating
+ a whole task structure, but this has the advantage of avoiding an extra
+ level of indirection in the scheduler inner loops. I may get around to
+ doing a partial allocation here (evil). rgooch@atnf.csiro.au
+*/
+static struct task_struct rt_idle = {
+ rt_priority: -1000,
+ prev_run: &rt_idle,
+ next_run: &rt_idle,
+};
+
struct kernel_stat kstat = { 0 };

void scheduling_functions_start_here(void) { }
@@ -155,10 +166,11 @@
*/
static inline void add_to_runqueue(struct task_struct * p)
{
- struct task_struct *next = init_task.next_run;
+ struct task_struct *head = p->run_head;
+ struct task_struct *next = head->next_run;

- p->prev_run = &init_task;
- init_task.next_run = p;
+ p->prev_run = head;
+ head->next_run = p;
p->next_run = next;
next->prev_run = p;
}
@@ -177,6 +189,7 @@

static inline void move_last_runqueue(struct task_struct * p)
{
+ struct task_struct *head = p->run_head;
struct task_struct *next = p->next_run;
struct task_struct *prev = p->prev_run;

@@ -184,15 +197,16 @@
next->prev_run = prev;
prev->next_run = next;
/* add back to list */
- p->next_run = &init_task;
- prev = init_task.prev_run;
- init_task.prev_run = p;
+ p->next_run = head;
+ prev = head->prev_run;
+ head->prev_run = p;
p->prev_run = prev;
prev->next_run = p;
}

static inline void move_first_runqueue(struct task_struct * p)
{
+ struct task_struct *head = p->run_head;
struct task_struct *next = p->next_run;
struct task_struct *prev = p->prev_run;

@@ -200,9 +214,9 @@
next->prev_run = prev;
prev->next_run = next;
/* add back to list */
- p->prev_run = &init_task;
- next = init_task.next_run;
- init_task.next_run = p;
+ p->prev_run = head;
+ next = head->next_run;
+ head->next_run = p;
p->next_run = next;
next->prev_run = p;
}
@@ -276,14 +290,6 @@
}

/*
- * Realtime process, select the first one on the
- * runqueue (taking priorities within processes
- * into account).
- */
- if (policy != SCHED_OTHER)
- return 1000 + p->rt_priority;
-
- /*
* Give the process a first-approximation goodness value
* according to the number of clock-ticks it has left.
*
@@ -499,7 +505,8 @@
case TASK_RUNNING:
}
{
- struct task_struct * p = init_task.next_run;
+ struct task_struct * p = rt_idle.next_run;
+ struct task_struct * op = init_task.next_run;
/*
* This is subtle.
* Note how we can enable interrupts here, even
@@ -524,6 +531,17 @@
{
int c = -1000;
next = idle_task;
+
+ /* First scan for RT process to run: ignore others */
+ while (p != &rt_idle) {
+ if (can_schedule(p)) {
+ int weight = p->rt_priority;
+ if (weight > c)
+ c = weight, next = p;
+ }
+ p = p->next_run;
+ }
+ p = (next == idle_task) ? op : &init_task;
while (p != &init_task) {
if (can_schedule(p)) {
int weight = goodness(p, prev, this_cpu);
@@ -1349,7 +1367,7 @@
struct sched_param *param)
{
struct sched_param lp;
- struct task_struct *p;
+ struct task_struct *p, *head;
int retval;

retval = -EINVAL;
@@ -1403,8 +1421,14 @@
retval = 0;
p->policy = policy;
p->rt_priority = lp.sched_priority;
- if (p->next_run)
- move_first_runqueue(p);
+ head = (policy == SCHED_OTHER) ? &init_task : &rt_idle;
+ if (p->next_run) {
+ del_from_runqueue(p);
+ p->run_head = head;
+ add_to_runqueue(p);
+ nr_running++;
+ }
+ else p->run_head = head;

current->need_resched = 1;

@@ -1488,7 +1512,8 @@
{
spin_lock(&scheduler_lock);
spin_lock_irq(&runqueue_lock);
- current->policy |= SCHED_YIELD;
+ if (current->policy == SCHED_OTHER)
+ current->policy |= SCHED_YIELD;
current->need_resched = 1;
move_last_runqueue(current);
spin_unlock_irq(&runqueue_lock);
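
As a closing illustration of the sched_yield() fix in the last hunk:
with it, an RT polling loop like the hypothetical one below hands the
CPU only to other runnable RT processes of equal or higher priority;
a lower-priority process never gets in (poll_device() is just a
placeholder for real work):

#include <sched.h>

extern int poll_device(void);	/* placeholder for real work */

/* Busy-wait politely at RT priority: the yield merely rotates the
   caller to the back of the RT run queue. */
void rt_poll(void)
{
	while (!poll_device())
		sched_yield();
}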
