one more fix: do not force-migrate tasks during wakeup if that would
violate the task's affinity mask. My current scheduler tree is attached, against 2.5.22.
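
The new condition in try_to_wake_up() reduces to a plain bitmask test: the
waking CPU may only pull the task over if its bit is set in the task's
cpus_allowed mask. A minimal user-space sketch of that test (the helper name
and the standalone program are illustrative only, not kernel code):

/*
 * Illustrative only: models the cpus_allowed check added to the wakeup
 * fast-migration path.  cpu_allowed() is a made-up helper, not a kernel
 * interface.
 */
#include <stdio.h>

static int cpu_allowed(unsigned long cpus_allowed, int cpu)
{
	return (cpus_allowed & (1UL << cpu)) != 0;
}

int main(void)
{
	unsigned long cpus_allowed = 1UL << 1;	/* task bound to CPU 1 only */

	/* the waking CPU 0 must not steal the task: bit 0 is clear */
	printf("migrate to CPU 0? %s\n", cpu_allowed(cpus_allowed, 0) ? "yes" : "no");
	/* CPU 1 is within the mask, fast-migration is fine */
	printf("migrate to CPU 1? %s\n", cpu_allowed(cpus_allowed, 1) ? "yes" : "no");

	return 0;
}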
Ingo
--- linux/kernel/sched.c.orig Mon Jun 17 05:43:53 2002
+++ linux/kernel/sched.c Mon Jun 17 06:46:01 2002
@@ -6,14 +6,14 @@
* Copyright (C) 1991-2002 Linus Torvalds
*
* 1996-12-23 Modified by Dave Grothe to fix bugs in semaphores and
- * make semaphores SMP safe
+ * make semaphores SMP safe
* 1998-11-19 Implemented schedule_timeout() and related stuff
* by Andrea Arcangeli
* 2002-01-04 New ultra-scalable O(1) scheduler by Ingo Molnar:
- * hybrid priority-list and round-robin design with
- * an array-switch method of distributing timeslices
- * and per-CPU runqueues. Additional code by Davide
- * Libenzi, Robert Love, and Rusty Russell.
+ * hybrid priority-list and round-robin design with
+ * an array-switch method of distributing timeslices
+ * and per-CPU runqueues. Additional code by Davide
+ * Libenzi, Robert Love, and Rusty Russell.
*/
#include <linux/mm.h>
@@ -180,11 +180,14 @@
/*
* rq_lock - lock a given runqueue and disable interrupts.
*/
-static inline runqueue_t *rq_lock(runqueue_t *rq)
+static inline runqueue_t *this_rq_lock(void)
{
+ runqueue_t *rq;
+
local_irq_disable();
rq = this_rq();
spin_lock(&rq->lock);
+
return rq;
}
@@ -358,12 +361,17 @@
rq = task_rq_lock(p, &flags);
old_state = p->state;
if (!p->array) {
- if (unlikely(sync && (rq->curr != p))) {
- if (p->thread_info->cpu != smp_processor_id()) {
- p->thread_info->cpu = smp_processor_id();
- task_rq_unlock(rq, &flags);
- goto repeat_lock_task;
- }
+ /*
+ * Fast-migrate the task if it's not running or runnable
+ * currently. Do not violate hard affinity.
+ */
+ if (unlikely(sync && (rq->curr != p) &&
+ (p->thread_info->cpu != smp_processor_id()) &&
+ (p->cpus_allowed & (1UL << smp_processor_id())))) {
+
+ p->thread_info->cpu = smp_processor_id();
+ task_rq_unlock(rq, &flags);
+ goto repeat_lock_task;
}
if (old_state == TASK_UNINTERRUPTIBLE)
rq->nr_uninterruptible--;
@@ -388,9 +396,7 @@
void wake_up_forked_process(task_t * p)
{
- runqueue_t *rq;
-
- rq = rq_lock(rq);
+ runqueue_t *rq = this_rq_lock();
p->state = TASK_RUNNING;
if (!rt_task(p)) {
@@ -797,7 +803,8 @@
list_t *queue;
int idx;
- BUG_ON(in_interrupt());
+ if (unlikely(in_interrupt()))
+ BUG();
#if CONFIG_DEBUG_HIGHMEM
check_highmem_ptes();
@@ -1158,13 +1165,12 @@
static int setscheduler(pid_t pid, int policy, struct sched_param *param)
{
struct sched_param lp;
+ int retval = -EINVAL;
prio_array_t *array;
unsigned long flags;
runqueue_t *rq;
- int retval;
task_t *p;
- retval = -EINVAL;
if (!param || pid < 0)
goto out_nounlock;
@@ -1251,10 +1257,9 @@
asmlinkage long sys_sched_getscheduler(pid_t pid)
{
+ int retval = -EINVAL;
task_t *p;
- int retval;
- retval = -EINVAL;
if (pid < 0)
goto out_nounlock;
@@ -1271,11 +1276,10 @@
asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param *param)
{
- task_t *p;
struct sched_param lp;
- int retval;
+ int retval = -EINVAL;
+ task_t *p;
- retval = -EINVAL;
if (!param || pid < 0)
goto out_nounlock;
@@ -1310,8 +1314,8 @@
unsigned long *user_mask_ptr)
{
unsigned long new_mask;
- task_t *p;
int retval;
+ task_t *p;
if (len < sizeof(new_mask))
return -EINVAL;
@@ -1361,13 +1365,12 @@
asmlinkage int sys_sched_getaffinity(pid_t pid, unsigned int len,
unsigned long *user_mask_ptr)
{
- unsigned long mask;
unsigned int real_len;
- task_t *p;
+ unsigned long mask;
int retval;
+ task_t *p;
real_len = sizeof(mask);
-
if (len < real_len)
return -EINVAL;
@@ -1392,25 +1395,35 @@
asmlinkage long sys_sched_yield(void)
{
- runqueue_t *rq;
- prio_array_t *array;
-
- rq = rq_lock(rq);
+ runqueue_t *rq = this_rq_lock();
+ prio_array_t *array = current->array;
/*
- * Decrease the yielding task's priority by one, to avoid
- * livelocks. This priority loss is temporary, it's recovered
- * once the current timeslice expires.
+ * There are three levels of how a yielding task will give up
+ * the current CPU:
*
- * If priority is already MAX_PRIO-1 then we still
- * roundrobin the task within the runlist.
- */
- array = current->array;
- /*
- * If the task has reached maximum priority (or is a RT task)
- * then just requeue the task to the end of the runqueue:
+ * #1 - it decreases its priority by one. This priority loss is
+ * temporary, it's recovered once the current timeslice
+ * expires.
+ *
+ * #2 - once it has reached the lowest priority level,
+ * it will give up timeslices one by one. (We do not
+ * want to give them up all at once, it's gradual,
+ * to protect the casual yield()er.)
+ *
+ * #3 - once all timeslices are gone we put the process into
+ * the expired array.
+ *
+ * (special rule: RT tasks do not lose any priority, they just
+ * roundrobin on their current priority level.)
*/
- if (likely(current->prio == MAX_PRIO-1 || rt_task(current))) {
+ if (likely(current->prio == MAX_PRIO-1)) {
+ if (current->time_slice <= 1) {
+ dequeue_task(current, rq->active);
+ enqueue_task(current, rq->expired);
+ } else
+ current->time_slice--;
+ } else if (unlikely(rt_task(current))) {
list_del(¤t->run_list);
list_add_tail(¤t->run_list, array->queue + current->prio);
} else {
@@ -1461,9 +1474,9 @@
asmlinkage long sys_sched_rr_get_interval(pid_t pid, struct timespec *interval)
{
+ int retval = -EINVAL;
struct timespec t;
task_t *p;
- int retval = -EINVAL;
if (pid < 0)
goto out_nounlock;
@@ -1758,8 +1771,8 @@
static int migration_thread(void * bind_cpu)
{
- int cpu = cpu_logical_map((int) (long) bind_cpu);
struct sched_param param = { sched_priority: MAX_RT_PRIO-1 };
+ int cpu = cpu_logical_map((int) (long) bind_cpu);
runqueue_t *rq;
int ret;
@@ -1836,15 +1849,14 @@
int cpu;
current->cpus_allowed = 1UL << cpu_logical_map(0);
- for (cpu = 0; cpu < smp_num_cpus; cpu++) {
+ for (cpu = 0; cpu < smp_num_cpus; cpu++)
if (kernel_thread(migration_thread, (void *) (long) cpu,
CLONE_FS | CLONE_FILES | CLONE_SIGNAL) < 0)
BUG();
- }
current->cpus_allowed = -1L;
for (cpu = 0; cpu < smp_num_cpus; cpu++)
while (!cpu_rq(cpu_logical_map(cpu))->migration_thread)
schedule_timeout(2);
}
-#endif /* CONFIG_SMP */
+#endif
-