[014/151] sched: Rate-limit newidle

From: Greg KH
Date: Wed Dec 16 2009 - 23:51:31 EST


2.6.32-stable review patch. If anyone has any objections, please let us know.

------------------

From: Mike Galbraith <efault@xxxxxx>

commit 1b9508f6831e10d53256825de8904caa22d1ca2c upstream.

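Rate-limit newidle balancing to migration_cost: track each runqueue's
average idle time (avg_idle, updated on wakeup and capped at
2*sysctl_sched_migration_cost), and have idle_balance() return without
attempting to pull tasks when avg_idle is below
sysctl_sched_migration_cost. It's a win for all stages of sysbench
oltp tests.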

Signed-off-by: Mike Galbraith <efault@xxxxxx>
Cc: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@xxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxx>

---
kernel/sched.c | 22 +++++++++++++++++++++-
kernel/sched_debug.c | 4 ++++
2 files changed, 25 insertions(+), 1 deletion(-)

--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -591,6 +591,8 @@ struct rq {

u64 rt_avg;
u64 age_stamp;
+ u64 idle_stamp;
+ u64 avg_idle;
#endif

/* calc_load related fields */
@@ -2384,6 +2386,17 @@ static int try_to_wake_up(struct task_st
if (rq != orig_rq)
update_rq_clock(rq);

+ if (rq->idle_stamp) {
+ u64 delta = rq->clock - rq->idle_stamp;
+ u64 max = 2*sysctl_sched_migration_cost;
+
+ if (delta > max)
+ rq->avg_idle = max;
+ else
+ update_avg(&rq->avg_idle, delta);
+ rq->idle_stamp = 0;
+ }
+
WARN_ON(p->state != TASK_WAKING);
cpu = task_cpu(p);

@@ -4429,6 +4442,11 @@ static void idle_balance(int this_cpu, s
int pulled_task = 0;
unsigned long next_balance = jiffies + HZ;

+ this_rq->idle_stamp = this_rq->clock;
+
+ if (this_rq->avg_idle < sysctl_sched_migration_cost)
+ return;
+
for_each_domain(this_cpu, sd) {
unsigned long interval;

@@ -4443,8 +4461,10 @@ static void idle_balance(int this_cpu, s
interval = msecs_to_jiffies(sd->balance_interval);
if (time_after(next_balance, sd->last_balance + interval))
next_balance = sd->last_balance + interval;
- if (pulled_task)
+ if (pulled_task) {
+ this_rq->idle_stamp = 0;
break;
+ }
}
if (pulled_task || time_after(jiffies, this_rq->next_balance)) {
/*
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -285,12 +285,16 @@ static void print_cpu(struct seq_file *m

#ifdef CONFIG_SCHEDSTATS
#define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, rq->n);
+#define P64(n) SEQ_printf(m, " .%-30s: %Ld\n", #n, rq->n);

P(yld_count);

P(sched_switch);
P(sched_count);
P(sched_goidle);
+#ifdef CONFIG_SMP
+ P64(avg_idle);
+#endif

P(ttwu_count);
P(ttwu_local);


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/