Re: High CPU load when machine is idle (related to PROBLEM:Unusually high load average when idle in 2.6.35, 2.6.35.1 and later)

From: Peter Zijlstra
Date: Wed Oct 20 2010 - 10:26:03 EST


On Wed, 2010-10-20 at 16:14 +0200, Peter Zijlstra wrote:

> ---
> include/linux/sched.h | 8 ++++++++
> kernel/sched.c | 28 +++++++++++++++++++++-------
> kernel/sched_idletask.c | 1 -
> kernel/time/tick-sched.c | 2 ++
> 4 files changed, 31 insertions(+), 8 deletions(-)
>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 0383601..5311ef4 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -145,6 +145,14 @@ extern unsigned long this_cpu_load(void);
>
> extern void calc_global_load(void);
>
> +#ifdef CONFIG_NO_HZ
> +extern void calc_load_account_idle(void);
> +extern void calc_load_account_nonidle(void);
> +#else
> +static inline void calc_load_account_idle(void) { }
> +static inline void calc_load_account_nonidle(void) { }
> +#endif
> +
> extern unsigned long get_parent_ip(unsigned long addr);
>
> struct seq_file;
> diff --git a/kernel/sched.c b/kernel/sched.c
> index abf8440..79a29e6 100644
> --- a/kernel/sched.c
> +++ b/kernel/sched.c
> @@ -526,6 +526,10 @@ struct rq {
> /* calc_load related fields */
> unsigned long calc_load_update;
> long calc_load_active;
> +#ifdef CONFIG_NO_HZ
> + long calc_load_inactive;
> + int calc_load_seq;
> +#endif
>
> #ifdef CONFIG_SCHED_HRTICK
> #ifdef CONFIG_SMP
> @@ -1833,7 +1837,6 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
> }
> #endif
>
> -static void calc_load_account_idle(struct rq *this_rq);
> static void update_sysctl(void);
> static int get_update_sysctl_factor(void);
> static void update_cpu_load(struct rq *this_rq);
> @@ -3111,16 +3114,29 @@ static long calc_load_fold_active(struct rq *this_rq)
> * When making the ILB scale, we should try to pull this in as well.
> */
> static atomic_long_t calc_load_tasks_idle;
> +static atomic_t calc_load_seq;
>
> -static void calc_load_account_idle(struct rq *this_rq)
> +void calc_load_account_idle(void)
> {
> + struct rq *this_rq = this_rq();
> long delta;
>
> delta = calc_load_fold_active(this_rq);
> + this_rq->calc_load_inactive = delta;
> + this_rq->calc_load_seq = atomic_read(&calc_load_seq);
> +
> if (delta)
> atomic_long_add(delta, &calc_load_tasks_idle);
> }
>
> +void calc_load_account_nonidle(void)
> +{
> + struct rq *this_rq = this_rq();
> +
> + if (atomic_read(&calc_load_seq) == this_rq->calc_load_seq)
> + atomic_long_add(this_rq->calc_load_inactive, &calc_load_tasks_idle);

So that should read atomic_long_sub() rather than atomic_long_add().

Trouble is, with that patch fixed the load does go down; it just never
goes up :/


> +}
> +
> static long calc_load_fold_idle(void)
> {
> long delta = 0;
> @@ -3128,16 +3144,14 @@ static long calc_load_fold_idle(void)
> /*
> * Its got a race, we don't care...
> */
> - if (atomic_long_read(&calc_load_tasks_idle))
> + if (atomic_long_read(&calc_load_tasks_idle)) {
> + atomic_inc(&calc_load_seq);
> delta = atomic_long_xchg(&calc_load_tasks_idle, 0);
> + }
>
> return delta;
> }
> #else
> -static void calc_load_account_idle(struct rq *this_rq)
> -{
> -}
> -
> static inline long calc_load_fold_idle(void)
> {
> return 0;
> diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
> index 9fa0f402..6ca191f 100644
> --- a/kernel/sched_idletask.c
> +++ b/kernel/sched_idletask.c
> @@ -23,7 +23,6 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int fl
> static struct task_struct *pick_next_task_idle(struct rq *rq)
> {
> schedstat_inc(rq, sched_goidle);
> - calc_load_account_idle(rq);
> return rq->idle;
> }
>
> diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
> index 3e216e0..808abd7 100644
> --- a/kernel/time/tick-sched.c
> +++ b/kernel/time/tick-sched.c
> @@ -411,6 +411,7 @@ void tick_nohz_stop_sched_tick(int inidle)
> ts->tick_stopped = 1;
> ts->idle_jiffies = last_jiffies;
> rcu_enter_nohz();
> + calc_load_account_idle();
> }
>
> ts->idle_sleeps++;
> @@ -520,6 +521,7 @@ void tick_nohz_restart_sched_tick(void)
>
> ts->inidle = 0;
>
> + calc_load_account_nonidle();
> rcu_exit_nohz();
>
> /* Update jiffies first */
>

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/