Re: [PATCH 2/6] Add IRQ_TIME_ACCOUNTING, finer accounting of CPU irq time

From: Peter Zijlstra
Date: Sun Sep 19 2010 - 07:12:17 EST


On Thu, 2010-09-16 at 18:56 -0700, Venkatesh Pallipadi wrote:
>
> Signed-off-by: Venkatesh Pallipadi <venki@xxxxxxxxxx>
> ---
> include/linux/hardirq.h | 2 +-
> include/linux/sched.h | 11 +++++++++++
> kernel/sched.c | 38 ++++++++++++++++++++++++++++++++++++++
> 3 files changed, 50 insertions(+), 1 deletions(-)
>
> diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
> index ce22d09..bfafd29 100644
> --- a/include/linux/hardirq.h
> +++ b/include/linux/hardirq.h
> @@ -132,7 +132,7 @@ extern void synchronize_irq(unsigned int irq);
>
> struct task_struct;
>
> -#ifndef CONFIG_VIRT_CPU_ACCOUNTING
> +#if !defined(CONFIG_VIRT_CPU_ACCOUNTING) && !defined(CONFIG_IRQ_TIME_ACCOUNTING)
> static inline void account_system_vtime(struct task_struct *tsk)
> {
> }
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 1e2a6db..dbb6808 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1826,6 +1826,17 @@ extern void sched_clock_idle_sleep_event(void);
> extern void sched_clock_idle_wakeup_event(u64 delta_ns);
> #endif
>
> +#ifdef CONFIG_IRQ_TIME_ACCOUNTING
> +/*
> + * An i/f to runtime opt-in for irq time accounting based off of sched_clock.
> + * The reason for this explicit opt-in is not to have perf penalty with
> + * slow sched_clocks.
> + */
> +extern void enable_sched_clock_irqtime(void);
> +#else
> +static inline void enable_sched_clock_irqtime(void) {}
> +#endif
> +
> extern unsigned long long
> task_sched_runtime(struct task_struct *task);
> extern unsigned long long thread_group_sched_runtime(struct task_struct *task);
> diff --git a/kernel/sched.c b/kernel/sched.c
> index ed09d4f..912d2de 100644
> --- a/kernel/sched.c
> +++ b/kernel/sched.c
> @@ -1917,6 +1917,44 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
> dec_nr_running(rq);
> }
>
> +#ifdef CONFIG_IRQ_TIME_ACCOUNTING
> +
> +static DEFINE_PER_CPU(u64, cpu_hardirq_time);
> +static DEFINE_PER_CPU(u64, cpu_softirq_time);
> +
> +static DEFINE_PER_CPU(u64, irq_start_time);
> +static int sched_clock_irqtime;
> +
> +void enable_sched_clock_irqtime(void)
> +{
> + sched_clock_irqtime = 1;
> +}
> +
> +void account_system_vtime(struct task_struct *tsk)
> +{
> + unsigned long flags;
> + int cpu;
> + u64 now, delta;
> +
> + if (!sched_clock_irqtime)
> + return;
> +
> + local_irq_save(flags);
> +
> + cpu = task_cpu(tsk);

Can this ever be anything other than smp_processor_id() and current?

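If not, the tsk argument and the task_cpu() lookup buy us nothing; a rough,
untested sketch of what I'd expect here instead:

	/* we only ever account the local CPU, so tsk/task_cpu() are redundant */
	int cpu = smp_processor_id();
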
> + now = sched_clock();

This should be using one of the kernel/sched_clock.c thingies, probably
local_clock() or sched_clock_cpu(cpu).

> + delta = now - per_cpu(irq_start_time, cpu);
> + per_cpu(irq_start_time, cpu) = now;
> + if (hardirq_count())
> + per_cpu(cpu_hardirq_time, cpu) += delta;
> + else if (softirq_count())
> + per_cpu(cpu_softirq_time, cpu) += delta;
> +
> + local_irq_restore(flags);
> +}

Also, this isn't a complete API; it's very asymmetric, please cure that.
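
At the very least there should be a read side to pair with the accumulation;
something along these lines (just a sketch, the name is made up and not part
of this patch):

	/*
	 * Hypothetical accessor pairing with the accounting above;
	 * sums hard and soft irq time for a CPU.
	 */
	static u64 irq_time_read(int cpu)
	{
		return per_cpu(cpu_softirq_time, cpu) +
		       per_cpu(cpu_hardirq_time, cpu);
	}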