[PATCH 9/9] Report steal time for cgroup

From: Glauber Costa
Date: Wed Sep 14 2011 - 16:06:35 EST


This patch introduces functionality commonly found in
hypervisors: steal time.

For those not particularly familiar with it, steal time
is defined as any time in which a virtual machine (or container)
wanted to perform CPU work, but could not due to another
VM/container being scheduled in its place. Note that idle
time is never counted as steal time.

Assuming each container lives in its own cgroup, we can
easily calculate steal time as all user/system/irq/softirq
time recorded in our sibling cgroups.

Signed-off-by: Glauber Costa <glommer@xxxxxxxxxxxxx>
---
kernel/sched.c | 30 ++++++++++++++++++++++++++++++
1 files changed, 30 insertions(+), 0 deletions(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index 8f254d0..2dad1e0 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -9321,6 +9321,7 @@ static int cpuacct_proc_stat(struct cgroup *cgrp, struct cftype *cft,
struct cpuacct *ca = cgroup_ca(cgrp);
struct timespec ts;
u64 *cpustat;
+ struct cgroup *sib;

user = nice = system = idle = iowait =
irq = softirq = steal = cputime64_zero;
@@ -9343,6 +9344,20 @@ static int cpuacct_proc_stat(struct cgroup *cgrp, struct cftype *cft,
irq = cputime64_add(irq, cpustat[CPUACCT_STAT_IRQ]);
softirq = cputime64_add(softirq, cpustat[CPUACCT_STAT_SOFTIRQ]);
steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
+ rcu_read_lock();
+ list_for_each_entry(sib, &ca->css.cgroup->sibling, sibling) {
+ u64 *cpustat_sib;
+ struct cpuacct *ca_sib = cgroup_ca(sib);
+ if (!ca_sib)
+ continue;
+
+ cpustat_sib = per_cpu_ptr(ca_sib->cpustat, i);
+ steal = cputime64_add(steal, cpustat_sib[CPUACCT_STAT_USER]);
+ steal = cputime64_add(steal, cpustat_sib[CPUACCT_STAT_SYSTEM]);
+ steal = cputime64_add(steal, cpustat_sib[CPUACCT_STAT_IRQ]);
+ steal = cputime64_add(steal, cpustat_sib[CPUACCT_STAT_SOFTIRQ]);
+ }
+ rcu_read_unlock();
guest = cputime64_add(guest, cpustat[CPUACCT_STAT_GUEST]);
guest_nice = cputime64_add(guest_nice,
cpustat[CPUACCT_STAT_GUEST_NICE]);
@@ -9384,6 +9399,21 @@ static int cpuacct_proc_stat(struct cgroup *cgrp, struct cftype *cft,
irq = cpustat[CPUACCT_STAT_IRQ];
softirq = cpustat[CPUACCT_STAT_SOFTIRQ];
steal = kstat_cpu(i).cpustat.steal;
+ rcu_read_lock();
+ list_for_each_entry(sib, &ca->css.cgroup->sibling, sibling) {
+ u64 *cpustat_sib;
+ struct cpuacct *ca_sib = cgroup_ca(sib);
+
+ if (!ca_sib)
+ continue;
+ cpustat_sib = per_cpu_ptr(ca_sib->cpustat, i);
+ steal = cputime64_add(steal, cpustat_sib[CPUACCT_STAT_USER]);
+ steal = cputime64_add(steal, cpustat_sib[CPUACCT_STAT_SYSTEM]);
+ steal = cputime64_add(steal, cpustat_sib[CPUACCT_STAT_IRQ]);
+ steal = cputime64_add(steal, cpustat_sib[CPUACCT_STAT_SOFTIRQ]);
+ }
+ rcu_read_unlock();
+
guest = cpustat[CPUACCT_STAT_GUEST];
guest_nice = cpustat[CPUACCT_STAT_GUEST_NICE];
seq_printf(p,
--
1.7.6

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/