[PATCH v2 13/14] provide a version of cpuusage statistics inside cpu cgroup

From: Glauber Costa
Date: Tue Nov 01 2011 - 17:21:32 EST


For users interested in the information currently displayed in
cpuacct.usage and cpuacct.usage_per_cpu, we provide the same counters
inside the cpu cgroup, as cpu.usage and cpu.usage_percpu.
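
As an illustration (the mount point below is an assumption; adjust to
wherever the cpu controller is mounted), the new files mirror their
cpuacct counterparts:

  # cat /sys/fs/cgroup/cpu/cpu.usage           total runtime, in ns
  # cat /sys/fs/cgroup/cpu/cpu.usage_percpu    one value per present cpu
  # echo 0 > /sys/fs/cgroup/cpu/cpu.usage      reset; writing any value
                                               other than 0 returns -EINVAL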

Signed-off-by: Glauber Costa <glommer@xxxxxxxxxxxxx>
CC: Balbir Singh <bsingharora@xxxxxxxxx>
---
kernel/sched.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++
kernel/sched_fair.c | 10 ++++++
kernel/sched_rt.c | 4 ++
3 files changed, 97 insertions(+), 0 deletions(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index d93cfd4..b9296cd 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -303,6 +303,7 @@ struct task_group {

struct cfs_bandwidth cfs_bandwidth;
struct kernel_cpustat __percpu *cpustat;
+ u64 __percpu *cpuusage;
struct timespec start_time;
};

@@ -344,6 +345,8 @@ struct cfs_rq {
#ifndef CONFIG_64BIT
u64 min_vruntime_copy;
#endif
+ u64 sum_exec_runtime;
+ u64 prev_sum_exec_runtime;

struct rb_root tasks_timeline;
struct rb_node *rb_leftmost;
@@ -547,7 +550,10 @@ struct rt_rq {
struct rq *rq;
struct list_head leaf_rt_rq_list;
struct task_group *tg;
+
#endif
+ u64 sum_exec_runtime;
+ u64 prev_sum_exec_runtime;
};

#ifdef CONFIG_SMP
@@ -8359,6 +8365,10 @@ void __init sched_init(void)

root_task_group.start_time = (struct timespec){0, 0};
root_task_group.cpustat = &kernel_cpustat;
+
+ root_task_group.cpuusage = alloc_percpu(u64);
+ /* Failing an allocation this early means we're screwed anyway */
+ BUG_ON(!root_task_group.cpuusage);
#endif /* CONFIG_CGROUP_SCHED */

for_each_possible_cpu(i) {
@@ -8796,6 +8806,7 @@ static void free_sched_group(struct task_group *tg)
free_rt_sched_group(tg);
autogroup_free(tg);
free_percpu(tg->cpustat);
+ free_percpu(tg->cpuusage);
kfree(tg);
}

@@ -8816,6 +8827,10 @@ struct task_group *sched_create_group(struct task_group *parent)
if (!alloc_rt_sched_group(tg, parent))
goto err;

+ tg->cpuusage = alloc_percpu(u64);
+ if (!tg->cpuusage)
+ goto err;
+
tg->cpustat = alloc_percpu(struct kernel_cpustat);
if (!tg->cpustat)
goto err;
@@ -9629,6 +9644,65 @@ static int cpu_cgroup_stats_show(struct cgroup *cgrp, struct cftype *cft,
return 0;
}

+static u64 cpu_cgroup_usage_cpu(struct cgroup *cgrp, int i)
+{
+ struct task_group *tg = cgroup_tg(cgrp);
+ struct cfs_rq *cfs_rq = tg->cfs_rq[i];
+
+ /* runtime accumulated since the last reset (write of 0 to cpu.usage) */
+ return cfs_rq->sum_exec_runtime -
+ cfs_rq->prev_sum_exec_runtime;
+}
+
+static u64 cpu_cgroup_cpuusage_read(struct cgroup *cgrp, struct cftype *cft)
+{
+ u64 totalcpuusage = 0;
+ int i;
+
+ for_each_present_cpu(i)
+ totalcpuusage += cpu_cgroup_usage_cpu(cgrp, i);
+
+ return totalcpuusage;
+}
+
+static int cpu_cgroup_cpuusage_write(struct cgroup *cgrp, struct cftype *cftype,
+ u64 reset)
+{
+ struct task_group *tg = cgroup_tg(cgrp);
+ int i;
+
+ /* as in cpuacct, writing 0 (a reset) is the only valid operation */
+ if (reset)
+ return -EINVAL;
+
+ /*
+ * root_task_group's cfs_rq[i] pointers are wired to cpu_rq(i)->cfs
+ * at init time, so a single loop also covers the root group. The
+ * reset must hit the same cfs_rq fields the read side samples;
+ * resetting se->prev_sum_exec_runtime instead would clobber state
+ * the scheduler uses for preemption decisions.
+ */
+ for_each_present_cpu(i)
+ tg->cfs_rq[i]->prev_sum_exec_runtime =
+ tg->cfs_rq[i]->sum_exec_runtime;
+
+ return 0;
+}
+
+static int cpu_cgroup_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft,
+ struct seq_file *m)
+{
+ u64 percpu;
+ int i;
+
+ for_each_present_cpu(i) {
+ percpu = cpu_cgroup_usage_cpu(cgroup, i);
+ seq_printf(m, "%llu ", (unsigned long long) percpu);
+ }
+ seq_printf(m, "\n");
+ return 0;
+}
+
static struct cftype cpu_files[] = {
#ifdef CONFIG_FAIR_GROUP_SCHED
{
@@ -9673,6 +9747,15 @@ static struct cftype cpu_files[] = {
.name = "stat",
.read_map = cpu_cgroup_stats_show,
},
+ {
+ .name = "usage",
+ .read_u64 = cpu_cgroup_cpuusage_read,
+ .write_u64 = cpu_cgroup_cpuusage_write,
+ },
+ {
+ .name = "usage_percpu",
+ .read_seq_string = cpu_cgroup_percpu_seq_read,
+ },
};

/*
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 5c9e679..030b8eb 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -560,6 +560,16 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
max((u64)delta_exec, curr->statistics.exec_max));

curr->sum_exec_runtime += delta_exec;
+
+ /*
+ * Sched entities move between runqueues and cpus all the time.
+ * We want to record the total exec time of a particular entity
+ * (curr), but we are also interested in the total time this
+ * runqueue got, so the runtime must be accounted in both places.
+ */
+ if (static_branch(&sched_cgroup_enabled))
+ cfs_rq->sum_exec_runtime += delta_exec;
+
schedstat_add(cfs_rq, exec_clock, delta_exec);
delta_exec_weighted = calc_delta_fair(delta_exec, curr);

diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 056cbd2..2edaeb4 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -686,6 +686,10 @@ static void update_curr_rt(struct rq *rq)
schedstat_set(curr->se.statistics.exec_max, max(curr->se.statistics.exec_max, delta_exec));

curr->se.sum_exec_runtime += delta_exec;
+
+ if (static_branch(&sched_cgroup_enabled))
+ rq->rt.sum_exec_runtime += delta_exec;
+
account_group_exec_runtime(curr, delta_exec);

curr->se.exec_start = rq->clock_task;
--
1.7.6.4
