[RFC][PATCH] sched: cpuacct: Track cpuusage per cpu frequency

From: Mike Chan
Date: Mon Apr 05 2010 - 15:33:46 EST


New file: cpuacct.cpufreq when CONFIG_CPU_FREQ_STATS is enabled.

cpuacct.cpufreq accounts for cpu time per-cpu frequency, time is exported
in nano-seconds

We do not know the cpufreq table size at compile time.
So a new config option CONFIG_CPUACCT_CPUFREQ_TABLE_MAX is intruduced,
to determine the cpufreq table per-cpu in the cpuacct struct.

Signed-off-by: Mike Chan <mike@xxxxxxxxxxx>
---
init/Kconfig | 5 +++
kernel/sched.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 104 insertions(+), 0 deletions(-)

diff --git a/init/Kconfig b/init/Kconfig
index eb77e8c..e1e86df 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -534,6 +534,11 @@ config CGROUP_CPUACCT
Provides a simple Resource Controller for monitoring the
total CPU consumed by the tasks in a cgroup.

+config CPUACCT_CPUFREQ_TABLE_MAX
+ int "Max CPUFREQ table size"
+ depends on CGROUP_CPUACCT && CPU_FREQ_TABLE
+ default 32
+
config RESOURCE_COUNTERS
bool "Resource counters"
help
diff --git a/kernel/sched.c b/kernel/sched.c
index 528a105..a0b56b5 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -71,6 +71,7 @@
#include <linux/debugfs.h>
#include <linux/ctype.h>
#include <linux/ftrace.h>
+#include <linux/cpufreq.h>

#include <asm/tlb.h>
#include <asm/irq_regs.h>
@@ -8817,6 +8818,11 @@ struct cgroup_subsys cpu_cgroup_subsys = {
* (balbir@xxxxxxxxxx).
*/

+#ifdef CONFIG_CPU_FREQ_STAT
+/* The alloc_percpu macro uses typeof so we must define a type here. */
+typedef struct { u64 usage[CONFIG_CPUACCT_CPUFREQ_TABLE_MAX]; } cpufreq_usage_t;
+#endif
+
/* track cpu usage of a group of tasks and its child groups */
struct cpuacct {
struct cgroup_subsys_state css;
@@ -8824,6 +8830,10 @@ struct cpuacct {
u64 __percpu *cpuusage;
struct percpu_counter cpustat[CPUACCT_STAT_NSTATS];
struct cpuacct *parent;
+#ifdef CONFIG_CPU_FREQ_STAT
+ /* cpufreq_usage is a pointer to an array of u64-types on every cpu */
+ cpufreq_usage_t *cpufreq_usage;
+#endif
};

struct cgroup_subsys cpuacct_subsys;
@@ -8856,6 +8866,10 @@ static struct cgroup_subsys_state *cpuacct_create(
if (!ca->cpuusage)
goto out_free_ca;

+#ifdef CONFIG_CPU_FREQ_STAT
+ ca->cpufreq_usage = alloc_percpu(cpufreq_usage_t);
+#endif
+
for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
if (percpu_counter_init(&ca->cpustat[i], 0))
goto out_free_counters;
@@ -8888,6 +8902,74 @@ cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
kfree(ca);
}

+#ifdef CONFIG_CPU_FREQ_STAT
+static int cpufreq_index;
+static int cpuacct_cpufreq_notify(struct notifier_block *nb, unsigned long val,
+ void *data)
+{
+ int ret;
+ struct cpufreq_policy *policy;
+ struct cpufreq_freqs *freq = data;
+ struct cpufreq_frequency_table *table;
+
+ if (val != CPUFREQ_POSTCHANGE)
+ return 0;
+
+ /* Update cpufreq_index with current speed */
+ table = cpufreq_frequency_get_table(freq->cpu);
+ policy = cpufreq_cpu_get(freq->cpu);
+ ret = cpufreq_frequency_table_target(policy, table,
+ cpufreq_quick_get(freq->cpu),
+ CPUFREQ_RELATION_L, &cpufreq_index);
+ cpufreq_cpu_put(policy);
+ return ret;
+}
+
+static struct notifier_block cpufreq_notifier = {
+ .notifier_call = cpuacct_cpufreq_notify,
+};
+
+static __init int cpuacct_init(void)
+{
+ return cpufreq_register_notifier(&cpufreq_notifier,
+ CPUFREQ_TRANSITION_NOTIFIER);
+}
+
+module_init(cpuacct_init);
+
+static int cpuacct_cpufreq_show(struct cgroup *cgrp, struct cftype *cft,
+ struct cgroup_map_cb *cb)
+{
+ int i;
+ unsigned int cpu;
+ char buf[32];
+ struct cpuacct *ca = cgroup_ca(cgrp);
+ struct cpufreq_frequency_table *table =
+ cpufreq_frequency_get_table(smp_processor_id());
+
+ for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) {
+ u64 total = 0;
+
+ if (table[i].frequency == CPUFREQ_ENTRY_INVALID)
+ continue;
+
+ for_each_present_cpu(cpu) {
+ cpufreq_usage_t *cpufreq_usage;
+
+ cpufreq_usage = per_cpu_ptr(ca->cpufreq_usage, cpu);
+ table = cpufreq_frequency_get_table(cpu);
+
+ total += cpufreq_usage->usage[i];
+ }
+
+ snprintf(buf, sizeof(buf), "%d", table[i].frequency);
+ cb->fill(cb, buf, total);
+ }
+
+ return 0;
+}
+#endif
+
static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu)
{
u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
@@ -9003,6 +9085,12 @@ static struct cftype files[] = {
.name = "stat",
.read_map = cpuacct_stats_show,
},
+#ifdef CONFIG_CPU_FREQ_STAT
+ {
+ .name = "cpufreq",
+ .read_map = cpuacct_cpufreq_show,
+ },
+#endif
};

static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
@@ -9031,6 +9119,17 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime)

for (; ca; ca = ca->parent) {
u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
+#ifdef CONFIG_CPU_FREQ_STAT
+ cpufreq_usage_t *cpufreq_usage =
+ per_cpu_ptr(ca->cpufreq_usage, cpu);
+
+ if (cpufreq_index > CONFIG_CPUACCT_CPUFREQ_TABLE_MAX)
+ printk_once(KERN_WARNING "cpuacct_charge: "
+ "cpufreq_index: %d exceeds max table "
+ "size\n", cpufreq_index);
+ else
+ cpufreq_usage->usage[cpufreq_index] += cputime;
+#endif
*cpuusage += cputime;
}

--
1.7.0.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/