[PATCH] memcg: reduce the per-cpu stat allocation to an appropriate size.

From: KAMEZAWA Hiroyuki
Date: Fri Nov 14 2008 - 00:50:29 EST


How about this one?
Tested on x86-64 + mmotm-Nov10; it works well.
(Testing on other architectures is welcome.)

-Kame
==
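As Jan Blunck <jblunck@xxxxxxx> pointed out, sizing memcg's per-cpu
stat array by NR_CPUS is not good: NR_CPUS is the compile-time maximum
and can be much larger than the number of CPU ids actually in use.

This patch changes mem_cgroup's cpustat allocation to be based on
nr_cpu_ids instead of NR_CPUS. Because cpustat becomes a zero-length
array at the end of struct mem_cgroup and is sized at allocation time,
the statically allocated init_mem_cgroup is removed and the root
mem_cgroup is now allocated dynamically like any other.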

From: Jan Blunck <jblunck@xxxxxxx>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx>

---
mm/memcontrol.c | 34 ++++++++++++++++++----------------
1 file changed, 18 insertions(+), 16 deletions(-)

Index: mmotm-2.6.28-Nov10/mm/memcontrol.c
===================================================================
--- mmotm-2.6.28-Nov10.orig/mm/memcontrol.c
+++ mmotm-2.6.28-Nov10/mm/memcontrol.c
@@ -60,7 +60,7 @@ struct mem_cgroup_stat_cpu {
} ____cacheline_aligned_in_smp;

struct mem_cgroup_stat {
- struct mem_cgroup_stat_cpu cpustat[NR_CPUS];
+ struct mem_cgroup_stat_cpu cpustat[0];
};

/*
@@ -129,11 +129,10 @@ struct mem_cgroup {

int prev_priority; /* for recording reclaim priority */
/*
- * statistics.
+ * statistics. This must be placed at the end of memcg.
*/
struct mem_cgroup_stat stat;
};
-static struct mem_cgroup init_mem_cgroup;

enum charge_type {
MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
@@ -1292,42 +1291,45 @@ static void free_mem_cgroup_per_zone_inf
kfree(mem->info.nodeinfo[node]);
}

+static int mem_cgroup_size(void)
+{
+ int cpustat_size = nr_cpu_ids * sizeof(struct mem_cgroup_stat_cpu);
+ return sizeof(struct mem_cgroup) + cpustat_size;
+}
+
+
static struct mem_cgroup *mem_cgroup_alloc(void)
{
struct mem_cgroup *mem;
+ int size = mem_cgroup_size();

- if (sizeof(*mem) < PAGE_SIZE)
- mem = kmalloc(sizeof(*mem), GFP_KERNEL);
+ if (size < PAGE_SIZE)
+ mem = kmalloc(size, GFP_KERNEL);
else
- mem = vmalloc(sizeof(*mem));
+ mem = vmalloc(size);

if (mem)
- memset(mem, 0, sizeof(*mem));
+ memset(mem, 0, size);
return mem;
}

static void mem_cgroup_free(struct mem_cgroup *mem)
{
- if (sizeof(*mem) < PAGE_SIZE)
+ if (mem_cgroup_size() < PAGE_SIZE)
kfree(mem);
else
vfree(mem);
}

-
static struct cgroup_subsys_state *
mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
{
struct mem_cgroup *mem;
int node;

- if (unlikely((cont->parent) == NULL)) {
- mem = &init_mem_cgroup;
- } else {
- mem = mem_cgroup_alloc();
- if (!mem)
- return ERR_PTR(-ENOMEM);
- }
+ mem = mem_cgroup_alloc();
+ if (!mem)
+ return ERR_PTR(-ENOMEM);

res_counter_init(&mem->res);


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/