[PATCH RFC 1/5] vm_cgroup: basic infrastructure

From: Vladimir Davydov
Date: Thu Jul 03 2014 - 08:49:13 EST


This patch introduces the vm cgroup to control address space expansion
of tasks that belong to a cgroup. The idea is to provide a mechanism to
limit memory overcommit not only for the whole system, but also on a
per-cgroup basis.

This patch only adds some basic cgroup methods, like alloc/free and
write/read, while the real accounting/limiting is done in the following
patches.

Signed-off-by: Vladimir Davydov <vdavydov@xxxxxxxxxxxxx>
---
include/linux/cgroup_subsys.h | 4 ++
include/linux/vm_cgroup.h | 18 ++++++
init/Kconfig | 4 ++
mm/Makefile | 1 +
mm/vm_cgroup.c | 131 +++++++++++++++++++++++++++++++++++++++++
5 files changed, 158 insertions(+)
create mode 100644 include/linux/vm_cgroup.h
create mode 100644 mm/vm_cgroup.c

diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 98c4f9b12b03..8eb7db12f6ea 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -47,6 +47,10 @@ SUBSYS(net_prio)
SUBSYS(hugetlb)
#endif

+#if IS_ENABLED(CONFIG_CGROUP_VM)
+SUBSYS(vm)
+#endif
+
/*
* The following subsystems are not supported on the default hierarchy.
*/
diff --git a/include/linux/vm_cgroup.h b/include/linux/vm_cgroup.h
new file mode 100644
index 000000000000..b629c9affa4b
--- /dev/null
+++ b/include/linux/vm_cgroup.h
@@ -0,0 +1,25 @@
+#ifndef _LINUX_VM_CGROUP_H
+#define _LINUX_VM_CGROUP_H
+
+#include <linux/cgroup.h>
+#include <linux/types.h>
+
+#ifdef CONFIG_CGROUP_VM
+/*
+ * vm_cgroup_disabled - report whether the vm controller is unusable
+ *
+ * Returns true when vm_cgrp_subsys is marked disabled, false otherwise.
+ */
+static inline bool vm_cgroup_disabled(void)
+{
+	return vm_cgrp_subsys.disabled;
+}
+#else /* !CONFIG_CGROUP_VM */
+/* The controller is not built in, so it is always reported as disabled. */
+static inline bool vm_cgroup_disabled(void)
+{
+	return true;
+}
+#endif /* CONFIG_CGROUP_VM */
+
+#endif /* _LINUX_VM_CGROUP_H */
diff --git a/init/Kconfig b/init/Kconfig
index 9d76b99af1b9..4419835bea7c 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1008,6 +1008,15 @@ config MEMCG_KMEM
unusable in real life so DO NOT SELECT IT unless for development
purposes.

+config CGROUP_VM
+	bool "Virtual Memory Resource Controller for Control Groups"
+	depends on RESOURCE_COUNTERS
+	default n
+	help
+	  Provides a cgroup controller intended to limit address space
+	  expansion of the tasks in a cgroup, so that memory overcommit
+	  can be limited per cgroup and not only for the whole system.
+
config CGROUP_HUGETLB
bool "HugeTLB Resource Controller for Control Groups"
depends on RESOURCE_COUNTERS && HUGETLB_PAGE
diff --git a/mm/Makefile b/mm/Makefile
index 4064f3ec145e..914520d2669f 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -52,6 +52,7 @@ obj-$(CONFIG_MIGRATION) += migrate.o
obj-$(CONFIG_QUICKLIST) += quicklist.o
obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o
obj-$(CONFIG_MEMCG) += memcontrol.o page_cgroup.o vmpressure.o
+obj-$(CONFIG_CGROUP_VM) += vm_cgroup.o
obj-$(CONFIG_CGROUP_HUGETLB) += hugetlb_cgroup.o
obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o
obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o
diff --git a/mm/vm_cgroup.c b/mm/vm_cgroup.c
new file mode 100644
index 000000000000..7f5b81482748
--- /dev/null
+++ b/mm/vm_cgroup.c
@@ -0,0 +1,159 @@
+/*
+ * vm cgroup: basic infrastructure for a controller that is meant to limit
+ * address space expansion of the tasks in a cgroup.
+ *
+ * Only css allocation/freeing and the control files are implemented here;
+ * nothing is charged to the counter in this file yet.
+ */
+#include <linux/cgroup.h>
+#include <linux/res_counter.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/vm_cgroup.h>
+
+struct vm_cgroup {
+	struct cgroup_subsys_state css;
+
+	/*
+	 * The counter to account for vm usage.
+	 */
+	struct res_counter res;
+};
+
+/* Set in vm_cgroup_css_alloc() when the css without a parent is created. */
+static struct vm_cgroup *root_vm_cgroup __read_mostly;
+
+/* Returns true if @vmcg is the root vm cgroup. */
+static inline bool vm_cgroup_is_root(struct vm_cgroup *vmcg)
+{
+	return vmcg == root_vm_cgroup;
+}
+
+/* Maps a css to the vm_cgroup that embeds it; a NULL css maps to NULL. */
+static struct vm_cgroup *vm_cgroup_from_css(struct cgroup_subsys_state *s)
+{
+	return s ? container_of(s, struct vm_cgroup, css) : NULL;
+}
+
+/*
+ * Allocate a zeroed vm_cgroup and initialise its counter with the parent's
+ * counter as its parent (NULL for the root).  Returns ERR_PTR(-ENOMEM) on
+ * allocation failure.
+ */
+static struct cgroup_subsys_state *
+vm_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
+{
+	struct vm_cgroup *parent = vm_cgroup_from_css(parent_css);
+	struct vm_cgroup *vmcg;
+
+	vmcg = kzalloc(sizeof(*vmcg), GFP_KERNEL);
+	if (!vmcg)
+		return ERR_PTR(-ENOMEM);
+
+	res_counter_init(&vmcg->res, parent ? &parent->res : NULL);
+
+	if (!parent)
+		root_vm_cgroup = vmcg;
+
+	return &vmcg->css;
+}
+
+/* Free the vm_cgroup that embeds @css. */
+static void vm_cgroup_css_free(struct cgroup_subsys_state *css)
+{
+	struct vm_cgroup *vmcg = vm_cgroup_from_css(css);
+
+	kfree(vmcg);
+}
+
+/* Read the counter member selected by the file's ->private value. */
+static u64 vm_cgroup_read_u64(struct cgroup_subsys_state *css,
+			      struct cftype *cft)
+{
+	struct vm_cgroup *vmcg = vm_cgroup_from_css(css);
+	int memb = cft->private;
+
+	return res_counter_read_u64(&vmcg->res, memb);
+}
+
+/*
+ * Write handler for limit_in_bytes: parse @buf and hand the value to
+ * res_counter_set_limit().  Writes to the root cgroup fail with -EINVAL.
+ * Returns @nbytes on success or a negative errno.
+ */
+static ssize_t vm_cgroup_write(struct kernfs_open_file *of,
+			       char *buf, size_t nbytes, loff_t off)
+{
+	struct vm_cgroup *vmcg = vm_cgroup_from_css(of_css(of));
+	unsigned long long val;
+	int ret;
+
+	if (vm_cgroup_is_root(vmcg))
+		return -EINVAL;
+
+	buf = strstrip(buf);
+	ret = res_counter_memparse_write_strategy(buf, &val);
+	if (ret)
+		return ret;
+
+	ret = res_counter_set_limit(&vmcg->res, val);
+	return ret ?: nbytes;
+}
+
+/*
+ * Write handler for max_usage_in_bytes and failcnt: calls the matching
+ * res_counter reset helper.  Returns @nbytes on success, or -EINVAL if the
+ * file's ->private value names a member that cannot be reset.
+ */
+static ssize_t vm_cgroup_reset(struct kernfs_open_file *of, char *buf,
+			       size_t nbytes, loff_t off)
+{
+	struct vm_cgroup *vmcg = vm_cgroup_from_css(of_css(of));
+	int memb = of_cft(of)->private;
+
+	switch (memb) {
+	case RES_MAX_USAGE:
+		res_counter_reset_max(&vmcg->res);
+		break;
+	case RES_FAILCNT:
+		res_counter_reset_failcnt(&vmcg->res);
+		break;
+	default:
+		/* Not wired up by vm_cgroup_files[]; fail the write, don't BUG(). */
+		return -EINVAL;
+	}
+	return nbytes;
+}
+
+static struct cftype vm_cgroup_files[] = {
+	{
+		.name = "usage_in_bytes",
+		.private = RES_USAGE,
+		.read_u64 = vm_cgroup_read_u64,
+	},
+	{
+		.name = "max_usage_in_bytes",
+		.private = RES_MAX_USAGE,
+		.write = vm_cgroup_reset,
+		.read_u64 = vm_cgroup_read_u64,
+	},
+	{
+		.name = "limit_in_bytes",
+		.private = RES_LIMIT,
+		.write = vm_cgroup_write,
+		.read_u64 = vm_cgroup_read_u64,
+	},
+	{
+		.name = "failcnt",
+		.private = RES_FAILCNT,
+		.write = vm_cgroup_reset,
+		.read_u64 = vm_cgroup_read_u64,
+	},
+	{ },	/* terminate */
+};
+
+struct cgroup_subsys vm_cgrp_subsys = {
+	.css_alloc = vm_cgroup_css_alloc,
+	.css_free = vm_cgroup_css_free,
+	.base_cftypes = vm_cgroup_files,
+};
--
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/