[RFC PATCH] mm, kvm: account kvm_vcpu_mmap to kmemcg

From: Shakeel Butt
Date: Thu Mar 28 2019 - 21:29:04 EST


A VCPU of a VM can allocate upto three pages which can be mmap'ed by the
user space application. At the moment this memory is not charged. On a
large machine running large number of VMs (or small number of VMs having
large number of VCPUs), this unaccounted memory can be very significant.
So, this memory should be charged to a kmemcg. However that is not
possible as these pages are mmapped to the userspace and PageKmemcg()
was designed with the assumption that such pages will never be mmapped
to the userspace.

One way to solve this problem is by introducing an additional memcg
charging API similar to mem_cgroup_[un]charge_skmem(). However skmem
charging API usage is contained and shared and no new users are
expected but the pages which can be mmapped and should be charged to
kmemcg can and will increase. So, requiring the usage for such API will
increase the maintenance burden. The simplest solution is to remove the
assumption of no mmapping PageKmemcg() pages to user space.

Signed-off-by: Shakeel Butt <shakeelb@xxxxxxxxxx>
---
arch/s390/kvm/kvm-s390.c | 2 +-
arch/x86/kvm/x86.c | 2 +-
include/linux/page-flags.h | 26 ++++++++++++++++++--------
include/trace/events/mmflags.h | 9 ++++++++-
virt/kvm/coalesced_mmio.c | 2 +-
virt/kvm/kvm_main.c | 2 +-
6 files changed, 30 insertions(+), 13 deletions(-)

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 4638303ba6a8..1a9e337ed5da 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -2953,7 +2953,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
goto out;

BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
- sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
+ sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
if (!sie_page)
goto out_free_cpu;

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 65e4559eef2f..05c0c7eaa5c6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9019,7 +9019,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
else
vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;

- page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
if (!page) {
r = -ENOMEM;
goto fail;
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 9f8712a4b1a5..b47a6a327d6a 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -78,6 +78,10 @@
* PG_hwpoison indicates that a page got corrupted in hardware and contains
* data with incorrect ECC bits that triggered a machine check. Accessing is
* not safe since it may cause another machine check. Don't touch!
+ *
+ * PG_kmemcg indicates that a kmem page is charged to a memcg. If kmemcg is
+ * enabled, the page allocator will set PageKmemcg() on pages allocated with
+ * __GFP_ACCOUNT. It gets cleared on page free.
*/

/*
@@ -130,6 +134,9 @@ enum pageflags {
#if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT)
PG_young,
PG_idle,
+#endif
+#ifdef CONFIG_MEMCG_KMEM
+ PG_kmemcg,
#endif
__NR_PAGEFLAGS,

@@ -289,6 +296,9 @@ static inline int Page##uname(const struct page *page) { return 0; }
#define SETPAGEFLAG_NOOP(uname) \
static inline void SetPage##uname(struct page *page) { }

+#define __SETPAGEFLAG_NOOP(uname) \
+static inline void __SetPage##uname(struct page *page) { }
+
#define CLEARPAGEFLAG_NOOP(uname) \
static inline void ClearPage##uname(struct page *page) { }

@@ -427,6 +437,13 @@ TESTCLEARFLAG(Young, young, PF_ANY)
PAGEFLAG(Idle, idle, PF_ANY)
#endif

+#ifdef CONFIG_MEMCG_KMEM
+__PAGEFLAG(Kmemcg, kmemcg, PF_NO_TAIL)
+#else
+TESTPAGEFLAG_FALSE(kmemcg)
+__SETPAGEFLAG_NOOP(kmemcg)
+__CLEARPAGEFLAG_NOOP(kmemcg)
+#endif
/*
* On an anonymous page mapped into a user virtual memory area,
* page->mapping points to its anon_vma, not to a struct address_space;
@@ -701,8 +718,7 @@ PAGEFLAG_FALSE(DoubleMap)
#define PAGE_MAPCOUNT_RESERVE -128
#define PG_buddy 0x00000080
#define PG_offline 0x00000100
-#define PG_kmemcg 0x00000200
-#define PG_table 0x00000400
+#define PG_table 0x00000200

#define PageType(page, flag) \
((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE)
@@ -743,12 +759,6 @@ PAGE_TYPE_OPS(Buddy, buddy)
*/
PAGE_TYPE_OPS(Offline, offline)

-/*
- * If kmemcg is enabled, the buddy allocator will set PageKmemcg() on
- * pages allocated with __GFP_ACCOUNT. It gets cleared on page free.
- */
-PAGE_TYPE_OPS(Kmemcg, kmemcg)
-
/*
* Marks pages in use as page tables.
*/
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index a1675d43777e..d93b78eac5b9 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -79,6 +79,12 @@
#define IF_HAVE_PG_IDLE(flag,string)
#endif

+#ifdef CONFIG_MEMCG_KMEM
+#define IF_HAVE_PG_KMEMCG(flag,string) ,{1UL << flag, string}
+#else
+#define IF_HAVE_PG_KMEMCG(flag,string)
+#endif
+
#define __def_pageflag_names \
{1UL << PG_locked, "locked" }, \
{1UL << PG_waiters, "waiters" }, \
@@ -105,7 +111,8 @@ IF_HAVE_PG_MLOCK(PG_mlocked, "mlocked" ) \
IF_HAVE_PG_UNCACHED(PG_uncached, "uncached" ) \
IF_HAVE_PG_HWPOISON(PG_hwpoison, "hwpoison" ) \
IF_HAVE_PG_IDLE(PG_young, "young" ) \
-IF_HAVE_PG_IDLE(PG_idle, "idle" )
+IF_HAVE_PG_IDLE(PG_idle, "idle" ) \
+IF_HAVE_PG_KMEMCG(PG_kmemcg, "kmemcg" )

#define show_page_flags(flags) \
(flags) ? __print_flags(flags, "|", \
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index 5294abb3f178..ebf1601de2a5 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -110,7 +110,7 @@ int kvm_coalesced_mmio_init(struct kvm *kvm)
int ret;

ret = -ENOMEM;
- page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
if (!page)
goto out_err;

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f25aa98a94df..de6328dff251 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -306,7 +306,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
vcpu->pre_pcpu = -1;
INIT_LIST_HEAD(&vcpu->blocked_vcpu_list);

- page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
if (!page) {
r = -ENOMEM;
goto fail;
--
2.21.0.392.gf8f6787159e-goog