[PATCH v2 2/2] mm: hugetlb: add VmHugetlbRSS: field in /proc/pid/status

From: Naoya Horiguchi
Date: Fri Aug 07 2015 - 03:26:33 EST


Currently there's no easy way to get per-process usage of hugetlb pages, which
is inconvenient because applications which use hugetlb typically want to control
their processes on the basis of how much memory (including hugetlb) they use.
So this patch simply provides easy access to the info via /proc/pid/status.

This patch shouldn't change the OOM behavior (so hugetlb usage is still ignored,
as it is now), which I guess is fine until we have a strong reason to change that.

Signed-off-by: Naoya Horiguchi <n-horiguchi@xxxxxxxxxxxxx>
---
fs/proc/task_mmu.c | 5 ++++-
include/linux/hugetlb.h | 18 ++++++++++++++++++
include/linux/mm.h | 3 +++
include/linux/mm_types.h | 3 +++
mm/hugetlb.c | 9 +++++++++
mm/memory.c | 4 ++--
mm/rmap.c | 4 +++-
7 files changed, 42 insertions(+), 4 deletions(-)

diff --git v4.2-rc4.orig/fs/proc/task_mmu.c v4.2-rc4/fs/proc/task_mmu.c
index c7218603306d..f181f56fcce2 100644
--- v4.2-rc4.orig/fs/proc/task_mmu.c
+++ v4.2-rc4/fs/proc/task_mmu.c
@@ -22,7 +22,7 @@
void task_mem(struct seq_file *m, struct mm_struct *mm)
{
unsigned long data, text, lib, swap, ptes, pmds;
- unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;
+ unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss, hugetlb_rss;

/*
* Note: to minimize their overhead, mm maintains hiwater_vm and
@@ -37,6 +37,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
hiwater_rss = total_rss = get_mm_rss(mm);
if (hiwater_rss < mm->hiwater_rss)
hiwater_rss = mm->hiwater_rss;
+ hugetlb_rss = get_hugetlb_rss(mm);

data = mm->total_vm - mm->shared_vm - mm->stack_vm;
text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10;
@@ -51,6 +52,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
"VmPin:\t%8lu kB\n"
"VmHWM:\t%8lu kB\n"
"VmRSS:\t%8lu kB\n"
+ "VmHugetlbRSS:\t%8lu kB\n"
"VmData:\t%8lu kB\n"
"VmStk:\t%8lu kB\n"
"VmExe:\t%8lu kB\n"
@@ -64,6 +66,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
mm->pinned_vm << (PAGE_SHIFT-10),
hiwater_rss << (PAGE_SHIFT-10),
total_rss << (PAGE_SHIFT-10),
+ hugetlb_rss << (PAGE_SHIFT-10),
data << (PAGE_SHIFT-10),
mm->stack_vm << (PAGE_SHIFT-10), text, lib,
ptes >> 10,
diff --git v4.2-rc4.orig/include/linux/hugetlb.h v4.2-rc4/include/linux/hugetlb.h
index d891f949466a..6319df124e68 100644
--- v4.2-rc4.orig/include/linux/hugetlb.h
+++ v4.2-rc4/include/linux/hugetlb.h
@@ -469,6 +469,21 @@ static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
#define hugepages_supported() (HPAGE_SHIFT != 0)
#endif

+/*
+ * These simple wrappers hide MM_HUGETLBPAGES from code outside the hugetlbfs
+ * subsystem. The counter MM_HUGETLBPAGES is maintained in units of base pages,
+ * so it changes by 512, for example, when a 2MB hugepage is mapped or unmapped.
+ */
+static inline int get_hugetlb_rss(struct mm_struct *mm)
+{
+ return get_mm_counter(mm, MM_HUGETLBPAGES);
+}
+
+static inline void mod_hugetlb_rss(struct mm_struct *mm, long value)
+{
+ add_mm_counter(mm, MM_HUGETLBPAGES, value);
+}
+
#else /* CONFIG_HUGETLB_PAGE */
struct hstate {};
#define alloc_huge_page_node(h, nid) NULL
@@ -504,6 +519,9 @@ static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
{
return &mm->page_table_lock;
}
+
+#define get_hugetlb_rss(mm) 0
+#define mod_hugetlb_rss(mm, value) do {} while (0)
#endif /* CONFIG_HUGETLB_PAGE */

static inline spinlock_t *huge_pte_lock(struct hstate *h,
diff --git v4.2-rc4.orig/include/linux/mm.h v4.2-rc4/include/linux/mm.h
index 2e872f92dbac..9218a8856483 100644
--- v4.2-rc4.orig/include/linux/mm.h
+++ v4.2-rc4/include/linux/mm.h
@@ -1355,6 +1355,9 @@ static inline void sync_mm_rss(struct mm_struct *mm)
}
#endif

+extern inline void init_rss_vec(int *rss);
+extern inline void add_mm_rss_vec(struct mm_struct *mm, int *rss);
+
int vma_wants_writenotify(struct vm_area_struct *vma);

extern pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr,
diff --git v4.2-rc4.orig/include/linux/mm_types.h v4.2-rc4/include/linux/mm_types.h
index 0038ac7466fd..887b43ba5a18 100644
--- v4.2-rc4.orig/include/linux/mm_types.h
+++ v4.2-rc4/include/linux/mm_types.h
@@ -348,6 +348,9 @@ enum {
MM_FILEPAGES,
MM_ANONPAGES,
MM_SWAPENTS,
+#ifdef CONFIG_HUGETLB_PAGE
+ MM_HUGETLBPAGES,
+#endif
NR_MM_COUNTERS
};

diff --git v4.2-rc4.orig/mm/hugetlb.c v4.2-rc4/mm/hugetlb.c
index a8c3087089d8..12e5e7d3b60f 100644
--- v4.2-rc4.orig/mm/hugetlb.c
+++ v4.2-rc4/mm/hugetlb.c
@@ -2743,9 +2743,11 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
unsigned long mmun_start; /* For mmu_notifiers */
unsigned long mmun_end; /* For mmu_notifiers */
int ret = 0;
+ int rss[NR_MM_COUNTERS];

cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;

+ init_rss_vec(rss);
mmun_start = vma->vm_start;
mmun_end = vma->vm_end;
if (cow)
@@ -2797,6 +2799,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
get_page(ptepage);
page_dup_rmap(ptepage);
set_huge_pte_at(dst, addr, dst_pte, entry);
+ rss[MM_HUGETLBPAGES] += pages_per_huge_page(h);
}
spin_unlock(src_ptl);
spin_unlock(dst_ptl);
@@ -2805,6 +2808,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
if (cow)
mmu_notifier_invalidate_range_end(src, mmun_start, mmun_end);

+ add_mm_rss_vec(dst, rss);
return ret;
}

@@ -2823,6 +2827,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
unsigned long sz = huge_page_size(h);
const unsigned long mmun_start = start; /* For mmu_notifiers */
const unsigned long mmun_end = end; /* For mmu_notifiers */
+ int rss[NR_MM_COUNTERS];

WARN_ON(!is_vm_hugetlb_page(vma));
BUG_ON(start & ~huge_page_mask(h));
@@ -2832,6 +2837,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
address = start;
again:
+ init_rss_vec(rss);
for (; address < end; address += sz) {
ptep = huge_pte_offset(mm, address);
if (!ptep)
@@ -2877,6 +2883,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
if (huge_pte_dirty(pte))
set_page_dirty(page);

+ rss[MM_HUGETLBPAGES] -= pages_per_huge_page(h);
page_remove_rmap(page);
force_flush = !__tlb_remove_page(tlb, page);
if (force_flush) {
@@ -2892,6 +2899,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
unlock:
spin_unlock(ptl);
}
+ add_mm_rss_vec(mm, rss);
/*
* mmu_gather ran out of room to batch pages, we break out of
* the PTE lock to avoid doing the potential expensive TLB invalidate
@@ -3261,6 +3269,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
&& (vma->vm_flags & VM_SHARED)));
set_huge_pte_at(mm, address, ptep, new_pte);

+ mod_hugetlb_rss(mm, pages_per_huge_page(h));
if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
/* Optimization, do the COW without a second fault */
ret = hugetlb_cow(mm, vma, address, ptep, new_pte, page, ptl);
diff --git v4.2-rc4.orig/mm/memory.c v4.2-rc4/mm/memory.c
index 388dcf9aa283..e09b53da2733 100644
--- v4.2-rc4.orig/mm/memory.c
+++ v4.2-rc4/mm/memory.c
@@ -620,12 +620,12 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
return 0;
}

-static inline void init_rss_vec(int *rss)
+inline void init_rss_vec(int *rss)
{
memset(rss, 0, sizeof(int) * NR_MM_COUNTERS);
}

-static inline void add_mm_rss_vec(struct mm_struct *mm, int *rss)
+inline void add_mm_rss_vec(struct mm_struct *mm, int *rss)
{
int i;

diff --git v4.2-rc4.orig/mm/rmap.c v4.2-rc4/mm/rmap.c
index 171b68768df1..78e77b0ea3c3 100644
--- v4.2-rc4.orig/mm/rmap.c
+++ v4.2-rc4/mm/rmap.c
@@ -1230,7 +1230,9 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
update_hiwater_rss(mm);

if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
- if (!PageHuge(page)) {
+ if (PageHuge(page)) {
+ mod_hugetlb_rss(mm, -(1 << compound_order(page)));
+ } else {
if (PageAnon(page))
dec_mm_counter(mm, MM_ANONPAGES);
else
--
2.4.3
N‹§²æ¸›yú²X¬¶ÇvØ–)Þ{.nlj·¥Š{±‘êX§¶›¡Ü}©ž²ÆzÚj:+v‰¨¾«‘êZ+€Êzf£¢·hšˆ§~†­†Ûÿû®w¥¢¸?™¨è&¢)ßf”ùy§m…á«a¶Úÿ 0¶ìå