[PATCH v5 2/2] mm: hugetlb: proc: add HugetlbPages field to /proc/PID/status

From: Naoya Horiguchi
Date: Thu Aug 20 2015 - 04:44:01 EST


Currently there is no easy way to get per-process usage of hugetlb pages, which
is inconvenient because userspace applications that use hugetlb typically want
to control their processes based on how much memory (including hugetlb) they
use. So this patch simply provides easy access to that information via
/proc/PID/status.

With this patch, for example, /proc/PID/status shows a line like this:

HugetlbPages: 20480 kB (10*2048kB)

If your system supports and enables multiple hugepage sizes, the line looks
like this:

HugetlbPages: 1069056 kB (1*1048576kB 10*2048kB)

This way you can easily see how many hugepages of each page size a process is
using.
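
For illustration only (this snippet is not part of the patch, and the helper
name hugetlb_kb is made up here), a userspace monitor could read the new field
with plain string parsing along these lines:

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>

/*
 * Example only: return the HugetlbPages value (in kB) of a process,
 * or -1 if the kernel does not expose the field.
 */
static long hugetlb_kb(pid_t pid)
{
        char path[64], line[256];
        long kb = -1;
        FILE *f;

        snprintf(path, sizeof(path), "/proc/%d/status", (int)pid);
        f = fopen(path, "r");
        if (!f)
                return -1;
        while (fgets(line, sizeof(line), f))
                if (sscanf(line, "HugetlbPages: %ld kB", &kb) == 1)
                        break;
        fclose(f);
        return kb;
}

int main(int argc, char **argv)
{
        pid_t pid = argc > 1 ? (pid_t)atoi(argv[1]) : getpid();

        printf("pid %d maps %ld kB of hugetlb pages\n", (int)pid, hugetlb_kb(pid));
        return 0;
}

Since the leading value is the total in kB, simple parsers can stop at the
first number and ignore the parenthesized per-size breakdown.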

Signed-off-by: Naoya Horiguchi <n-horiguchi@xxxxxxxxxxxxx>
Acked-by: Joern Engel <joern@xxxxxxxxx>
Acked-by: David Rientjes <rientjes@xxxxxxxxxx>
---
v4 -> v5:
- add (struct hugetlb_usage *) to struct mm_struct
- use %lu instead of %d for seq_printf()
- introduce hugetlb_fork

v3 -> v4:
- rename field (VmHugetlbRSS is not the best name)
- introduce struct hugetlb_usage in struct mm_struct (rather than extending
  struct mm_rss_stat)
- introduce hugetlb_report_usage()
- merged documentation update

v2 -> v3:
- use inline functions instead of macros for !CONFIG_HUGETLB_PAGE
---
Documentation/filesystems/proc.txt | 3 +++
fs/hugetlbfs/inode.c | 12 ++++++++++
fs/proc/task_mmu.c | 1 +
include/linux/hugetlb.h | 36 +++++++++++++++++++++++++++++
include/linux/mm_types.h | 7 ++++++
kernel/fork.c | 3 +++
mm/hugetlb.c | 46 ++++++++++++++++++++++++++++++++++++++
mm/mmap.c | 1 +
mm/rmap.c | 4 +++-
9 files changed, 112 insertions(+), 1 deletion(-)

diff --git v4.2-rc4/Documentation/filesystems/proc.txt v4.2-rc4_patched/Documentation/filesystems/proc.txt
index 22e40211ef64..f561fc46e41b 100644
--- v4.2-rc4/Documentation/filesystems/proc.txt
+++ v4.2-rc4_patched/Documentation/filesystems/proc.txt
@@ -174,6 +174,7 @@ For example, to get the status information of a process, all you have to do is
VmLib: 1412 kB
VmPTE: 20 kb
VmSwap: 0 kB
+ HugetlbPages: 0 kB (0*2048kB)
Threads: 1
SigQ: 0/28578
SigPnd: 0000000000000000
@@ -237,6 +238,8 @@ Table 1-2: Contents of the status files (as of 4.1)
VmPTE size of page table entries
VmPMD size of second level page tables
VmSwap size of swap usage (the number of referred swapents)
+ HugetlbPages size of hugetlb memory portions (with additional info
+ about number of mapped hugepages for each page size)
Threads number of threads
SigQ number of signals queued/max. number for queue
SigPnd bitmap of pending signals for the thread
diff --git v4.2-rc4/fs/hugetlbfs/inode.c v4.2-rc4_patched/fs/hugetlbfs/inode.c
index 0cf74df68617..bf6ea2645d35 100644
--- v4.2-rc4/fs/hugetlbfs/inode.c
+++ v4.2-rc4_patched/fs/hugetlbfs/inode.c
@@ -115,6 +115,13 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
return -EINVAL;

+        if (!vma->vm_mm->hugetlb_usage) {
+                vma->vm_mm->hugetlb_usage = kzalloc(sizeof(struct hugetlb_usage),
+                                                    GFP_KERNEL);
+                if (!vma->vm_mm->hugetlb_usage)
+                        return -ENOMEM;
+        }
+
vma_len = (loff_t)(vma->vm_end - vma->vm_start);

mutex_lock(&inode->i_mutex);
@@ -138,6 +145,11 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
return ret;
}

+void exit_hugetlb_mmap(struct mm_struct *mm)
+{
+        kfree(mm->hugetlb_usage);
+}
+
/*
* Called under down_write(mmap_sem).
*/
diff --git v4.2-rc4/fs/proc/task_mmu.c v4.2-rc4_patched/fs/proc/task_mmu.c
index 2c37938b82ee..b3cf7fa9ef6c 100644
--- v4.2-rc4/fs/proc/task_mmu.c
+++ v4.2-rc4_patched/fs/proc/task_mmu.c
@@ -69,6 +69,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
ptes >> 10,
pmds >> 10,
swap << (PAGE_SHIFT-10));
+ hugetlb_report_usage(m, mm);
}

unsigned long task_vsize(struct mm_struct *mm)
diff --git v4.2-rc4/include/linux/hugetlb.h v4.2-rc4_patched/include/linux/hugetlb.h
index d891f949466a..db642ad0b847 100644
--- v4.2-rc4/include/linux/hugetlb.h
+++ v4.2-rc4_patched/include/linux/hugetlb.h
@@ -469,6 +469,25 @@ static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
#define hugepages_supported() (HPAGE_SHIFT != 0)
#endif

+struct hugetlb_usage {
+        atomic_long_t count[HUGE_MAX_HSTATE];
+};
+
+void hugetlb_report_usage(struct seq_file *m, struct mm_struct *mm);
+void exit_hugetlb_mmap(struct mm_struct *mm);
+int hugetlb_fork(struct mm_struct *new, struct mm_struct *old);
+
+static inline void inc_hugetlb_count(struct mm_struct *mm, struct hstate *h)
+{
+        VM_BUG_ON_MM(!mm->hugetlb_usage, mm);
+        atomic_long_inc(&mm->hugetlb_usage->count[hstate_index(h)]);
+}
+
+static inline void dec_hugetlb_count(struct mm_struct *mm, struct hstate *h)
+{
+        VM_BUG_ON_MM(!mm->hugetlb_usage, mm);
+        atomic_long_dec(&mm->hugetlb_usage->count[hstate_index(h)]);
+}
#else /* CONFIG_HUGETLB_PAGE */
struct hstate {};
#define alloc_huge_page_node(h, nid) NULL
@@ -504,6 +523,23 @@ static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
{
return &mm->page_table_lock;
}
+
+static inline void hugetlb_report_usage(struct seq_file *f, struct mm_struct *m)
+{
+}
+
+static inline void exit_hugetlb_mmap(struct mm_struct *mm)
+{
+}
+
+static inline int hugetlb_fork(struct mm_struct *new, struct mm_struct *old)
+{
+        return 0;
+}
+
+static inline void dec_hugetlb_count(struct mm_struct *mm, struct hstate *h)
+{
+}
#endif /* CONFIG_HUGETLB_PAGE */

static inline spinlock_t *huge_pte_lock(struct hstate *h,
diff --git v4.2-rc4/include/linux/mm_types.h v4.2-rc4_patched/include/linux/mm_types.h
index 0038ac7466fd..851e964ee8d6 100644
--- v4.2-rc4/include/linux/mm_types.h
+++ v4.2-rc4_patched/include/linux/mm_types.h
@@ -364,6 +364,10 @@ struct mm_rss_stat {
atomic_long_t count[NR_MM_COUNTERS];
};

+#ifdef CONFIG_HUGETLB_PAGE
+struct hugetlb_usage;
+#endif
+
struct kioctx_table;
struct mm_struct {
struct vm_area_struct *mmap; /* list of VMAs */
@@ -484,6 +488,9 @@ struct mm_struct {
/* address of the bounds directory */
void __user *bd_addr;
#endif
+#ifdef CONFIG_HUGETLB_PAGE
+ struct hugetlb_usage *hugetlb_usage;
+#endif
};

static inline void mm_init_cpumask(struct mm_struct *mm)
diff --git v4.2-rc4/kernel/fork.c v4.2-rc4_patched/kernel/fork.c
index dbd9b8d7b7cc..d43baa91d48c 100644
--- v4.2-rc4/kernel/fork.c
+++ v4.2-rc4_patched/kernel/fork.c
@@ -425,6 +425,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
retval = khugepaged_fork(mm, oldmm);
if (retval)
goto out;
+ retval = hugetlb_fork(mm, oldmm);
+ if (retval)
+ goto out;

prev = NULL;
for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
diff --git v4.2-rc4/mm/hugetlb.c v4.2-rc4_patched/mm/hugetlb.c
index a8c3087089d8..3aa8c7919364 100644
--- v4.2-rc4/mm/hugetlb.c
+++ v4.2-rc4_patched/mm/hugetlb.c
@@ -2562,6 +2562,49 @@ void hugetlb_show_meminfo(void)
1UL << (huge_page_order(h) + PAGE_SHIFT - 10));
}

+static unsigned long mm_hstate_usage(struct mm_struct *mm, int hs_idx)
+{
+        if (!mm->hugetlb_usage)
+                return 0;
+        return atomic_long_read(&mm->hugetlb_usage->count[hs_idx]);
+}
+
+void hugetlb_report_usage(struct seq_file *m, struct mm_struct *mm)
+{
+        int i;
+        unsigned long total_usage = 0;
+
+        for (i = 0; i < HUGE_MAX_HSTATE; i++) {
+                total_usage += mm_hstate_usage(mm, i) *
+                        (huge_page_size(&hstates[i]) >> 10);
+        }
+
+        seq_printf(m, "HugetlbPages:\t%8lu kB (", total_usage);
+        for (i = 0; i < HUGE_MAX_HSTATE; i++) {
+                if (huge_page_order(&hstates[i]) == 0)
+                        break;
+                if (i > 0)
+                        seq_puts(m, " ");
+
+                seq_printf(m, "%ld*%lukB", mm_hstate_usage(mm, i),
+                           huge_page_size(&hstates[i]) >> 10);
+        }
+        seq_puts(m, ")\n");
+}
+
+int hugetlb_fork(struct mm_struct *new, struct mm_struct *old)
+{
+        if (old->hugetlb_usage) {
+                new->hugetlb_usage = kmalloc(sizeof(struct hugetlb_usage),
+                                             GFP_KERNEL);
+                if (!new->hugetlb_usage)
+                        return -ENOMEM;
+                memcpy(new->hugetlb_usage, old->hugetlb_usage,
+                       sizeof(struct hugetlb_usage));
+        }
+        return 0;
+}
+
/* Return the number pages of memory we physically have, in PAGE_SIZE units. */
unsigned long hugetlb_total_pages(void)
{
@@ -2797,6 +2840,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
get_page(ptepage);
page_dup_rmap(ptepage);
set_huge_pte_at(dst, addr, dst_pte, entry);
+ inc_hugetlb_count(dst, h);
}
spin_unlock(src_ptl);
spin_unlock(dst_ptl);
@@ -2877,6 +2921,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
if (huge_pte_dirty(pte))
set_page_dirty(page);

+ dec_hugetlb_count(mm, h);
page_remove_rmap(page);
force_flush = !__tlb_remove_page(tlb, page);
if (force_flush) {
@@ -3261,6 +3306,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
&& (vma->vm_flags & VM_SHARED)));
set_huge_pte_at(mm, address, ptep, new_pte);

+ inc_hugetlb_count(mm, h);
if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
/* Optimization, do the COW without a second fault */
ret = hugetlb_cow(mm, vma, address, ptep, new_pte, page, ptl);
diff --git v4.2-rc4/mm/mmap.c v4.2-rc4_patched/mm/mmap.c
index aa632ade2be7..9d9562bc79a8 100644
--- v4.2-rc4/mm/mmap.c
+++ v4.2-rc4_patched/mm/mmap.c
@@ -2847,6 +2847,7 @@ void exit_mmap(struct mm_struct *mm)
nr_accounted += vma_pages(vma);
vma = remove_vma(vma);
}
+ exit_hugetlb_mmap(mm);
vm_unacct_memory(nr_accounted);
}

diff --git v4.2-rc4/mm/rmap.c v4.2-rc4_patched/mm/rmap.c
index 171b68768df1..b33278bc4ddb 100644
--- v4.2-rc4/mm/rmap.c
+++ v4.2-rc4_patched/mm/rmap.c
@@ -1230,7 +1230,9 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
update_hiwater_rss(mm);

if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
- if (!PageHuge(page)) {
+ if (PageHuge(page)) {
+ dec_hugetlb_count(mm, page_hstate(page));
+ } else {
if (PageAnon(page))
dec_mm_counter(mm, MM_ANONPAGES);
else
--
2.4.3