Re: [PACTH v1] mm, proc: Implement /proc/<pid>/totmaps

From: Konstantin Khlebnikov
Date: Tue Aug 09 2016 - 15:17:11 EST


On Tue, Aug 9, 2016 at 7:05 PM, <robert.foss@xxxxxxxxxxxxx> wrote:
> From: Sonny Rao <sonnyrao@xxxxxxxxxxxx>
>
> This is based on earlier work by Thiago Goncales. It implements a new
> per process proc file which summarizes the contents of the smaps file
> but doesn't display any addresses. It gives more detailed information
> than statm like the PSS (proprotional set size). It differs from the
> original implementation in that it doesn't use the full blown set of
> seq operations, uses a different termination condition, and doesn't
> displayed "Locked" as that was broken on the original implemenation.
>
> This new proc file provides information faster than parsing the potentially
> huge smaps file.

What statistics do you really need?

I think, performance and flexibility issues could be really solved only by new
syscall for querying memory statistics for address range in any process:
process_vm_stat() or some kind of pumped fincore() for /proc/$pid/mem

>
> Signed-off-by: Sonny Rao <sonnyrao@xxxxxxxxxxxx>
>
> Tested-by: Robert Foss <robert.foss@xxxxxxxxxxxxx>
> Signed-off-by: Robert Foss <robert.foss@xxxxxxxxxxxxx>
>
> ---
> fs/proc/base.c | 1 +
> fs/proc/internal.h | 4 ++
> fs/proc/task_mmu.c | 126 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 131 insertions(+)
>
> diff --git a/fs/proc/base.c b/fs/proc/base.c
> index a11eb71..de3acdf 100644
> --- a/fs/proc/base.c
> +++ b/fs/proc/base.c
> @@ -2855,6 +2855,7 @@ static const struct pid_entry tgid_base_stuff[] = {
> REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
> REG("smaps", S_IRUGO, proc_pid_smaps_operations),
> REG("pagemap", S_IRUSR, proc_pagemap_operations),
> + REG("totmaps", S_IRUGO, proc_totmaps_operations),
> #endif
> #ifdef CONFIG_SECURITY
> DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
> diff --git a/fs/proc/internal.h b/fs/proc/internal.h
> index aa27810..6f3540f 100644
> --- a/fs/proc/internal.h
> +++ b/fs/proc/internal.h
> @@ -58,6 +58,9 @@ union proc_op {
> struct task_struct *task);
> };
>
> +
> +extern const struct file_operations proc_totmaps_operations;
> +
> struct proc_inode {
> struct pid *pid;
> int fd;
> @@ -281,6 +284,7 @@ struct proc_maps_private {
> struct mm_struct *mm;
> #ifdef CONFIG_MMU
> struct vm_area_struct *tail_vma;
> + struct mem_size_stats *mss;
> #endif
> #ifdef CONFIG_NUMA
> struct mempolicy *task_mempolicy;
> diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
> index 4648c7f..b61873e 100644
> --- a/fs/proc/task_mmu.c
> +++ b/fs/proc/task_mmu.c
> @@ -802,6 +802,81 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
> return 0;
> }
>
> +static void add_smaps_sum(struct mem_size_stats *mss,
> + struct mem_size_stats *mss_sum)
> +{
> + mss_sum->resident += mss->resident;
> + mss_sum->pss += mss->pss;
> + mss_sum->shared_clean += mss->shared_clean;
> + mss_sum->shared_dirty += mss->shared_dirty;
> + mss_sum->private_clean += mss->private_clean;
> + mss_sum->private_dirty += mss->private_dirty;
> + mss_sum->referenced += mss->referenced;
> + mss_sum->anonymous += mss->anonymous;
> + mss_sum->anonymous_thp += mss->anonymous_thp;
> + mss_sum->swap += mss->swap;
> +}
> +
> +static int totmaps_proc_show(struct seq_file *m, void *data)
> +{
> + struct proc_maps_private *priv = m->private;
> + struct mm_struct *mm;
> + struct vm_area_struct *vma;
> + struct mem_size_stats *mss_sum = priv->mss;
> +
> + /* reference to priv->task already taken */
> + /* but need to get the mm here because */
> + /* task could be in the process of exiting */
> + mm = get_task_mm(priv->task);
> + if (!mm || IS_ERR(mm))
> + return -EINVAL;
> +
> + down_read(&mm->mmap_sem);
> + hold_task_mempolicy(priv);
> +
> + for (vma = mm->mmap; vma != priv->tail_vma; vma = vma->vm_next) {
> + struct mem_size_stats mss;
> + struct mm_walk smaps_walk = {
> + .pmd_entry = smaps_pte_range,
> + .mm = vma->vm_mm,
> + .private = &mss,
> + };
> +
> + if (vma->vm_mm && !is_vm_hugetlb_page(vma)) {
> + memset(&mss, 0, sizeof(mss));
> + walk_page_vma(vma, &smaps_walk);
> + add_smaps_sum(&mss, mss_sum);
> + }
> + }
> + seq_printf(m,
> + "Rss: %8lu kB\n"
> + "Pss: %8lu kB\n"
> + "Shared_Clean: %8lu kB\n"
> + "Shared_Dirty: %8lu kB\n"
> + "Private_Clean: %8lu kB\n"
> + "Private_Dirty: %8lu kB\n"
> + "Referenced: %8lu kB\n"
> + "Anonymous: %8lu kB\n"
> + "AnonHugePages: %8lu kB\n"
> + "Swap: %8lu kB\n",
> + mss_sum->resident >> 10,
> + (unsigned long)(mss_sum->pss >> (10 + PSS_SHIFT)),
> + mss_sum->shared_clean >> 10,
> + mss_sum->shared_dirty >> 10,
> + mss_sum->private_clean >> 10,
> + mss_sum->private_dirty >> 10,
> + mss_sum->referenced >> 10,
> + mss_sum->anonymous >> 10,
> + mss_sum->anonymous_thp >> 10,
> + mss_sum->swap >> 10);
> +
> + release_task_mempolicy(priv);
> + up_read(&mm->mmap_sem);
> + mmput(mm);
> +
> + return 0;
> +}
> +
> static int show_pid_smap(struct seq_file *m, void *v)
> {
> return show_smap(m, v, 1);
> @@ -836,6 +911,50 @@ static int tid_smaps_open(struct inode *inode, struct file *file)
> return do_maps_open(inode, file, &proc_tid_smaps_op);
> }
>
> +static int totmaps_open(struct inode *inode, struct file *file)
> +{
> + struct proc_maps_private *priv;
> + int ret = -ENOMEM;
> + priv = kzalloc(sizeof(*priv), GFP_KERNEL);
> + if (priv) {
> + priv->mss = kzalloc(sizeof(*priv->mss), GFP_KERNEL);
> + if (!priv->mss)
> + return -ENOMEM;
> +
> + /* we need to grab references to the task_struct */
> + /* at open time, because there's a potential information */
> + /* leak where the totmaps file is opened and held open */
> + /* while the underlying pid to task mapping changes */
> + /* underneath it */
> + priv->task = get_pid_task(proc_pid(inode), PIDTYPE_PID);
> + if (!priv->task) {
> + kfree(priv->mss);
> + kfree(priv);
> + return -ESRCH;
> + }
> +
> + ret = single_open(file, totmaps_proc_show, priv);
> + if (ret) {
> + put_task_struct(priv->task);
> + kfree(priv->mss);
> + kfree(priv);
> + }
> + }
> + return ret;
> +}
> +
> +static int totmaps_release(struct inode *inode, struct file *file)
> +{
> + struct seq_file *m = file->private_data;
> + struct proc_maps_private *priv = m->private;
> +
> + put_task_struct(priv->task);
> + kfree(priv->mss);
> + kfree(priv);
> + m->private = NULL;
> + return single_release(inode, file);
> +}
> +
> const struct file_operations proc_pid_smaps_operations = {
> .open = pid_smaps_open,
> .read = seq_read,
> @@ -850,6 +969,13 @@ const struct file_operations proc_tid_smaps_operations = {
> .release = proc_map_release,
> };
>
> +const struct file_operations proc_totmaps_operations = {
> + .open = totmaps_open,
> + .read = seq_read,
> + .llseek = seq_lseek,
> + .release = totmaps_release,
> +};
> +
> enum clear_refs_types {
> CLEAR_REFS_ALL = 1,
> CLEAR_REFS_ANON,
> --
> 2.7.4
>