Re: [PACTH v1] mm, proc: Implement /proc/<pid>/totmaps

From: Sonny Rao
Date: Tue Aug 09 2016 - 20:30:58 EST


On Tue, Aug 9, 2016 at 12:16 PM, Konstantin Khlebnikov <koct9i@xxxxxxxxx> wrote:
>
> On Tue, Aug 9, 2016 at 7:05 PM, <robert.foss@xxxxxxxxxxxxx> wrote:
> > From: Sonny Rao <sonnyrao@xxxxxxxxxxxx>
> >
> > This is based on earlier work by Thiago Goncales. It implements a new
> > per process proc file which summarizes the contents of the smaps file
> > but doesn't display any addresses. It gives more detailed information
> > than statm like the PSS (proprotional set size). It differs from the
> > original implementation in that it doesn't use the full blown set of
> > seq operations, uses a different termination condition, and doesn't
> > displayed "Locked" as that was broken on the original implemenation.
> >
> > This new proc file provides information faster than parsing the potentially
> > huge smaps file.
>
> What statistics do you really need?

PSS (Proportional Set Size) and related accounting of shared pages
(swap could be shared) is where the existing summaries of memory usage
are cumbersome.

>
>
> I think, performance and flexibility issues could be really solved only by new
> syscall for querying memory statistics for address range in any process:
> process_vm_stat() or some kind of pumped fincore() for /proc/$pid/mem


That would be a good long term solution if people want similarly
complicated statistics without having to iterate through current
interfaces.
I mentioned monitoring before but I'll add that Proportional Set size,
Unique Set Size, Swap are per process are also useful because they
help us make better decisions about what processes need to be
throttled or gracefully killed.

>
> >
> > Signed-off-by: Sonny Rao <sonnyrao@xxxxxxxxxxxx>
> >
> > Tested-by: Robert Foss <robert.foss@xxxxxxxxxxxxx>
> > Signed-off-by: Robert Foss <robert.foss@xxxxxxxxxxxxx>
> >
> > ---
> > fs/proc/base.c | 1 +
> > fs/proc/internal.h | 4 ++
> > fs/proc/task_mmu.c | 126 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> > 3 files changed, 131 insertions(+)
> >
> > diff --git a/fs/proc/base.c b/fs/proc/base.c
> > index a11eb71..de3acdf 100644
> > --- a/fs/proc/base.c
> > +++ b/fs/proc/base.c
> > @@ -2855,6 +2855,7 @@ static const struct pid_entry tgid_base_stuff[] = {
> > REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
> > REG("smaps", S_IRUGO, proc_pid_smaps_operations),
> > REG("pagemap", S_IRUSR, proc_pagemap_operations),
> > + REG("totmaps", S_IRUGO, proc_totmaps_operations),
> > #endif
> > #ifdef CONFIG_SECURITY
> > DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
> > diff --git a/fs/proc/internal.h b/fs/proc/internal.h
> > index aa27810..6f3540f 100644
> > --- a/fs/proc/internal.h
> > +++ b/fs/proc/internal.h
> > @@ -58,6 +58,9 @@ union proc_op {
> > struct task_struct *task);
> > };
> >
> > +
> > +extern const struct file_operations proc_totmaps_operations;
> > +
> > struct proc_inode {
> > struct pid *pid;
> > int fd;
> > @@ -281,6 +284,7 @@ struct proc_maps_private {
> > struct mm_struct *mm;
> > #ifdef CONFIG_MMU
> > struct vm_area_struct *tail_vma;
> > + struct mem_size_stats *mss;
> > #endif
> > #ifdef CONFIG_NUMA
> > struct mempolicy *task_mempolicy;
> > diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
> > index 4648c7f..b61873e 100644
> > --- a/fs/proc/task_mmu.c
> > +++ b/fs/proc/task_mmu.c
> > @@ -802,6 +802,81 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
> > return 0;
> > }
> >
> > +static void add_smaps_sum(struct mem_size_stats *mss,
> > + struct mem_size_stats *mss_sum)
> > +{
> > + mss_sum->resident += mss->resident;
> > + mss_sum->pss += mss->pss;
> > + mss_sum->shared_clean += mss->shared_clean;
> > + mss_sum->shared_dirty += mss->shared_dirty;
> > + mss_sum->private_clean += mss->private_clean;
> > + mss_sum->private_dirty += mss->private_dirty;
> > + mss_sum->referenced += mss->referenced;
> > + mss_sum->anonymous += mss->anonymous;
> > + mss_sum->anonymous_thp += mss->anonymous_thp;
> > + mss_sum->swap += mss->swap;
> > +}
> > +
> > +static int totmaps_proc_show(struct seq_file *m, void *data)
> > +{
> > + struct proc_maps_private *priv = m->private;
> > + struct mm_struct *mm;
> > + struct vm_area_struct *vma;
> > + struct mem_size_stats *mss_sum = priv->mss;
> > +
> > + /* reference to priv->task already taken */
> > + /* but need to get the mm here because */
> > + /* task could be in the process of exiting */
> > + mm = get_task_mm(priv->task);
> > + if (!mm || IS_ERR(mm))
> > + return -EINVAL;
> > +
> > + down_read(&mm->mmap_sem);
> > + hold_task_mempolicy(priv);
> > +
> > + for (vma = mm->mmap; vma != priv->tail_vma; vma = vma->vm_next) {
> > + struct mem_size_stats mss;
> > + struct mm_walk smaps_walk = {
> > + .pmd_entry = smaps_pte_range,
> > + .mm = vma->vm_mm,
> > + .private = &mss,
> > + };
> > +
> > + if (vma->vm_mm && !is_vm_hugetlb_page(vma)) {
> > + memset(&mss, 0, sizeof(mss));
> > + walk_page_vma(vma, &smaps_walk);
> > + add_smaps_sum(&mss, mss_sum);
> > + }
> > + }
> > + seq_printf(m,
> > + "Rss: %8lu kB\n"
> > + "Pss: %8lu kB\n"
> > + "Shared_Clean: %8lu kB\n"
> > + "Shared_Dirty: %8lu kB\n"
> > + "Private_Clean: %8lu kB\n"
> > + "Private_Dirty: %8lu kB\n"
> > + "Referenced: %8lu kB\n"
> > + "Anonymous: %8lu kB\n"
> > + "AnonHugePages: %8lu kB\n"
> > + "Swap: %8lu kB\n",
> > + mss_sum->resident >> 10,
> > + (unsigned long)(mss_sum->pss >> (10 + PSS_SHIFT)),
> > + mss_sum->shared_clean >> 10,
> > + mss_sum->shared_dirty >> 10,
> > + mss_sum->private_clean >> 10,
> > + mss_sum->private_dirty >> 10,
> > + mss_sum->referenced >> 10,
> > + mss_sum->anonymous >> 10,
> > + mss_sum->anonymous_thp >> 10,
> > + mss_sum->swap >> 10);
> > +
> > + release_task_mempolicy(priv);
> > + up_read(&mm->mmap_sem);
> > + mmput(mm);
> > +
> > + return 0;
> > +}
> > +
> > static int show_pid_smap(struct seq_file *m, void *v)
> > {
> > return show_smap(m, v, 1);
> > @@ -836,6 +911,50 @@ static int tid_smaps_open(struct inode *inode, struct file *file)
> > return do_maps_open(inode, file, &proc_tid_smaps_op);
> > }
> >
> > +static int totmaps_open(struct inode *inode, struct file *file)
> > +{
> > + struct proc_maps_private *priv;
> > + int ret = -ENOMEM;
> > + priv = kzalloc(sizeof(*priv), GFP_KERNEL);
> > + if (priv) {
> > + priv->mss = kzalloc(sizeof(*priv->mss), GFP_KERNEL);
> > + if (!priv->mss)
> > + return -ENOMEM;
> > +
> > + /* we need to grab references to the task_struct */
> > + /* at open time, because there's a potential information */
> > + /* leak where the totmaps file is opened and held open */
> > + /* while the underlying pid to task mapping changes */
> > + /* underneath it */
> > + priv->task = get_pid_task(proc_pid(inode), PIDTYPE_PID);
> > + if (!priv->task) {
> > + kfree(priv->mss);
> > + kfree(priv);
> > + return -ESRCH;
> > + }
> > +
> > + ret = single_open(file, totmaps_proc_show, priv);
> > + if (ret) {
> > + put_task_struct(priv->task);
> > + kfree(priv->mss);
> > + kfree(priv);
> > + }
> > + }
> > + return ret;
> > +}
> > +
> > +static int totmaps_release(struct inode *inode, struct file *file)
> > +{
> > + struct seq_file *m = file->private_data;
> > + struct proc_maps_private *priv = m->private;
> > +
> > + put_task_struct(priv->task);
> > + kfree(priv->mss);
> > + kfree(priv);
> > + m->private = NULL;
> > + return single_release(inode, file);
> > +}
> > +
> > const struct file_operations proc_pid_smaps_operations = {
> > .open = pid_smaps_open,
> > .read = seq_read,
> > @@ -850,6 +969,13 @@ const struct file_operations proc_tid_smaps_operations = {
> > .release = proc_map_release,
> > };
> >
> > +const struct file_operations proc_totmaps_operations = {
> > + .open = totmaps_open,
> > + .read = seq_read,
> > + .llseek = seq_lseek,
> > + .release = totmaps_release,
> > +};
> > +
> > enum clear_refs_types {
> > CLEAR_REFS_ALL = 1,
> > CLEAR_REFS_ANON,
> > --
> > 2.7.4
> >