[patch 3/3 take2] smaps: add clear_refs file to clear reference

From: David Rientjes
Date: Wed Feb 07 2007 - 01:17:06 EST


Adds an additional file to /proc/pid: clear_refs. When any non-zero
number is written to this file, the PG_referenced page flag and the PTE
accessed bit (PAGE_ACCESSED, meaning the page has been accessed) are
cleared for every page mapped in each VMA of the corresponding task.

It is now possible to measure how much memory a task is using by clearing
the reference bits with

echo 1 > /proc/pid/clear_refs

and then, after some time interval, checking the amount of referenced
memory reported for each VMA in the /proc/pid/smaps output.

The /proc/pid/clear_refs file is only writable by the user who owns the
task.

Cc: Hugh Dickins <hugh@xxxxxxxxxxx>
Cc: Paul Mundt <lethal@xxxxxxxxxxxx>
Cc: Christoph Lameter <clameter@xxxxxxx>
Signed-off-by: David Rientjes <rientjes@xxxxxxxxxx>
---
fs/proc/base.c | 31 +++++++++++++++++++++++++++++++
fs/proc/task_mmu.c | 37 +++++++++++++++++++++++++++++++++++++
include/linux/proc_fs.h | 1 +
3 files changed, 69 insertions(+), 0 deletions(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 1a979ea..b50315f 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -715,6 +715,35 @@ static struct file_operations proc_oom_adjust_operations = {
.write = oom_adjust_write,
};

+/*
+ * Write handler for the clear_refs file.
+ *
+ * At most PROC_NUMBUF - 1 bytes of the user buffer are examined and parsed as
+ * an integer.  If the value is non-zero, the referenced bits of every page
+ * mapped by the target task are cleared via clear_refs_smap().
+ *
+ * Returns the number of bytes consumed on success, -EFAULT if the user buffer
+ * cannot be copied, -EINVAL if the value parses to zero, and -ESRCH if the
+ * task can no longer be found.
+ */
+static ssize_t clear_refs_write(struct file *file, const char __user *buf,
+				size_t count, loff_t *ppos)
+{
+	struct task_struct *task;
+	struct mm_struct *mm;
+	char buffer[PROC_NUMBUF], *end;
+
+	memset(buffer, 0, sizeof(buffer));
+	if (count > sizeof(buffer) - 1)
+		count = sizeof(buffer) - 1;
+	if (copy_from_user(buffer, buf, count))
+		return -EFAULT;
+	if (!simple_strtol(buffer, &end, 0))
+		return -EINVAL;
+	if (*end == '\n')
+		end++;
+	task = get_proc_task(file->f_path.dentry->d_inode);
+	if (!task)
+		return -ESRCH;
+	/*
+	 * task->mm may be NULL (e.g. for a task without a user address space)
+	 * and may go away at any time, so take a proper reference instead of
+	 * dereferencing it directly.  A task without an mm has nothing to
+	 * clear; the write still succeeds.
+	 */
+	mm = get_task_mm(task);
+	if (mm) {
+		/* The VMA list must not change while it is being walked. */
+		down_read(&mm->mmap_sem);
+		clear_refs_smap(mm->mmap);
+		up_read(&mm->mmap_sem);
+		mmput(mm);
+	}
+	put_task_struct(task);
+	/* Defensive: never report a zero-length write back to the caller. */
+	if (end - buffer == 0)
+		return -EIO;
+	return end - buffer;
+}
+
+/*
+ * File operations for the clear_refs proc entry.  Only a write handler is
+ * provided; no read handler is set.
+ */
+static struct file_operations proc_clear_refs_operations = {
+ .write = clear_refs_write,
+};
+
#ifdef CONFIG_AUDITSYSCALL
#define TMPBUFLEN 21
static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
@@ -1856,6 +1885,7 @@ static struct pid_entry tgid_base_stuff[] = {
REG("mounts", S_IRUGO, mounts),
REG("mountstats", S_IRUSR, mountstats),
#ifdef CONFIG_MMU
+ REG("clear_refs", S_IWUSR, clear_refs),
REG("smaps", S_IRUGO, smaps),
#endif
#ifdef CONFIG_SECURITY
@@ -2137,6 +2167,7 @@ static struct pid_entry tid_base_stuff[] = {
LNK("exe", exe),
REG("mounts", S_IRUGO, mounts),
#ifdef CONFIG_MMU
+ REG("clear_refs", S_IWUSR, clear_refs),
REG("smaps", S_IRUGO, smaps),
#endif
#ifdef CONFIG_SECURITY
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 50bd004..b689a92 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -322,6 +322,27 @@ static void smaps_pte_func(struct pte_walker *walker, pte_t *pte,
}

/*
+ * Called for each PTE in the struct pte_walker address range. For all normal,
+ * present pages, we clear their referenced bits.
+ */
+static void clear_refs_pte_func(struct pte_walker *walker, pte_t *pte,
+				unsigned long addr)
+{
+	struct page *page;
+	pte_t ptent;
+
+	/* Work on a snapshot of the entry for the presence/page lookups. */
+	ptent = *pte;
+	if (!pte_present(ptent))
+		return;
+
+	/* Skip mappings that have no normal struct page behind them. */
+	page = vm_normal_page(walker->vma, addr, ptent);
+	if (!page)
+		return;
+
+	/*
+	 * pte_mkold() only returns a modified copy of its argument, so calling
+	 * it on the local snapshot and discarding the result would leave the
+	 * real page table entry untouched.  Clear the accessed bit in the
+	 * entry itself, then drop the page's PG_referenced flag.
+	 */
+	ptep_test_and_clear_young(walker->vma, addr, pte);
+	ClearPageReferenced(page);
+}
+
+/*
* Displays the smap for the process. smaps_pte_func() is called for each PTE
* in the range from vma->vm_start to vma->vm_end.
*/
@@ -343,6 +364,22 @@ static int show_smap(struct seq_file *m, void *v)
return show_map_internal(m, v, &mss);
}

+/*
+ * Clears the referenced bits of the pages mapped by @vma and by every VMA
+ * that follows it on the vm_next list.  clear_refs_pte_func() is called for
+ * each PTE in the range from vma->vm_start to vma->vm_end.  VMAs without an
+ * mm and hugetlb VMAs are skipped.
+ *
+ * NOTE(review): the caller is expected to keep the VMA list stable for the
+ * duration of the walk (presumably by holding mmap_sem) - confirm.
+ */
+void clear_refs_smap(struct vm_area_struct *vma)
+{
+	struct mm_struct *mm = NULL;
+
+	for (; vma; vma = vma->vm_next) {
+		struct pte_walker walker = {
+			.vma		= vma,
+			.start		= vma->vm_start,
+			.end		= vma->vm_end,
+			.private	= NULL,
+			.func		= clear_refs_pte_func,
+		};
+
+		if (vma->vm_mm && !is_vm_hugetlb_page(vma)) {
+			mm = vma->vm_mm;
+			walk_pgds(&walker);
+		}
+	}
+
+	/*
+	 * Translations cached in the TLB may still carry the old accessed
+	 * state, in which case later accesses would not be recorded in the
+	 * page tables.  Flush once after the whole walk so that any
+	 * accessed-bit changes made during it take effect.
+	 */
+	if (mm)
+		flush_tlb_mm(mm);
+}
+
static void *m_start(struct seq_file *m, loff_t *pos)
{
struct proc_maps_private *priv = m->private;
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 87dec8f..f3d426b 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -104,6 +104,7 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir);
unsigned long task_vsize(struct mm_struct *);
int task_statm(struct mm_struct *, int *, int *, int *, int *);
char *task_mem(struct mm_struct *, char *);
+void clear_refs_smap(struct vm_area_struct *);

extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
struct proc_dir_entry *parent);
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/