[PATCH 3/9] kstaled: page_referenced_kstaled() and supporting infrastructure.

From: Michel Lespinasse
Date: Tue Sep 27 2011 - 20:50:58 EST


Add a new page_referenced_kstaled() interface. The desired behavior
is that page_referenced() reports page references since the last
page_referenced() call, and page_referenced_kstaled() reports page
references since the last page_referenced_kstaled() call; the two
interfaces are independent and do not consume each other's references.
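
To illustrate, a hypothetical caller could look like the sketch below.
kstaled itself arrives later in this series; kstaled_scan_page() and
account_idle_page() are made-up names, not part of this patch, and
passing false for is_locked assumes the caller does not hold the page
lock:

/* Hypothetical kstaled scan step (illustration only). */
static void kstaled_scan_page(struct page *page)
{
	struct page_referenced_info info;

	/* Consumes only kstaled's view of the page references. */
	page_referenced_kstaled(page, false, &info);

	if (!(info.pr_flags & PR_REFERENCED))
		account_idle_page(page);	/* made-up helper */

	/*
	 * Any pte young bit consumed above was recorded in PG_young,
	 * so a later page_referenced() call from vmscan still sees
	 * the page as referenced.
	 */
}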

The following events are counted as kstaled page references (see the
model after this list):
- CPU data access to the page (as noticed through pte_young());
- mark_page_accessed() calls;
- page being freed / reallocated.
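
The self-contained userspace model below sketches how the first two
events drive the new PG_young / PG_idle bits; plain bools stand in for
the page flags and the pte young bit, and every name in it is
illustrative rather than kernel API. The freed/reallocated case is not
modeled; it presumably falls out of the page flag bits being reset when
a page is freed. Note how a single hardware young bit feeds both
consumers: whichever scan clears it first records the fact in PG_young
or PG_idle so the other scan still observes the access.

#include <stdbool.h>
#include <stdio.h>

static bool pte_young; /* hardware-set young bit in the pte */
static bool pg_young;  /* PG_young: kstaled consumed pte_young */
static bool pg_idle;   /* PG_idle: no reference since last kstaled scan */

static void cpu_access(void)         { pte_young = true; }
static void mark_page_accessed(void) { pg_idle = false; }

/* Simplified model of page_referenced_kstaled(). */
static bool referenced_kstaled(void)
{
	bool ref = !pg_idle;
	pg_idle = true;            /* start a new interval */
	if (pte_young) {
		pte_young = false;
		pg_young = true;   /* keep the reference visible to vmscan */
		ref = true;
	}
	return ref;
}

/* Simplified model of page_referenced() as changed by this patch. */
static bool referenced_vmscan(void)
{
	bool ref = pg_young;       /* reference already consumed by kstaled */
	pg_young = false;
	if (pte_young) {
		pte_young = false;
		pg_idle = false;   /* keep the reference visible to kstaled */
		ref = true;
	}
	return ref;
}

int main(void)
{
	cpu_access();
	printf("%d\n", referenced_kstaled()); /* 1: pte_young consumed */
	printf("%d\n", referenced_vmscan());  /* 1: seen via PG_young */
	printf("%d\n", referenced_kstaled()); /* 0: interval was idle */
	mark_page_accessed();
	printf("%d\n", referenced_kstaled()); /* 1: PG_idle was cleared */
	return 0;
}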


Signed-off-by: Michel Lespinasse <walken@xxxxxxxxxx>
---
 include/linux/page-flags.h |   35 ++++++++++++++++++++++
 include/linux/rmap.h       |   68 +++++++++++++++++++++++++++++++++++++++----
 mm/rmap.c                  |   62 ++++++++++++++++++++++++++++-----------
 mm/swap.c                  |    1 +
4 files changed, 141 insertions(+), 25 deletions(-)

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 6081493..e964d98 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -51,6 +51,13 @@
* PG_hwpoison indicates that a page got corrupted in hardware and contains
* data with incorrect ECC bits that triggered a machine check. Accessing is
* not safe since it may cause another machine check. Don't touch!
+ *
+ * PG_young indicates that kstaled cleared the young bit on some PTEs pointing
+ * to that page. In order to avoid interacting with the LRU algorithm, we want
+ * the next page_referenced() call to still consider the page young.
+ *
+ * PG_idle indicates that the page has not been referenced since the last time
+ * kstaled scanned it.
*/

/*
@@ -107,6 +114,10 @@ enum pageflags {
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
PG_compound_lock,
#endif
+#ifdef CONFIG_KSTALED
+ PG_young, /* kstaled cleared pte_young */
+ PG_idle, /* idle since start of kstaled interval */
+#endif
__NR_PAGEFLAGS,

/* Filesystems */
@@ -278,6 +289,30 @@ PAGEFLAG_FALSE(HWPoison)
#define __PG_HWPOISON 0
#endif

+#ifdef CONFIG_KSTALED
+
+PAGEFLAG(Young, young)
+PAGEFLAG(Idle, idle)
+
+static inline void set_page_young(struct page *page)
+{
+ if (!PageYoung(page))
+ SetPageYoung(page);
+}
+
+static inline void clear_page_idle(struct page *page)
+{
+ if (PageIdle(page))
+ ClearPageIdle(page);
+}
+
+#else /* !CONFIG_KSTALED */
+
+static inline void set_page_young(struct page *page) {}
+static inline void clear_page_idle(struct page *page) {}
+
+#endif /* CONFIG_KSTALED */
+
u64 stable_page_flags(struct page *page);

static inline int PageUptodate(struct page *page)
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 82fef42..88a0b85 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -74,6 +74,8 @@ struct page_referenced_info {
unsigned long vm_flags;
unsigned int pr_flags;
#define PR_REFERENCED 1
+#define PR_DIRTY 2
+#define PR_FOR_KSTALED 4
};

#ifdef CONFIG_MMU
@@ -165,8 +167,8 @@ static inline void page_dup_rmap(struct page *page)
/*
* Called from mm/vmscan.c to handle paging out
*/
-void page_referenced(struct page *, int is_locked, struct mem_cgroup *cnt,
- struct page_referenced_info *info);
+void __page_referenced(struct page *, int is_locked, struct mem_cgroup *cnt,
+ struct page_referenced_info *info);
void page_referenced_one(struct page *, struct vm_area_struct *,
unsigned long address, unsigned int *mapcount,
struct page_referenced_info *info);
@@ -244,12 +246,10 @@ int rmap_walk(struct page *page, int (*rmap_one)(struct page *,
#define anon_vma_prepare(vma) (0)
#define anon_vma_link(vma) do {} while (0)

-static inline void page_referenced(struct page *page, int is_locked,
- struct mem_cgroup *cnt,
- struct page_referenced_info *info)
+static inline void __page_referenced(struct page *page, int is_locked,
+ struct mem_cgroup *cnt,
+ struct page_referenced_info *info)
{
- info->vm_flags = 0;
- info->pr_flags = 0;
}

#define try_to_unmap(page, refs) SWAP_FAIL
@@ -262,6 +262,60 @@ static inline int page_mkclean(struct page *page)

#endif /* CONFIG_MMU */

+/**
+ * page_referenced - test if the page was referenced
+ * @page: the page to test
+ * @is_locked: caller holds lock on the page
+ * @mem_cont: target memory controller
+ * @info: collects vm_flags of the vmas that referenced the page, and PR_* flags
+ *
+ * Quick test_and_clear_referenced for all mappings of a page;
+ * the result is reported through @info rather than a return value.
+ */
+static inline void page_referenced(struct page *page,
+ int is_locked,
+ struct mem_cgroup *mem_cont,
+ struct page_referenced_info *info)
+{
+ info->vm_flags = 0;
+ info->pr_flags = 0;
+
+#ifdef CONFIG_KSTALED
+ /*
+ * Always clear PageYoung at the start of a scanning interval. It will
+ * get set if kstaled clears the young bit in a pte mapping the page,
+ * so that vmscan will still see the page as referenced.
+ */
+ if (PageYoung(page)) {
+ ClearPageYoung(page);
+ info->pr_flags |= PR_REFERENCED;
+ }
+#endif
+
+ __page_referenced(page, is_locked, mem_cont, info);
+}
+
+#ifdef CONFIG_KSTALED
+static inline void page_referenced_kstaled(struct page *page, bool is_locked,
+ struct page_referenced_info *info)
+{
+ info->vm_flags = 0;
+ info->pr_flags = PR_FOR_KSTALED;
+
+ /*
+ * Always set PageIdle at the start of a scanning interval. It will
+ * get cleared if a young page reference is encountered; otherwise
+ * the page will be counted as idle at the next kstaled scan cycle.
+ */
+ if (!PageIdle(page)) {
+ SetPageIdle(page);
+ info->pr_flags |= PR_REFERENCED;
+ }
+
+ __page_referenced(page, is_locked, NULL, info);
+}
+#endif
+
/*
* Return values of try_to_unmap
*/
diff --git a/mm/rmap.c b/mm/rmap.c
index f87afd0..fa8440e 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -670,6 +670,8 @@ void page_referenced_one(struct page *page, struct vm_area_struct *vma,
return;
}

+ info->pr_flags |= PR_DIRTY;
+
if (vma->vm_flags & VM_LOCKED) {
spin_unlock(&mm->page_table_lock);
*mapcount = 0; /* break early from loop */
@@ -678,8 +680,17 @@ void page_referenced_one(struct page *page, struct vm_area_struct *vma,
}

/* go ahead even if the pmd is pmd_trans_splitting() */
- if (pmdp_clear_flush_young_notify(vma, address, pmd))
- referenced = true;
+ if (!(info->pr_flags & PR_FOR_KSTALED)) {
+ if (pmdp_clear_flush_young_notify(vma, address, pmd)) {
+ referenced = true;
+ clear_page_idle(page);
+ }
+ } else {
+ if (pmdp_test_and_clear_young(vma, address, pmd)) {
+ referenced = true;
+ set_page_young(page);
+ }
+ }
spin_unlock(&mm->page_table_lock);
} else {
pte_t *pte;
@@ -693,6 +704,9 @@ void page_referenced_one(struct page *page, struct vm_area_struct *vma,
if (!pte)
return;

+ if (pte_dirty(*pte))
+ info->pr_flags |= PR_DIRTY;
+
if (vma->vm_flags & VM_LOCKED) {
pte_unmap_unlock(pte, ptl);
*mapcount = 0; /* break early from loop */
@@ -700,23 +714,38 @@ void page_referenced_one(struct page *page, struct vm_area_struct *vma,
return;
}

- if (ptep_clear_flush_young_notify(vma, address, pte)) {
+ if (!(info->pr_flags & PR_FOR_KSTALED)) {
+ if (ptep_clear_flush_young_notify(vma, address, pte)) {
+ /*
+ * Don't treat a reference through a
+ * sequentially read mapping as such.
+ * If the page has been used in another
+ * mapping, we will catch it; if this other
+ * mapping is already gone, the unmap path
+ * will have set PG_referenced or activated
+ * the page.
+ */
+ if (likely(!VM_SequentialReadHint(vma)))
+ referenced = true;
+ clear_page_idle(page);
+ }
+ } else {
/*
- * Don't treat a reference through a sequentially read
- * mapping as such. If the page has been used in
- * another mapping, we will catch it; if this other
- * mapping is already gone, the unmap path will have
- * set PG_referenced or activated the page.
+ * Within page_referenced_kstaled():
+ * skip TLB shootdown & VM_SequentialReadHint heuristic
*/
- if (likely(!VM_SequentialReadHint(vma)))
+ if (ptep_test_and_clear_young(vma, address, pte)) {
referenced = true;
+ set_page_young(page);
+ }
}
pte_unmap_unlock(pte, ptl);
}

/* Pretend the page is referenced if the task has the
swap token and is in the middle of a page fault. */
- if (mm != current->mm && has_swap_token(mm) &&
+ if (!(info->pr_flags & PR_FOR_KSTALED) &&
+ mm != current->mm && has_swap_token(mm) &&
rwsem_is_locked(&mm->mmap_sem))
referenced = true;

@@ -828,7 +857,7 @@ static void page_referenced_file(struct page *page,
}

/**
- * page_referenced - test if the page was referenced
+ * __page_referenced - test if the page was referenced
* @page: the page to test
* @is_locked: caller holds lock on the page
* @mem_cont: target memory controller
@@ -838,16 +867,13 @@ static void page_referenced_file(struct page *page,
* Quick test_and_clear_referenced for all mappings to a page,
* returns the number of ptes which referenced the page.
*/
-void page_referenced(struct page *page,
- int is_locked,
- struct mem_cgroup *mem_cont,
- struct page_referenced_info *info)
+void __page_referenced(struct page *page,
+ int is_locked,
+ struct mem_cgroup *mem_cont,
+ struct page_referenced_info *info)
{
int we_locked = 0;

- info->vm_flags = 0;
- info->pr_flags = 0;
-
if (page_mapped(page) && page_rmapping(page)) {
if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
we_locked = trylock_page(page);
diff --git a/mm/swap.c b/mm/swap.c
index 3a442f1..d65b69e 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -344,6 +344,7 @@ void mark_page_accessed(struct page *page)
} else if (!PageReferenced(page)) {
SetPageReferenced(page);
}
+ clear_page_idle(page);
}

EXPORT_SYMBOL(mark_page_accessed);
--
1.7.3.1
