[RFC PATCH v2 5/9] mm, pgtable: Add a refcount to PTE table

From: Chih-En Lin
Date: Tue Sep 27 2022 - 12:28:53 EST


Reuse the _refcount in struct page for the page table to maintain the
number of process references to a COWed PTE table. Before decreasing the
refcount, check whether it is one; if it is, the shared PTE table is
reused instead of having its refcount decreased.

Signed-off-by: Chih-En Lin <shiyn.lin@xxxxxxxxx>
---
include/linux/mm.h | 1 +
include/linux/pgtable.h | 28 ++++++++++++++++++++++++++++
mm/memory.c | 1 +
3 files changed, 30 insertions(+)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 965523dcca3b8..bfe6a8c7ab9ed 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2290,6 +2290,7 @@ static inline bool pgtable_pte_page_ctor(struct page *page)
__SetPageTable(page);
inc_lruvec_page_state(page, NR_PAGETABLE);
page->cow_pte_owner = NULL;
+ set_page_count(page, 1);
return true;
}

diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 25c1e5c42fdf3..8b497d7d800ed 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -9,6 +9,7 @@
#ifdef CONFIG_MMU

#include <linux/mm_types.h>
+#include <linux/page_ref.h>
#include <linux/bug.h>
#include <linux/errno.h>
#include <asm-generic/pgtable_uffd.h>
@@ -628,6 +629,33 @@ static inline bool cow_pte_owner_is_same(pmd_t *pmd, pmd_t *owner)
return smp_load_acquire(&pmd_page(*pmd)->cow_pte_owner) == owner;
}

+static inline int pmd_get_pte(pmd_t *pmd)
+{
+ return page_ref_inc_return(pmd_page(*pmd)); /* +1 ref, returns new count */
+}
+
+/*
+ * If the COW PTE refcount is 1, do not decrease it: when reuse is true,
+ * clear write protection of the corresponding PMD entry and reset the
+ * COW PTE owner (cow_pte_fallback()) to reuse the table and return 1;
+ * when reuse is false, do nothing (the PTE table was already handled).
+ * Otherwise drop one reference. Every case but the reuse one returns 0.
+ */
+static inline int pmd_put_pte(struct vm_area_struct *vma, pmd_t *pmd,
+ unsigned long addr, bool reuse)
+{
+ if (!page_ref_add_unless(pmd_page(*pmd), -1, 1) && reuse) {
+ cow_pte_fallback(vma, pmd, addr);
+ return 1;
+ }
+ return 0;
+}
+
+static inline int cow_pte_count(pmd_t *pmd)
+{
+ return page_count(pmd_page(*pmd)); /* refcount of the PTE table page */
+}
+
#ifndef pte_access_permitted
#define pte_access_permitted(pte, write) \
(pte_present(pte) && (!(write) || pte_write(pte)))
diff --git a/mm/memory.c b/mm/memory.c
index d29f84801f3cd..3e66e229f4169 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2875,6 +2875,7 @@ void cow_pte_fallback(struct vm_area_struct *vma, pmd_t *pmd,
pmd_t new;

VM_WARN_ON(pmd_write(*pmd));
+ VM_WARN_ON(cow_pte_count(pmd) != 1);

start = addr & PMD_MASK;
end = (addr + PMD_SIZE) & PMD_MASK;
--
2.37.3