[PATCH v12 11/22] mm: pagewalk: Add p4d_entry() and pgd_entry()

From: Steven Price
Date: Fri Oct 18 2019 - 06:14:45 EST


pgd_entry() and pud_entry() were removed by commit 0b1fbfe50006c410
("mm/pagewalk: remove pgd_entry() and pud_entry()") because there were
no users. We're about to add users so reintroduce them, along with
p4d_entry() as we now have 5 levels of tables.

Note that commit a00cc7d9dd93d66a ("mm, x86: add support for
PUD-sized transparent hugepages") already re-added pud_entry() but with
different semantics to the other callbacks. Since there have never
been upstream users of this, revert the semantics back to match the
other callbacks. This means pud_entry() is called for all entries, not
just transparent huge pages.

Signed-off-by: Steven Price <steven.price@xxxxxxx>
---
include/linux/pagewalk.h | 19 +++++++++++++------
mm/pagewalk.c | 27 ++++++++++++++++-----------
2 files changed, 29 insertions(+), 17 deletions(-)

diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h
index bddd9759bab9..12004b097eae 100644
--- a/include/linux/pagewalk.h
+++ b/include/linux/pagewalk.h
@@ -8,15 +8,15 @@ struct mm_walk;

/**
* mm_walk_ops - callbacks for walk_page_range
- * @pud_entry: if set, called for each non-empty PUD (2nd-level) entry
- * this handler should only handle pud_trans_huge() puds.
- * the pmd_entry or pte_entry callbacks will be used for
- * regular PUDs.
- * @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry
+ * @pgd_entry: if set, called for each non-empty PGD (top-level) entry
+ * @p4d_entry: if set, called for each non-empty P4D entry
+ * @pud_entry: if set, called for each non-empty PUD entry
+ * @pmd_entry: if set, called for each non-empty PMD entry
* this handler is required to be able to handle
* pmd_trans_huge() pmds. They may simply choose to
* split_huge_page() instead of handling it explicitly.
- * @pte_entry: if set, called for each non-empty PTE (4th-level) entry
+ * @pte_entry: if set, called for each non-empty PTE (lowest-level)
+ * entry
* @pte_hole: if set, called for each hole at all levels
* @hugetlb_entry: if set, called for each hugetlb entry
* @test_walk: caller specific callback function to determine whether
@@ -24,8 +24,15 @@ struct mm_walk;
* "do page table walk over the current vma", returning
* a negative value means "abort current page table walk
* right now" and returning 1 means "skip the current vma"
+ *
+ * p?d_entry callbacks are called even if those levels are folded on a
+ * particular architecture/configuration.
*/
struct mm_walk_ops {
+ int (*pgd_entry)(pgd_t *pgd, unsigned long addr,
+ unsigned long next, struct mm_walk *walk);
+ int (*p4d_entry)(p4d_t *p4d, unsigned long addr,
+ unsigned long next, struct mm_walk *walk);
int (*pud_entry)(pud_t *pud, unsigned long addr,
unsigned long next, struct mm_walk *walk);
int (*pmd_entry)(pmd_t *pmd, unsigned long addr,
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index d48c2a986ea3..fc4d98a3a5a0 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -93,15 +93,9 @@ static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
}

if (ops->pud_entry) {
- spinlock_t *ptl = pud_trans_huge_lock(pud, walk->vma);
-
- if (ptl) {
- err = ops->pud_entry(pud, addr, next, walk);
- spin_unlock(ptl);
- if (err)
- break;
- continue;
- }
+ err = ops->pud_entry(pud, addr, next, walk);
+ if (err)
+ break;
}

split_huge_pud(walk->vma, pud, addr);
@@ -135,7 +129,12 @@ static int walk_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
break;
continue;
}
- if (ops->pmd_entry || ops->pte_entry)
+ if (ops->p4d_entry) {
+ err = ops->p4d_entry(p4d, addr, next, walk);
+ if (err)
+ break;
+ }
+ if (ops->pud_entry || ops->pmd_entry || ops->pte_entry)
err = walk_pud_range(p4d, addr, next, walk);
if (err)
break;
@@ -162,7 +161,13 @@ static int walk_pgd_range(unsigned long addr, unsigned long end,
break;
continue;
}
- if (ops->pmd_entry || ops->pte_entry)
+ if (ops->pgd_entry) {
+ err = ops->pgd_entry(pgd, addr, next, walk);
+ if (err)
+ break;
+ }
+ if (ops->p4d_entry || ops->pud_entry || ops->pmd_entry ||
+ ops->pte_entry)
err = walk_p4d_range(pgd, addr, next, walk);
if (err)
break;
--
2.20.1