Re: [PATCH v2 11/46] hugetlb: add hugetlb_pte to track HugeTLB page table entries
From: Mike Kravetz
Date: Fri Feb 24 2023 - 19:10:39 EST
On 02/18/23 00:27, James Houghton wrote:
> After high-granularity mapping, page table entries for HugeTLB pages can
> be of any size/type. (For example, we can have a 1G page mapped with a
> mix of PMDs and PTEs.) This struct is to help keep track of a HugeTLB
> PTE after we have done a page table walk.
>
> Without this, we'd have to pass around the "size" of the PTE everywhere.
> We effectively did this before; it could be fetched from the hstate,
> which we pass around pretty much everywhere.
Agreed. I cannot think of a better way to handle the possibility of
having hugetlb page table entries at any level. The somewhat unfortunate
part is that code outside hugetlbfs proper needs to know about this.
However, there is already 'special handling' with hugetlb assumptions
in those places today.
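
To make that concrete for anyone else following along: after this series,
code outside hugetlb carries the hugetlb_pte it got back from the page
table walk and asks it for the mapping size, instead of assuming
huge_page_size(hstate). Roughly (just a sketch; the hpte is filled in by
the walk helpers added later in the series):

        struct hugetlb_pte hpte;
        unsigned long sz;

        /* a walk fills in hpte.ptep, .shift, .level and .ptl */

        /* the mapping size comes from the entry that was actually found ... */
        sz = hugetlb_pte_size(&hpte);
        /* ... which may now be smaller than huge_page_size(hstate_vma(vma)) */
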
> hugetlb_pte_present_leaf is included here as a helper function that will
> be used frequently later on.
>
> Signed-off-by: James Houghton <jthoughton@xxxxxxxxxx>
>
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index a1ceb9417f01..eeacadf3272b 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -26,6 +26,25 @@ typedef struct { unsigned long pd; } hugepd_t;
> #define __hugepd(x) ((hugepd_t) { (x) })
> #endif
>
> +enum hugetlb_level {
> + HUGETLB_LEVEL_PTE = 1,
> + /*
> + * We always include PMD, PUD, and P4D in this enum definition so that,
> + * when logged as an integer, we can easily tell which level it is.
> + */
> + HUGETLB_LEVEL_PMD,
> + HUGETLB_LEVEL_PUD,
> + HUGETLB_LEVEL_P4D,
> + HUGETLB_LEVEL_PGD,
> +};
> +
> +struct hugetlb_pte {
> + pte_t *ptep;
> + unsigned int shift;
> + enum hugetlb_level level;
> + spinlock_t *ptl;
> +};
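
One thing worth spelling out for other reviewers: 'shift' is not redundant
with 'level' because of contiguous entries (see the comment in
__hugetlb_pte_init() below). Illustrative values only:

        /* 2M leaf at PMD level on x86-64 */
        hpte.shift = PMD_SHIFT;                 /* 21 */
        hpte.level = HUGETLB_LEVEL_PMD;

        /* 64K contiguous-PTE mapping on arm64 with 4K base pages:
         * still PTE level, but the shift covers the whole contiguous run.
         */
        hpte.shift = CONT_PTE_SHIFT;            /* 16 */
        hpte.level = HUGETLB_LEVEL_PTE;
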
> +
> #ifdef CONFIG_HUGETLB_PAGE
>
> #include <linux/mempolicy.h>
> @@ -39,6 +58,20 @@ typedef struct { unsigned long pd; } hugepd_t;
> */
> #define __NR_USED_SUBPAGE 3
>
> +static inline
> +unsigned long hugetlb_pte_size(const struct hugetlb_pte *hpte)
> +{
> + return 1UL << hpte->shift;
> +}
> +
> +static inline
> +unsigned long hugetlb_pte_mask(const struct hugetlb_pte *hpte)
> +{
> + return ~(hugetlb_pte_size(hpte) - 1);
> +}
> +
> +bool hugetlb_pte_present_leaf(const struct hugetlb_pte *hpte, pte_t pte);
> +
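
The size/mask helpers read naturally at the call sites, e.g. (sketch):

        /* range covered by the entry that maps 'addr' */
        unsigned long start = addr & hugetlb_pte_mask(&hpte);
        unsigned long end = start + hugetlb_pte_size(&hpte);
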
> struct hugepage_subpool {
> spinlock_t lock;
> long count;
> @@ -1234,6 +1267,45 @@ static inline spinlock_t *huge_pte_lock(struct hstate *h,
> return ptl;
> }
>
> +static inline
> +spinlock_t *hugetlb_pte_lockptr(struct hugetlb_pte *hpte)
> +{
> + return hpte->ptl;
> +}
> +
> +static inline
> +spinlock_t *hugetlb_pte_lock(struct hugetlb_pte *hpte)
> +{
> + spinlock_t *ptl = hugetlb_pte_lockptr(hpte);
> +
> + spin_lock(ptl);
> + return ptl;
> +}
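
The locking helpers keep the familiar huge_pte_lock() pattern, e.g.:

        spinlock_t *ptl = hugetlb_pte_lock(&hpte);

        /* ... operate on the entry under the page table lock that is
         * appropriate for its level ...
         */

        spin_unlock(ptl);
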
> +
> +static inline
> +void __hugetlb_pte_init(struct hugetlb_pte *hpte, pte_t *ptep,
> + unsigned int shift, enum hugetlb_level level,
> + spinlock_t *ptl)
> +{
> + /*
> + * If 'shift' indicates that this PTE is contiguous, then @ptep must
> + * be the first pte of the contiguous bunch.
> + */
> + hpte->ptl = ptl;
> + hpte->ptep = ptep;
> + hpte->shift = shift;
> + hpte->level = level;
> +}
> +
> +static inline
> +void hugetlb_pte_init(struct mm_struct *mm, struct hugetlb_pte *hpte,
> + pte_t *ptep, unsigned int shift,
> + enum hugetlb_level level)
> +{
> + __hugetlb_pte_init(hpte, ptep, shift, level,
> + huge_pte_lockptr(shift, mm, ptep));
> +}
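
For a plain PMD-mapped hugepage, initialization ends up looking something
like this (sketch; the pmd_t * to pte_t * cast matches what hugetlb
already does in huge_pte_offset()):

        struct hugetlb_pte hpte;

        hugetlb_pte_init(mm, &hpte, (pte_t *)pmdp, PMD_SHIFT,
                         HUGETLB_LEVEL_PMD);
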
> +
> #if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_CMA)
> extern void __init hugetlb_cma_reserve(int order);
> #else
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 5ca9eae0ac42..6c74adff43b6 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1269,6 +1269,35 @@ static bool vma_has_reserves(struct vm_area_struct *vma, long chg)
> return false;
> }
>
> +bool hugetlb_pte_present_leaf(const struct hugetlb_pte *hpte, pte_t pte)
> +{
> + pgd_t pgd;
> + p4d_t p4d;
> + pud_t pud;
> + pmd_t pmd;
> +
> + switch (hpte->level) {
> + case HUGETLB_LEVEL_PGD:
> + pgd = __pgd(pte_val(pte));
> + return pgd_present(pgd) && pgd_leaf(pgd);
> + case HUGETLB_LEVEL_P4D:
> + p4d = __p4d(pte_val(pte));
> + return p4d_present(p4d) && p4d_leaf(p4d);
> + case HUGETLB_LEVEL_PUD:
> + pud = __pud(pte_val(pte));
> + return pud_present(pud) && pud_leaf(pud);
> + case HUGETLB_LEVEL_PMD:
> + pmd = __pmd(pte_val(pte));
> + return pmd_present(pmd) && pmd_leaf(pmd);
> + case HUGETLB_LEVEL_PTE:
> + return pte_present(pte);
> + default:
> + WARN_ON_ONCE(1);
> + return false;
> + }
> +}
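
FWIW, the way I expect the later walkers to use this: read the entry, then
use hugetlb_pte_present_leaf() to tell a real leaf apart from a pointer to
a lower-level table, e.g.:

        pte_t pte = huge_ptep_get(hpte.ptep);

        if (hugetlb_pte_present_leaf(&hpte, pte)) {
                /* maps hugetlb_pte_size(&hpte) bytes */
        } else if (!pte_present(pte)) {
                /* none, migration or hwpoison entry */
        } else {
                /* present but not a leaf: the walk can continue down to
                 * a lower level
                 */
        }
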
> +
> static void enqueue_hugetlb_folio(struct hstate *h, struct folio *folio)
> {
> int nid = folio_nid(folio);
> --
> 2.39.2.637.g21b0678d19-goog
> 

Acked-by: Mike Kravetz <mike.kravetz@xxxxxxxxxx>
--
Mike Kravetz