[PATCH v3 09/20] kvm: arm64: Make stage2 page table layout dynamic

From: Suzuki K Poulose
Date: Fri Jun 29 2018 - 07:21:27 EST


So far we had a static stage2 page table handling code, based on a
fixed IPA of 40bits. As we prepare for a configurable IPA size per
VM, make our stage2 page table code dynamic, to do the right thing
for a given VM. We ensure the existing condition is always true even
when we lift the limit on the IPA. i.e,

page table levels in stage1 >= page table levels in stage2

Support for the IPA size configuration needs other changes in the way
we configure the EL2 registers (VTTBR and VTCR). So, the IPA is still
fixed to 40bits. The patch also moves the kvm_page_empty() in asm/kvm_mmu.h
to the top, before including the asm/stage2_pgtable.h to avoid a forward
declaration.

Cc: Marc Zyngier <marc.zyngier@xxxxxxx>
Cc: Christoffer Dall <cdall@xxxxxxxxxx>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@xxxxxxx>
---
Changes since V2
- Restrict the stage2 page table to allow reusing the host page table
helpers for now, until we get stage1 independent page table helpers.
---
arch/arm64/include/asm/kvm_mmu.h | 14 +-
arch/arm64/include/asm/stage2_pgtable-nopmd.h | 42 ------
arch/arm64/include/asm/stage2_pgtable-nopud.h | 39 -----
arch/arm64/include/asm/stage2_pgtable.h | 207 +++++++++++++++++++-------
4 files changed, 159 insertions(+), 143 deletions(-)
delete mode 100644 arch/arm64/include/asm/stage2_pgtable-nopmd.h
delete mode 100644 arch/arm64/include/asm/stage2_pgtable-nopud.h

diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index dbaf513..a351722 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -21,6 +21,7 @@
#include <asm/page.h>
#include <asm/memory.h>
#include <asm/cpufeature.h>
+#include <asm/kvm_arm.h>

/*
* As ARMv8.0 only has the TTBR0_EL2 register, we cannot express
@@ -147,6 +148,13 @@ static inline unsigned long __kern_hyp_va(unsigned long v)
#define kvm_phys_mask(kvm) (kvm_phys_size(kvm) - _AC(1, ULL))
#define kvm_vttbr_baddr_mask(kvm) VTTBR_BADDR_MASK

+static inline bool kvm_page_empty(void *ptr)
+{
+ struct page *ptr_page = virt_to_page(ptr);
+
+ return page_count(ptr_page) == 1;
+}
+
#include <asm/stage2_pgtable.h>

int create_hyp_mappings(void *from, void *to, pgprot_t prot);
@@ -237,12 +245,6 @@ static inline bool kvm_s2pmd_exec(pmd_t *pmdp)
return !(READ_ONCE(pmd_val(*pmdp)) & PMD_S2_XN);
}

-static inline bool kvm_page_empty(void *ptr)
-{
- struct page *ptr_page = virt_to_page(ptr);
- return page_count(ptr_page) == 1;
-}
-
#define hyp_pte_table_empty(ptep) kvm_page_empty(ptep)

#ifdef __PAGETABLE_PMD_FOLDED
diff --git a/arch/arm64/include/asm/stage2_pgtable-nopmd.h b/arch/arm64/include/asm/stage2_pgtable-nopmd.h
deleted file mode 100644
index 0280ded..0000000
--- a/arch/arm64/include/asm/stage2_pgtable-nopmd.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (C) 2016 - ARM Ltd
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef __ARM64_S2_PGTABLE_NOPMD_H_
-#define __ARM64_S2_PGTABLE_NOPMD_H_
-
-#include <asm/stage2_pgtable-nopud.h>
-
-#define __S2_PGTABLE_PMD_FOLDED
-
-#define S2_PMD_SHIFT S2_PUD_SHIFT
-#define S2_PTRS_PER_PMD 1
-#define S2_PMD_SIZE (1UL << S2_PMD_SHIFT)
-#define S2_PMD_MASK (~(S2_PMD_SIZE-1))
-
-#define stage2_pud_none(kvm, pud) (0)
-#define stage2_pud_present(kvm, pud) (1)
-#define stage2_pud_clear(kvm, pud) do { } while (0)
-#define stage2_pud_populate(kvm, pud, pmd) do { } while (0)
-#define stage2_pmd_offset(kvm, pud, address) ((pmd_t *)(pud))
-
-#define stage2_pmd_free(kvm, pmd) do { } while (0)
-
-#define stage2_pmd_addr_end(kvm, addr, end) (end)
-
-#define stage2_pud_huge(kvm, pud) (0)
-#define stage2_pmd_table_empty(kvm, pmdp) (0)
-
-#endif
diff --git a/arch/arm64/include/asm/stage2_pgtable-nopud.h b/arch/arm64/include/asm/stage2_pgtable-nopud.h
deleted file mode 100644
index cd6304e..0000000
--- a/arch/arm64/include/asm/stage2_pgtable-nopud.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (C) 2016 - ARM Ltd
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef __ARM64_S2_PGTABLE_NOPUD_H_
-#define __ARM64_S2_PGTABLE_NOPUD_H_
-
-#define __S2_PGTABLE_PUD_FOLDED
-
-#define S2_PUD_SHIFT S2_PGDIR_SHIFT
-#define S2_PTRS_PER_PUD 1
-#define S2_PUD_SIZE (_AC(1, UL) << S2_PUD_SHIFT)
-#define S2_PUD_MASK (~(S2_PUD_SIZE-1))
-
-#define stage2_pgd_none(kvm, pgd) (0)
-#define stage2_pgd_present(kvm, pgd) (1)
-#define stage2_pgd_clear(kvm, pgd) do { } while (0)
-#define stage2_pgd_populate(kvm, pgd, pud) do { } while (0)
-
-#define stage2_pud_offset(kvm, pgd, address) ((pud_t *)(pgd))
-
-#define stage2_pud_free(kvm, x) do { } while (0)
-
-#define stage2_pud_addr_end(kvm, addr, end) (end)
-#define stage2_pud_table_empty(kvm, pmdp) (0)
-
-#endif
diff --git a/arch/arm64/include/asm/stage2_pgtable.h b/arch/arm64/include/asm/stage2_pgtable.h
index 057a405..ffc37cc 100644
--- a/arch/arm64/include/asm/stage2_pgtable.h
+++ b/arch/arm64/include/asm/stage2_pgtable.h
@@ -19,8 +19,12 @@
#ifndef __ARM64_S2_PGTABLE_H_
#define __ARM64_S2_PGTABLE_H_

+#include <linux/hugetlb.h>
#include <asm/pgtable.h>

+/* The PGDIR shift for a given page table with "n" levels. */
+#define pt_levels_pgdir_shift(n) ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - (n))
+
/*
* The hardware supports concatenation of up to 16 tables at stage2 entry level
* and we use the feature whenever possible.
@@ -29,118 +33,209 @@
* On arm64, the smallest PAGE_SIZE supported is 4k, which means
* (PAGE_SHIFT - 3) > 4 holds for all page sizes.
* This implies, the total number of page table levels at stage2 expected
- * by the hardware is actually the number of levels required for (KVM_PHYS_SHIFT - 4)
+ * by the hardware is actually the number of levels required for (IPA_SHIFT - 4)
* in normal translations(e.g, stage1), since we cannot have another level in
- * the range (KVM_PHYS_SHIFT, KVM_PHYS_SHIFT - 4).
+ * the range (IPA_SHIFT, IPA_SHIFT - 4).
*/
-#define STAGE2_PGTABLE_LEVELS ARM64_HW_PGTABLE_LEVELS(KVM_PHYS_SHIFT - 4)
+#define stage2_pt_levels(ipa_shift) ARM64_HW_PGTABLE_LEVELS((ipa_shift) - 4)

/*
- * With all the supported VA_BITs and 40bit guest IPA, the following condition
- * is always true:
+ * With all the supported VA_BITs and guest IPA, the following condition
+ * must be always true:
*
- * STAGE2_PGTABLE_LEVELS <= CONFIG_PGTABLE_LEVELS
+ * stage2_pt_levels <= CONFIG_PGTABLE_LEVELS
*
* We base our stage-2 page table walker helpers on this assumption and
* fall back to using the host version of the helper wherever possible.
* i.e, if a particular level is not folded (e.g, PUD) at stage2, we fall back
* to using the host version, since it is guaranteed it is not folded at host.
*
- * If the condition breaks in the future, we can rearrange the host level
- * definitions and reuse them for stage2. Till then...
+ * If the condition breaks in the future, we need completely independent
+ * page table helpers. Till then...
*/
-#if STAGE2_PGTABLE_LEVELS > CONFIG_PGTABLE_LEVELS
+
+#if stage2_pt_levels(KVM_PHYS_SHIFT) > CONFIG_PGTABLE_LEVELS
#error "Unsupported combination of guest IPA and host VA_BITS."
#endif

-/* S2_PGDIR_SHIFT is the size mapped by top-level stage2 entry */
-#define S2_PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - STAGE2_PGTABLE_LEVELS)
-#define S2_PGDIR_SIZE (_AC(1, UL) << S2_PGDIR_SHIFT)
-#define S2_PGDIR_MASK (~(S2_PGDIR_SIZE - 1))
-
/*
* The number of PTRS across all concatenated stage2 tables given by the
* number of bits resolved at the initial level.
*/
-#define PTRS_PER_S2_PGD (1 << (KVM_PHYS_SHIFT - S2_PGDIR_SHIFT))
+#define __s2_pgd_ptrs(pa, lvls) (1 << ((pa) - pt_levels_pgdir_shift((lvls))))
+#define __s2_pgd_size(pa, lvls) (__s2_pgd_ptrs((pa), (lvls)) * sizeof(pgd_t))
+
+#define kvm_stage2_levels(kvm) stage2_pt_levels(kvm_phys_shift(kvm))
+#define stage2_pgdir_shift(kvm) \
+ pt_levels_pgdir_shift(kvm_stage2_levels(kvm))
+#define stage2_pgdir_size(kvm) (_AC(1, UL) << stage2_pgdir_shift((kvm)))
+#define stage2_pgdir_mask(kvm) (~(stage2_pgdir_size((kvm)) - 1))
+#define stage2_pgd_ptrs(kvm) \
+ __s2_pgd_ptrs(kvm_phys_shift(kvm), kvm_stage2_levels(kvm))
+
+#define stage2_pgd_size(kvm) __s2_pgd_size(kvm_phys_shift(kvm), kvm_stage2_levels(kvm))

/*
* kvm_mmmu_cache_min_pages is the number of stage2 page table translation
* levels in addition to the PGD.
*/
-#define kvm_mmu_cache_min_pages(kvm) (STAGE2_PGTABLE_LEVELS - 1)
+#define kvm_mmu_cache_min_pages(kvm) (kvm_stage2_levels(kvm) - 1)


-#if STAGE2_PGTABLE_LEVELS > 3
+/* PUD/PMD definitions if present */
+#define __S2_PUD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(1)
+#define __S2_PUD_SIZE (_AC(1, UL) << __S2_PUD_SHIFT)
+#define __S2_PUD_MASK (~(__S2_PUD_SIZE - 1))

-#define S2_PUD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(1)
-#define S2_PUD_SIZE (_AC(1, UL) << S2_PUD_SHIFT)
-#define S2_PUD_MASK (~(S2_PUD_SIZE - 1))
+#define __S2_PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2)
+#define __S2_PMD_SIZE (_AC(1, UL) << __S2_PMD_SHIFT)
+#define __S2_PMD_MASK (~(__S2_PMD_SIZE - 1))

-#define stage2_pgd_none(kvm, pgd) pgd_none(pgd)
-#define stage2_pgd_clear(kvm, pgd) pgd_clear(pgd)
-#define stage2_pgd_present(kvm, pgd) pgd_present(pgd)
-#define stage2_pgd_populate(kvm, pgd, pud) pgd_populate(NULL, pgd, pud)
-#define stage2_pud_offset(kvm, pgd, address) pud_offset(pgd, address)
-#define stage2_pud_free(kvm, pud) pud_free(NULL, pud)
+#define __s2_pud_index(addr) \
+ (((addr) >> __S2_PUD_SHIFT) & (PTRS_PER_PTE - 1))
+#define __s2_pmd_index(addr) \
+ (((addr) >> __S2_PMD_SHIFT) & (PTRS_PER_PTE - 1))

-#define stage2_pud_table_empty(kvm, pudp) kvm_page_empty(pudp)
+#define __kvm_has_stage2_levels(kvm, min_levels) \
+ ((CONFIG_PGTABLE_LEVELS >= min_levels) && (kvm_stage2_levels(kvm) >= min_levels))
+
+#define kvm_stage2_has_pgd(kvm) __kvm_has_stage2_levels(kvm, 4)
+#define kvm_stage2_has_pud(kvm) __kvm_has_stage2_levels(kvm, 3)
+
+static inline int stage2_pgd_none(struct kvm *kvm, pgd_t pgd)
+{
+ return kvm_stage2_has_pgd(kvm) ? pgd_none(pgd) : 0;
+}
+
+static inline void stage2_pgd_clear(struct kvm *kvm, pgd_t *pgdp)
+{
+ if (kvm_stage2_has_pgd(kvm))
+ pgd_clear(pgdp);
+}
+
+static inline int stage2_pgd_present(struct kvm *kvm, pgd_t pgd)
+{
+ return kvm_stage2_has_pgd(kvm) ? pgd_present(pgd) : 1;
+}
+
+static inline void stage2_pgd_populate(struct kvm *kvm, pgd_t *pgdp, pud_t *pud)
+{
+ if (kvm_stage2_has_pgd(kvm))
+ pgd_populate(NULL, pgdp, pud);
+ else
+ BUG();
+}
+
+static inline pud_t *stage2_pud_offset(struct kvm *kvm,
+ pgd_t *pgd, unsigned long address)
+{
+ if (kvm_stage2_has_pgd(kvm)) {
+ phys_addr_t pud_phys = pgd_page_paddr(*pgd);
+
+ pud_phys += __s2_pud_index(address) * sizeof(pud_t);
+ return __va(pud_phys);
+ }
+ return (pud_t *)pgd;
+}
+
+static inline void stage2_pud_free(struct kvm *kvm, pud_t *pud)
+{
+ if (kvm_stage2_has_pgd(kvm))
+ pud_free(NULL, pud);
+}
+
+static inline int stage2_pud_table_empty(struct kvm *kvm, pud_t *pudp)
+{
+ return kvm_stage2_has_pgd(kvm) && kvm_page_empty(pudp);
+}

static inline phys_addr_t
stage2_pud_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
{
- phys_addr_t boundary = (addr + S2_PUD_SIZE) & S2_PUD_MASK;
+ if (kvm_stage2_has_pgd(kvm)) {
+ phys_addr_t boundary = (addr + __S2_PUD_SIZE) & __S2_PUD_MASK;

- return (boundary - 1 < end - 1) ? boundary : end;
+ return (boundary - 1 < end - 1) ? boundary : end;
+ }
+ return end;
}

-#endif /* STAGE2_PGTABLE_LEVELS > 3 */
+static inline int stage2_pud_none(struct kvm *kvm, pud_t pud)
+{
+ return kvm_stage2_has_pud(kvm) ? pud_none(pud) : 0;
+}

+static inline void stage2_pud_clear(struct kvm *kvm, pud_t *pudp)
+{
+ if (kvm_stage2_has_pud(kvm))
+ pud_clear(pudp);
+}

-#if STAGE2_PGTABLE_LEVELS > 2
+static inline int stage2_pud_present(struct kvm *kvm, pud_t pud)
+{
+ return kvm_stage2_has_pud(kvm) ? pud_present(pud) : 1;
+}

-#define S2_PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2)
-#define S2_PMD_SIZE (_AC(1, UL) << S2_PMD_SHIFT)
-#define S2_PMD_MASK (~(S2_PMD_SIZE - 1))
+static inline void stage2_pud_populate(struct kvm *kvm, pud_t *pudp, pmd_t *pmd)
+{
+ if (kvm_stage2_has_pud(kvm))
+ pud_populate(NULL, pudp, pmd);
+ else
+ BUG();
+}

-#define stage2_pud_none(kvm, pud) pud_none(pud)
-#define stage2_pud_clear(kvm, pud) pud_clear(pud)
-#define stage2_pud_present(kvm, pud) pud_present(pud)
-#define stage2_pud_populate(kvm, pud, pmd) pud_populate(NULL, pud, pmd)
-#define stage2_pmd_offset(kvm, pud, address) pmd_offset(pud, address)
-#define stage2_pmd_free(kvm, pmd) pmd_free(NULL, pmd)
+static inline pmd_t *stage2_pmd_offset(struct kvm *kvm,
+ pud_t *pud, unsigned long address)
+{
+ if (kvm_stage2_has_pud(kvm)) {
+ phys_addr_t pmd_phys = pud_page_paddr(*pud);

-#define stage2_pud_huge(kvm, pud) pud_huge(pud)
-#define stage2_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp)
+ pmd_phys += __s2_pmd_index(address) * sizeof(pmd_t);
+ return __va(pmd_phys);
+ }
+ return (pmd_t *)pud;
+}
+
+static inline void stage2_pmd_free(struct kvm *kvm, pmd_t *pmd)
+{
+ if (kvm_stage2_has_pud(kvm))
+ pmd_free(NULL, pmd);
+}
+
+static inline int stage2_pmd_table_empty(struct kvm *kvm, pmd_t *pmdp)
+{
+ return kvm_stage2_has_pud(kvm) && kvm_page_empty(pmdp);
+}

static inline phys_addr_t
stage2_pmd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
{
- phys_addr_t boundary = (addr + S2_PMD_SIZE) & S2_PMD_MASK;
+ if (kvm_stage2_has_pud(kvm)) {
+ phys_addr_t boundary = (addr + __S2_PMD_SIZE) & __S2_PMD_MASK;

- return (boundary - 1 < end - 1) ? boundary : end;
+ return (boundary - 1 < end - 1) ? boundary : end;
+ }
+ return end;
}

-#endif /* STAGE2_PGTABLE_LEVELS > 2 */
+static inline int stage2_pud_huge(struct kvm *kvm, pud_t pud)
+{
+ return kvm_stage2_has_pud(kvm) ? pud_huge(pud) : 0;
+}

#define stage2_pte_table_empty(kvm, ptep) kvm_page_empty(ptep)

-#if STAGE2_PGTABLE_LEVELS == 2
-#include <asm/stage2_pgtable-nopmd.h>
-#elif STAGE2_PGTABLE_LEVELS == 3
-#include <asm/stage2_pgtable-nopud.h>
-#endif
-
-#define stage2_pgd_size(kvm) (PTRS_PER_S2_PGD * sizeof(pgd_t))
-
-#define stage2_pgd_index(kvm, addr) \
- (((addr) >> S2_PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1))
+static inline unsigned long stage2_pgd_index(struct kvm *kvm, phys_addr_t addr)
+{
+ return (addr >> stage2_pgdir_shift(kvm)) & (stage2_pgd_ptrs(kvm) - 1);
+}

static inline phys_addr_t
stage2_pgd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
{
- phys_addr_t boundary = (addr + S2_PGDIR_SIZE) & S2_PGDIR_MASK;
+ phys_addr_t boundary;

+ boundary = (addr + stage2_pgdir_size(kvm)) & stage2_pgdir_mask(kvm);
return (boundary - 1 < end - 1) ? boundary : end;
}

--
2.7.4