[PATCH v2 07/11] arm64: ptdump: Register a debugfs entry for the host stage-2 page-tables

From: Sebastian Ene
Date: Thu Oct 19 2023 - 10:41:29 EST


Initialize a structures used to keep the state of the host stage-2 ptdump
walker when pKVM is enabled. Create a new debugfs entry for the host
stage-2 pagetables and hook the callbacks invoked when the entry is
accessed. When the debugfs file is opened, allocate memory resources which
will be shared with the hypervisor for saving the pagetable snapshot.
On close release the associated memory and we unshare it from the
hypervisor.

Signed-off-by: Sebastian Ene <sebastianene@xxxxxxxxxx>
---
arch/arm64/include/asm/ptdump.h | 2 +
arch/arm64/kvm/Kconfig | 12 +++
arch/arm64/mm/ptdump.c | 161 ++++++++++++++++++++++++++++++++
3 files changed, 175 insertions(+)

diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h
index 88dcab1dab97..35b883524462 100644
--- a/arch/arm64/include/asm/ptdump.h
+++ b/arch/arm64/include/asm/ptdump.h
@@ -23,6 +23,8 @@ struct ptdump_info {
void (*ptdump_walk)(struct seq_file *s, struct ptdump_info *info);
void (*ptdump_end_walk)(struct ptdump_info *info);
struct mutex file_lock;
+ size_t mc_len;
+ void *priv;
};

void ptdump_walk(struct seq_file *s, struct ptdump_info *info);
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 83c1e09be42e..4b1847704bb3 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -71,4 +71,16 @@ config PROTECTED_NVHE_STACKTRACE

If unsure, or not using protected nVHE (pKVM), say N.

+config NVHE_EL2_PTDUMP_DEBUGFS
+ bool "Present the stage-2 pagetables to debugfs"
+ depends on NVHE_EL2_DEBUG && PTDUMP_DEBUGFS && KVM
+ help
+ Say Y here if you want to show the stage-2 kernel pagetables
+ layout in a debugfs file. This information is only useful for kernel developers
+ who are working in architecture specific areas of the kernel.
+ It is probably not a good idea to enable this feature in a production
+ kernel.
+
+ If in doubt, say N.
+
endif # VIRTUALIZATION
diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
index fe239b9af50c..7c78b8994ca1 100644
--- a/arch/arm64/mm/ptdump.c
+++ b/arch/arm64/mm/ptdump.c
@@ -466,6 +466,165 @@ void ptdump_check_wx(void)
pr_info("Checked W+X mappings: passed, no W+X pages found\n");
}

+#ifdef CONFIG_NVHE_EL2_PTDUMP_DEBUGFS
+static struct ptdump_info stage2_kernel_ptdump_info;
+
+static phys_addr_t ptdump_host_pa(void *addr)
+{
+ return __pa(addr);
+}
+
+static void *ptdump_host_va(phys_addr_t phys)
+{
+ return __va(phys);
+}
+
+static size_t stage2_get_pgd_len(void)
+{
+ u64 mmfr0, mmfr1, vtcr;
+ u32 phys_shift = get_kvm_ipa_limit();
+
+ mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
+ mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
+ vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift);
+
+ return kvm_pgtable_stage2_pgd_size(vtcr);
+}
+
+static void stage2_ptdump_prepare_walk(struct ptdump_info *info)
+{
+ struct kvm_pgtable_snapshot *snapshot;
+ int ret, pgd_index, mc_index, pgd_pages_sz;
+ void *page_hva;
+ phys_addr_t pgd;
+
+ snapshot = alloc_pages_exact(PAGE_SIZE, GFP_KERNEL_ACCOUNT);
+ if (!snapshot)
+ return;
+
+ memset(snapshot, 0, PAGE_SIZE);
+ ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp, virt_to_pfn(snapshot));
+ if (ret)
+ goto free_snapshot;
+
+ snapshot->pgd_len = stage2_get_pgd_len();
+ pgd_pages_sz = snapshot->pgd_len / PAGE_SIZE;
+ snapshot->pgd_hva = alloc_pages_exact(snapshot->pgd_len,
+ GFP_KERNEL_ACCOUNT);
+ if (!snapshot->pgd_hva)
+ goto unshare_snapshot;
+
+ for (pgd_index = 0; pgd_index < pgd_pages_sz; pgd_index++) {
+ page_hva = snapshot->pgd_hva + pgd_index * PAGE_SIZE;
+ ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp,
+ virt_to_pfn(page_hva));
+ if (ret)
+ goto unshare_pgd_pages;
+ }
+
+ for (mc_index = 0; mc_index < info->mc_len; mc_index++) {
+ page_hva = alloc_pages_exact(PAGE_SIZE, GFP_KERNEL_ACCOUNT);
+ if (!page_hva)
+ goto free_memcache_pages;
+
+ push_hyp_memcache(&snapshot->mc, page_hva, ptdump_host_pa);
+ ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp,
+ virt_to_pfn(page_hva));
+ if (ret) {
+ pop_hyp_memcache(&snapshot->mc, ptdump_host_va);
+ free_pages_exact(page_hva, PAGE_SIZE);
+ goto free_memcache_pages;
+ }
+ }
+
+ ret = kvm_call_hyp_nvhe(__pkvm_copy_host_stage2, snapshot);
+ if (ret)
+ goto free_memcache_pages;
+
+ pgd = (phys_addr_t)snapshot->pgtable.pgd;
+ snapshot->pgtable.pgd = phys_to_virt(pgd);
+ info->priv = snapshot;
+ return;
+
+free_memcache_pages:
+ page_hva = pop_hyp_memcache(&snapshot->mc, ptdump_host_va);
+ while (page_hva) {
+ ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp,
+ virt_to_pfn(page_hva));
+ WARN_ON(ret);
+ free_pages_exact(page_hva, PAGE_SIZE);
+ page_hva = pop_hyp_memcache(&snapshot->mc, ptdump_host_va);
+ }
+unshare_pgd_pages:
+ pgd_index = pgd_index - 1;
+ for (; pgd_index >= 0; pgd_index--) {
+ page_hva = snapshot->pgd_hva + pgd_index * PAGE_SIZE;
+ ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp,
+ virt_to_pfn(page_hva));
+ WARN_ON(ret);
+ }
+ free_pages_exact(snapshot->pgd_hva, snapshot->pgd_len);
+unshare_snapshot:
+ WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp,
+ virt_to_pfn(snapshot)));
+free_snapshot:
+ free_pages_exact(snapshot, PAGE_SIZE);
+ info->priv = NULL;
+}
+
+static void stage2_ptdump_end_walk(struct ptdump_info *info)
+{
+ struct kvm_pgtable_snapshot *snapshot = info->priv;
+ void *page_hva;
+ int pgd_index, ret, pgd_pages_sz;
+
+ if (!snapshot)
+ return;
+
+ page_hva = pop_hyp_memcache(&snapshot->mc, ptdump_host_va);
+ while (page_hva) {
+ ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp,
+ virt_to_pfn(page_hva));
+ WARN_ON(ret);
+ free_pages_exact(page_hva, PAGE_SIZE);
+ page_hva = pop_hyp_memcache(&snapshot->mc, ptdump_host_va);
+ }
+
+ pgd_pages_sz = snapshot->pgd_len / PAGE_SIZE;
+ for (pgd_index = 0; pgd_index < pgd_pages_sz; pgd_index++) {
+ page_hva = snapshot->pgd_hva + pgd_index * PAGE_SIZE;
+ ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp,
+ virt_to_pfn(page_hva));
+ WARN_ON(ret);
+ }
+
+ free_pages_exact(snapshot->pgd_hva, snapshot->pgd_len);
+ WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp,
+ virt_to_pfn(snapshot)));
+ free_pages_exact(snapshot, PAGE_SIZE);
+ info->priv = NULL;
+}
+#endif /* CONFIG_NVHE_EL2_PTDUMP_DEBUGFS */
+
+static void __init ptdump_register_host_stage2(void)
+{
+#ifdef CONFIG_NVHE_EL2_PTDUMP_DEBUGFS
+ if (!is_protected_kvm_enabled())
+ return;
+
+ stage2_kernel_ptdump_info = (struct ptdump_info) {
+ .mc_len = host_s2_pgtable_pages(),
+ .ptdump_prepare_walk = stage2_ptdump_prepare_walk,
+ .ptdump_end_walk = stage2_ptdump_end_walk,
+ };
+
+ mutex_init(&stage2_kernel_ptdump_info.file_lock);
+
+ ptdump_debugfs_register(&stage2_kernel_ptdump_info,
+ "host_stage2_kernel_page_tables");
+#endif
+}
+
static int __init ptdump_init(void)
{
address_markers[PAGE_END_NR].start_address = PAGE_END;
@@ -474,6 +633,8 @@ static int __init ptdump_init(void)
#endif
ptdump_initialize();
ptdump_debugfs_register(&kernel_ptdump_info, "kernel_page_tables");
+ ptdump_register_host_stage2();
+
return 0;
}
device_initcall(ptdump_init);
--
2.42.0.655.g421f12c284-goog