[PATCH Part2 v5 09/45] x86/fault: Add support to dump RMP entry on fault

From: Brijesh Singh
Date: Fri Aug 20 2021 - 12:02:16 EST


When SEV-SNP is enabled globally, a write from the host goes through the
RMP check. If the hardware encounters a check failure, it raises a #PF
(with the RMP bit set in the error code). Dump the RMP entry at the
faulting pfn to help with debugging.

Signed-off-by: Brijesh Singh <brijesh.singh@xxxxxxx>
---
arch/x86/include/asm/sev.h | 7 +++++++
arch/x86/kernel/sev.c | 43 ++++++++++++++++++++++++++++++++++++++
arch/x86/mm/fault.c | 17 +++++++++++----
include/linux/sev.h | 2 ++
4 files changed, 65 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 92ced9626e95..569294f687e6 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -106,6 +106,11 @@ struct __packed rmpentry {

#define rmpentry_assigned(x) ((x)->info.assigned)
#define rmpentry_pagesize(x) ((x)->info.pagesize)
+#define rmpentry_vmsa(x) ((x)->info.vmsa)
+#define rmpentry_asid(x) ((x)->info.asid)
+#define rmpentry_validated(x) ((x)->info.validated)
+#define rmpentry_gpa(x) ((unsigned long)(x)->info.gpa)
+#define rmpentry_immutable(x) ((x)->info.immutable)

#define RMPADJUST_VMSA_PAGE_BIT BIT(16)

@@ -165,6 +170,7 @@ void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op
void snp_set_memory_shared(unsigned long vaddr, unsigned int npages);
void snp_set_memory_private(unsigned long vaddr, unsigned int npages);
void snp_set_wakeup_secondary_cpu(void);
+void dump_rmpentry(u64 pfn);
#ifdef __BOOT_COMPRESSED
bool sev_snp_enabled(void);
#else
@@ -188,6 +194,7 @@ static inline void snp_set_memory_shared(unsigned long vaddr, unsigned int npage
static inline void snp_set_memory_private(unsigned long vaddr, unsigned int npages) { }
static inline void snp_set_wakeup_secondary_cpu(void) { }
static inline void sev_snp_cpuid_init(struct boot_params *bp) { }
+static inline void dump_rmpentry(u64 pfn) {}
#ifdef __BOOT_COMPRESSED
static inline bool sev_snp_enabled { return false; }
#else
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index bad41deb8335..8b3e83e50468 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -2404,6 +2404,49 @@ static struct rmpentry *__snp_lookup_rmpentry(u64 pfn, int *level)
return entry;
}

+void dump_rmpentry(u64 pfn)
+{
+ unsigned long pfn_end;
+ struct rmpentry *e;
+ int level;
+
+ e = __snp_lookup_rmpentry(pfn, &level);
+ if (!e) {
+ pr_alert("failed to read RMP entry pfn 0x%llx\n", pfn);
+ return;
+ }
+
+ if (rmpentry_assigned(e)) {
+ pr_alert("RMPEntry paddr 0x%llx [assigned=%d immutable=%d pagesize=%d gpa=0x%lx"
+ " asid=%d vmsa=%d validated=%d]\n", pfn << PAGE_SHIFT,
+ rmpentry_assigned(e), rmpentry_immutable(e), rmpentry_pagesize(e),
+ rmpentry_gpa(e), rmpentry_asid(e), rmpentry_vmsa(e),
+ rmpentry_validated(e));
+ return;
+ }
+
+ /*
+ * The RMP entry at the faulting pfn is not assigned, so it alone does not
+ * tell us what caused the RMP violation. To get some useful debug
+ * information, iterate through the entire 2MB region containing the pfn
+ * and dump the raw RMP entries that have at least one bit set.
+ */
+ pfn = pfn & ~(PTRS_PER_PMD - 1);
+ pfn_end = pfn + PTRS_PER_PMD;
+
+ while (pfn < pfn_end) {
+ e = __snp_lookup_rmpentry(pfn, &level);
+ if (!e)
+ return;
+
+ if (e->low || e->high)
+ pr_alert("RMPEntry paddr 0x%llx: [high=0x%016llx low=0x%016llx]\n",
+ pfn << PAGE_SHIFT, e->high, e->low);
+ pfn++;
+ }
+}
+EXPORT_SYMBOL_GPL(dump_rmpentry);
+
/*
* Return 1 if the RMP entry is assigned, 0 if it exists but is not assigned,
* and -errno if there is no corresponding RMP entry.
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index f2d543b92f43..9cd33169dfb5 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -33,6 +33,7 @@
#include <asm/pgtable_areas.h> /* VMALLOC_START, ... */
#include <asm/kvm_para.h> /* kvm_handle_async_pf */
#include <asm/vdso.h> /* fixup_vdso_exception() */
+#include <asm/sev.h> /* dump_rmpentry() */

#define CREATE_TRACE_POINTS
#include <asm/trace/exceptions.h>
@@ -289,7 +290,7 @@ static bool low_pfn(unsigned long pfn)
return pfn < max_low_pfn;
}

-static void dump_pagetable(unsigned long address)
+static void dump_pagetable(unsigned long address, bool show_rmpentry)
{
pgd_t *base = __va(read_cr3_pa());
pgd_t *pgd = &base[pgd_index(address)];
@@ -345,10 +346,11 @@ static int bad_address(void *p)
return get_kernel_nofault(dummy, (unsigned long *)p);
}

-static void dump_pagetable(unsigned long address)
+static void dump_pagetable(unsigned long address, bool show_rmpentry)
{
pgd_t *base = __va(read_cr3_pa());
pgd_t *pgd = base + pgd_index(address);
+ unsigned long pfn;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
@@ -366,6 +368,7 @@ static void dump_pagetable(unsigned long address)
if (bad_address(p4d))
goto bad;

+ pfn = p4d_pfn(*p4d);
pr_cont("P4D %lx ", p4d_val(*p4d));
if (!p4d_present(*p4d) || p4d_large(*p4d))
goto out;
@@ -374,6 +377,7 @@ static void dump_pagetable(unsigned long address)
if (bad_address(pud))
goto bad;

+ pfn = pud_pfn(*pud);
pr_cont("PUD %lx ", pud_val(*pud));
if (!pud_present(*pud) || pud_large(*pud))
goto out;
@@ -382,6 +386,7 @@ static void dump_pagetable(unsigned long address)
if (bad_address(pmd))
goto bad;

+ pfn = pmd_pfn(*pmd);
pr_cont("PMD %lx ", pmd_val(*pmd));
if (!pmd_present(*pmd) || pmd_large(*pmd))
goto out;
@@ -390,9 +395,13 @@ static void dump_pagetable(unsigned long address)
if (bad_address(pte))
goto bad;

+ pfn = pte_pfn(*pte);
pr_cont("PTE %lx", pte_val(*pte));
out:
pr_cont("\n");
+
+ if (show_rmpentry)
+ dump_rmpentry(pfn);
return;
bad:
pr_info("BAD\n");
@@ -578,7 +587,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long ad
show_ldttss(&gdt, "TR", tr);
}

- dump_pagetable(address);
+ dump_pagetable(address, error_code & X86_PF_RMP);
}

static noinline void
@@ -595,7 +604,7 @@ pgtable_bad(struct pt_regs *regs, unsigned long error_code,

printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
tsk->comm, address);
- dump_pagetable(address);
+ dump_pagetable(address, false);

if (__die("Bad pagetable", regs, error_code))
sig = 0;
diff --git a/include/linux/sev.h b/include/linux/sev.h
index 1a68842789e1..734b13a69c54 100644
--- a/include/linux/sev.h
+++ b/include/linux/sev.h
@@ -16,6 +16,7 @@ int snp_lookup_rmpentry(u64 pfn, int *level);
int psmash(u64 pfn);
int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, int asid, bool immutable);
int rmp_make_shared(u64 pfn, enum pg_level level);
+void dump_rmpentry(u64 pfn);
#else
static inline int snp_lookup_rmpentry(u64 pfn, int *level) { return 0; }
static inline int psmash(u64 pfn) { return -ENXIO; }
@@ -25,6 +26,7 @@ static inline int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, int as
return -ENODEV;
}
static inline int rmp_make_shared(u64 pfn, enum pg_level level) { return -ENODEV; }
+static inline void dump_rmpentry(u64 pfn) { }

#endif /* CONFIG_AMD_MEM_ENCRYPT */
#endif /* __LINUX_SEV_H */
--
2.17.1