[PATCH Part1 RFC v4 30/36] x86/sev: enable SEV-SNP-validated CPUID in #VC handlers

From: Brijesh Singh
Date: Wed Jul 07 2021 - 14:18:02 EST


From: Michael Roth <michael.roth@xxxxxxx>

This adds support for utilizing the SEV-SNP-validated CPUID table in
the various #VC handler routines used throughout boot/run-time. Mostly
this is handled by re-using the CPUID lookup code introduced earlier
for the boot/compressed kernel, but at various stages of boot some work
needs to be done to ensure the CPUID table is set up and remains
accessible throughout. The following init routines are introduced to
handle this:

sev_snp_cpuid_init():

This sets up access to the CPUID memory range for the #VC handler
that gets set up just after entry to startup_64(). Since the code is
still using an identity mapping, the existing sev_snp_cpuid_init()
used by boot/compressed is used here as well, but annotated as __init
so it can be cleaned up later (boot/compressed/sev.c already defines
away __init when it pulls in shared SEV code). The boot/compressed
kernel handles any necessary lookup of the Confidential Computing blob
from EFI and puts it into boot_params if present, so only boot_params
needs to be checked here.

sev_snp_cpuid_init_virtual():

This is called when the previous identity mapping is gone and the
memory used for the CPUID memory range needs to be mapped into the
new page table with the encryption bit set and accessed via __va().

Since this path is also entered later by APs to set up their initial
VC handlers, a function pointer is used to switch them to a handler
that doesn't attempt to re-initialize the SNP CPUID feature, as at
that point it will have already been set up.

sev_snp_cpuid_init_remap_early():

This is called when the previous mapping of the CPUID memory range is
no longer present. early_memremap() is now available, so use that to
create a new mapping that can be used until the final run-time mapping
is set up by sev_snp_cpuid_init_remap().

sev_snp_cpuid_init_remap():

This switches the mapping of the CPUID memory range from
early_memremap() to ioremap_encrypted(); otherwise the early ioremap
leak detector will complain about the still-active early mapping.
This mapping is what gets used for the remaining life of the guest.

Signed-off-by: Michael Roth <michael.roth@xxxxxxx>
Signed-off-by: Brijesh Singh <brijesh.singh@xxxxxxx>
---
arch/x86/include/asm/realmode.h | 1 +
arch/x86/include/asm/setup.h | 5 +-
arch/x86/include/asm/sev.h | 8 +++
arch/x86/kernel/head64.c | 21 ++++++--
arch/x86/kernel/head_64.S | 6 ++-
arch/x86/kernel/setup.c | 3 ++
arch/x86/kernel/sev-shared.c | 93 ++++++++++++++++++++++++++++++++-
arch/x86/kernel/smpboot.c | 2 +
8 files changed, 129 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
index 5db5d083c873..ff0eecee4235 100644
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -63,6 +63,7 @@ extern unsigned long initial_stack;
#ifdef CONFIG_AMD_MEM_ENCRYPT
extern unsigned long initial_vc_handler;
#endif
+extern unsigned long initial_idt_setup;

extern unsigned char real_mode_blob[];
extern unsigned char real_mode_relocs[];
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index a12458a7a8d4..12fc52894ad8 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -50,8 +50,9 @@ extern void reserve_standard_io_resources(void);
extern void i386_reserve_resources(void);
extern unsigned long __startup_64(unsigned long physaddr, struct boot_params *bp);
extern unsigned long __startup_secondary_64(void);
-extern void startup_64_setup_env(unsigned long physbase);
-extern void early_setup_idt(void);
+extern void startup_64_setup_env(unsigned long physbase, struct boot_params *bp);
+extern void early_setup_idt_common(void *rmode);
+extern void __init early_setup_idt(void *rmode);
extern void __init do_early_exception(struct pt_regs *regs, int trapnr);

#ifdef CONFIG_X86_INTEL_MID
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index b5715a26361a..6c23e694a109 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -133,6 +133,10 @@ bool sev_snp_enabled(void);
#endif

void sev_snp_cpuid_init(struct boot_params *bp);
+#ifndef __BOOT_COMPRESSED
+void sev_snp_cpuid_init_virtual(void);
+void sev_snp_cpuid_init_remap_early(void);
+#endif /* __BOOT_COMPRESSED */
#else
static inline void sev_es_ist_enter(struct pt_regs *regs) { }
static inline void sev_es_ist_exit(void) { }
@@ -154,6 +158,10 @@ static inline bool sev_snp_enabled { return false; }
#endif

static inline void sev_snp_cpuid_init(struct boot_params *bp) { }
+#ifndef __BOOT_COMPRESSED
+static inline void sev_snp_cpuid_init_virtual(void) { }
+static inline void sev_snp_cpuid_init_remap_early(void) { }
+#endif /* __BOOT_COMPRESSED */
#endif

#endif
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 8615418f98f1..de3b4f1afbfe 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -567,7 +567,7 @@ static void set_bringup_idt_handler(gate_desc *idt, int n, void *handler)
}

/* This runs while still in the direct mapping */
-static void startup_64_load_idt(unsigned long physbase)
+static void startup_64_load_idt(unsigned long physbase, struct boot_params *bp)
{
struct desc_ptr *desc = fixup_pointer(&bringup_idt_descr, physbase);
gate_desc *idt = fixup_pointer(bringup_idt_table, physbase);
@@ -577,6 +577,7 @@ static void startup_64_load_idt(unsigned long physbase)
void *handler;

/* VMM Communication Exception */
+ sev_snp_cpuid_init(bp); /* used by #VC handler */
handler = fixup_pointer(vc_no_ghcb, physbase);
set_bringup_idt_handler(idt, X86_TRAP_VC, handler);
}
@@ -585,8 +586,8 @@ static void startup_64_load_idt(unsigned long physbase)
native_load_idt(desc);
}

-/* This is used when running on kernel addresses */
-void early_setup_idt(void)
+/* Used for all CPUs */
+void early_setup_idt_common(void *rmode)
{
/* VMM Communication Exception */
if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT))
@@ -596,10 +597,20 @@ void early_setup_idt(void)
native_load_idt(&bringup_idt_descr);
}

+/* Boot-processor IDT setup on kernel addresses; do_boot_cpu() points APs at early_setup_idt_common() */
+void __init early_setup_idt(void *rmode)
+{
+ /* SEV-SNP CPUID setup first: the #VC handler uses the CPUID table mapped here */
+ if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT))
+ sev_snp_cpuid_init_virtual();
+
+ early_setup_idt_common(rmode); /* path shared with APs; rmode is passed through unchanged */
+}
+
/*
* Setup boot CPU state needed before kernel switches to virtual addresses.
*/
-void __head startup_64_setup_env(unsigned long physbase)
+void __head startup_64_setup_env(unsigned long physbase, struct boot_params *bp)
{
u64 gs_area = (u64)fixup_pointer(startup_gs_area, physbase);

@@ -623,5 +634,5 @@ void __head startup_64_setup_env(unsigned long physbase)
*/
native_wrmsr(MSR_GS_BASE, gs_area, gs_area >> 32);

- startup_64_load_idt(physbase);
+ startup_64_load_idt(physbase, bp);
}
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index d8b3ebd2bb85..78f35e446498 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -218,7 +218,10 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)

/* Setup and Load IDT */
pushq %rsi
- call early_setup_idt
+ movq %rsi, %rdi
+ movq initial_idt_setup(%rip), %rax
+ ANNOTATE_RETPOLINE_SAFE
+ call *%rax
popq %rsi

/* Check if nx is implemented */
@@ -341,6 +344,7 @@ SYM_DATA(initial_gs, .quad INIT_PER_CPU_VAR(fixed_percpu_data))
#ifdef CONFIG_AMD_MEM_ENCRYPT
SYM_DATA(initial_vc_handler, .quad handle_vc_boot_ghcb)
#endif
+SYM_DATA(initial_idt_setup, .quad early_setup_idt)

/*
* The FRAME_SIZE gap is a convention which helps the in-kernel unwinder
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 85acd22f8022..5ff264917b5b 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -47,6 +47,7 @@
#include <asm/thermal.h>
#include <asm/unwind.h>
#include <asm/vsyscall.h>
+#include <asm/sev.h>
#include <linux/vmalloc.h>

/*
@@ -1077,6 +1078,8 @@ void __init setup_arch(char **cmdline_p)

init_mem_mapping();

+ sev_snp_cpuid_init_remap_early();
+
idt_setup_early_pf();

/*
diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c
index 23328727caf4..dbc5c2600d9d 100644
--- a/arch/x86/kernel/sev-shared.c
+++ b/arch/x86/kernel/sev-shared.c
@@ -264,7 +264,7 @@ static int sev_es_cpuid_msr_proto(u32 func, u32 subfunc, u32 *eax, u32 *ebx,
return 0;
}

-static bool sev_snp_cpuid_active(void)
+static inline bool sev_snp_cpuid_active(void)
{
return sev_snp_cpuid_enabled;
}
@@ -905,7 +905,7 @@ static struct cc_blob_sev_info *sev_snp_probe_cc_blob(struct boot_params *bp)
* indication that SEV-ES is enabled. Subsequent init levels will check for
* SEV_SNP feature once available to also take SEV MSR value into account.
*/
-void sev_snp_cpuid_init(struct boot_params *bp)
+void __init sev_snp_cpuid_init(struct boot_params *bp)
{
struct cc_blob_sev_info *cc_info;

@@ -941,3 +941,92 @@ void sev_snp_cpuid_init(struct boot_params *bp)
if (cpuid_info->count > 0)
sev_snp_cpuid_enabled = 1;
}
+
+#ifndef __BOOT_COMPRESSED
+
+static bool __init early_make_pgtable_enc(unsigned long physaddr)
+{
+ pmdval_t pmd;
+
+ /* Encrypted PMD entry for physaddr: early_pmd_flags hasn't been updated with SME bit yet; add it */
+ pmd = (physaddr & PMD_MASK) + early_pmd_flags + sme_get_me_mask();
+
+ return __early_make_pgtable((unsigned long)__va(physaddr), pmd); /* caller treats false as failure */
+}
+
+/*
+ * Called on the switch to virtual kernel addresses, before the #PF handler
+ * is set up. boot_params have already been parsed by sev_snp_cpuid_init(),
+ * but the CPUID page is no longer identity-mapped, so map it encrypted and
+ * repoint cpuid_info at its __va() address. Failures end in sev_es_terminate().
+ */
+void __init sev_snp_cpuid_init_virtual(void)
+{
+ /*
+ * We rely on sev_snp_cpuid_init() to do initial parsing of bootparams
+ * and initial setup. If that didn't enable the feature then don't try
+ * to enable it here.
+ */
+ if (!sev_snp_cpuid_active())
+ return;
+
+ /*
+ * Either boot_params/EFI advertised the feature even though SNP isn't
+ * enabled, or something else went wrong. Bail out.
+ */
+ if (!sev_feature_enabled(SEV_SNP))
+ sev_es_terminate(1, GHCB_TERM_CPUID);
+
+ /* Maps only the PMD containing sev_snp_cpuid_pa. NOTE(review): confirm the table cannot cross a PMD boundary */
+ if (!early_make_pgtable_enc(sev_snp_cpuid_pa))
+ sev_es_terminate(1, GHCB_TERM_CPUID); /* feature enabled but CPUID info can't be mapped */
+
+ cpuid_info = (const struct sev_snp_cpuid_info *)__va(sev_snp_cpuid_pa);
+}
+
+/* Remap the CPUID table via early_memremap(); setup_arch() calls this right after init_mem_mapping() */
+void __init sev_snp_cpuid_init_remap_early(void)
+{
+ if (!sev_snp_cpuid_active())
+ return;
+
+ /*
+ * CPUID table active without SEV_SNP: this really shouldn't be possible at this point.
+ */
+ if (!sev_feature_enabled(SEV_SNP))
+ sev_es_terminate(1, GHCB_TERM_CPUID);
+
+ cpuid_info = early_memremap(sev_snp_cpuid_pa, sev_snp_cpuid_sz); /* NOTE(review): result is not checked for NULL */
+}
+
+/* Final switch to run-time mapping; registered as an arch_initcall, returns 0 or -EIO */
+static int __init sev_snp_cpuid_init_remap(void)
+{
+ if (!sev_snp_cpuid_active())
+ return 0;
+
+ /*
+ * CPUID table active without SEV_SNP: this really shouldn't be possible at this point either.
+ */
+ if (!sev_feature_enabled(SEV_SNP))
+ sev_es_terminate(1, GHCB_TERM_CPUID);
+
+ /* Clean up earlier mapping (the one made with early_memremap()). */
+ if (cpuid_info)
+ early_memunmap((void *)cpuid_info, sev_snp_cpuid_sz);
+
+ /*
+ * We need ioremap_encrypted() to get an encrypted mapping, but this
+ * is normal RAM so can be accessed directly (hence the __force cast).
+ */
+ cpuid_info = (__force void *)ioremap_encrypted(sev_snp_cpuid_pa,
+ sev_snp_cpuid_sz);
+ if (!cpuid_info)
+ return -EIO; /* NOTE(review): cpuid_info stays NULL here and nothing in this function clears the active flag */
+
+ return 0;
+}
+
+arch_initcall(sev_snp_cpuid_init_remap);
+
+#endif /* __BOOT_COMPRESSED */
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 4fc07006f7f8..d3f4993b89cc 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1041,6 +1041,8 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
early_gdt_descr.address = (unsigned long)get_cpu_gdt_rw(cpu);
initial_code = (unsigned long)start_secondary;
initial_stack = idle->thread.sp;
+ /* don't repeat IDT setup work specific to the BSP */
+ initial_idt_setup = (unsigned long)early_setup_idt_common;

/* Enable the espfix hack for this CPU */
init_espfix_ap(cpu);
--
2.17.1