RE: [RFC PATCH V2 12/18] x86/hyperv: Initialize cpu and memory for sev-snp enlightened guest

From: Michael Kelley (LINUX)
Date: Wed Dec 28 2022 - 12:09:12 EST


From: Tianyu Lan <ltykernel@xxxxxxxxx> Sent: Friday, November 18, 2022 7:46 PM
>
> Read processor amd memory info from specific address which are
> populated by Hyper-V. Initialize smp cpu related ops, pvalidate
> system memory and add it into e820 table.
>
> Signed-off-by: Tianyu Lan <tiala@xxxxxxxxxxxxx>
> ---
> arch/x86/kernel/cpu/mshyperv.c | 75 ++++++++++++++++++++++++++++++++++
> 1 file changed, 75 insertions(+)
>
> diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
> index 2ea4f21c6172..f0c97210c64a 100644
> --- a/arch/x86/kernel/cpu/mshyperv.c
> +++ b/arch/x86/kernel/cpu/mshyperv.c
> @@ -34,6 +34,12 @@
> #include <clocksource/hyperv_timer.h>
> #include <asm/numa.h>
> #include <asm/coco.h>
> +#include <asm/io_apic.h>
> +#include <asm/svm.h>
> +#include <asm/sev.h>
> +#include <asm/sev-snp.h>
> +#include <asm/realmode.h>
> +#include <asm/e820/api.h>
>
> /* Is Linux running as the root partition? */
> bool hv_root_partition;
> @@ -253,6 +259,33 @@ static void __init hv_smp_prepare_cpus(unsigned int max_cpus)
> }
> #endif
>
> +static __init int hv_snp_set_rtc_noop(const struct timespec64 *now) { return -EINVAL; }
> +static __init void hv_snp_get_rtc_noop(struct timespec64 *now) { }

These two functions duplicate set_rtc_noop() and get_rtc_noop() in x86_init.c. Couldn't
the "static" be removed from these two, and just use them, like with x86_init_noop()?
You'd also need to add extern statements in x86_init.h. But making them like the
other *_init_noop() functions seems better than duplicating them.

Also, it looks like these two functions might be called well after Linux is booted, so
having them marked as "__init" seems problematic. The same is true for set_rtc_noop()
and get_rtc_noop().

I'd suggesting breaking out this RTC handling into a separate patch in the series.

> +
> +static u32 processor_count;
> +
> +static __init void hv_snp_get_smp_config(unsigned int early)
> +{
> + if (!early) {
> + while (num_processors < processor_count) {
> + early_per_cpu(x86_cpu_to_apicid, num_processors) = num_processors;
> + early_per_cpu(x86_bios_cpu_apicid, num_processors) = num_processors;
> + physid_set(num_processors, phys_cpu_present_map);
> + set_cpu_possible(num_processors, true);
> + set_cpu_present(num_processors, true);
> + num_processors++;
> + }
> + }
> +}
> +
> +struct memory_map_entry {
> + u64 starting_gpn;
> + u64 numpages;
> + u16 type;
> + u16 flags;
> + u32 reserved;
> +};
> +
> static void __init ms_hyperv_init_platform(void)
> {
> int hv_max_functions_eax;
> @@ -260,6 +293,11 @@ static void __init ms_hyperv_init_platform(void)
> int hv_host_info_ebx;
> int hv_host_info_ecx;
> int hv_host_info_edx;
> + struct memory_map_entry *entry;
> + struct e820_entry *e820_entry;
> + u64 e820_end;
> + u64 ram_end;
> + u64 page;
>
> #ifdef CONFIG_PARAVIRT
> pv_info.name = "Hyper-V";
> @@ -477,6 +515,43 @@ static void __init ms_hyperv_init_platform(void)
> if (!(ms_hyperv.features & HV_ACCESS_TSC_INVARIANT))
> mark_tsc_unstable("running on Hyper-V");
>
> + if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) {
> + x86_platform.legacy.reserve_bios_regions = 0;
> + x86_platform.set_wallclock = hv_snp_set_rtc_noop;
> + x86_platform.get_wallclock = hv_snp_get_rtc_noop;

Should x86_platform.legacy.rtc be set to 0 as well so that add_rtc_cmos()
does not try to register the RTC as a platform device?

> + x86_init.resources.probe_roms = x86_init_noop;
> + x86_init.resources.reserve_resources = x86_init_noop;

reserve_bios_regions, probe_roms, and reserve_resources are all being
set to 0 or NULL. Seems like there should be a comment or text in the
commit message saying why. I can work with you offline to write a concise
message.

> + x86_init.mpparse.find_smp_config = x86_init_noop;
> + x86_init.mpparse.get_smp_config = hv_snp_get_smp_config;
> +
> + /*
> + * Hyper-V SEV-SNP enlightened guest doesn't support ioapic
> + * and legacy APIC page read/write. Switch to hv apic here.
> + */
> + disable_ioapic_support();
> + hv_apic_init();
> +
> + processor_count = *(u32 *)__va(EN_SEV_SNP_PROCESSOR_INFO_ADDR);

EN_SEV_SNP_PROCESSOR_INFO_ADDR is not defined until Patch 13 of this series.

> +
> + entry = (struct memory_map_entry *)(__va(EN_SEV_SNP_PROCESSOR_INFO_ADDR)
> + + sizeof(struct memory_map_entry));

This calculation isn't what I expected. A brief summary of the layout of the memory
at this fixed address would be helpful. Evidently, the first 32 bits is the processor count,
and then there are multiple memory map entries. But why skip over an entire memory
map entry to account for the 32-bit processor_count?

Maybe the definition of struct memory_map_entry and EN_SEV_SNP_PROCESSOR_INFO_ADDR
should go in arch/x86/include/asm/mshyperv.h, along with some explanatory comments.

> +
> + for (; entry->numpages != 0; entry++) {
> + e820_entry = &e820_table->entries[e820_table->nr_entries - 1];
> + e820_end = e820_entry->addr + e820_entry->size;
> + ram_end = (entry->starting_gpn + entry->numpages) * PAGE_SIZE;
> +
> + if (e820_end < entry->starting_gpn * PAGE_SIZE)
> + e820_end = entry->starting_gpn * PAGE_SIZE;
> + if (e820_end < ram_end) {

I'm curious about any gaps or overlaps with the existing e820 map entries. What
is expected? Or is this code just trying to be defensive?

> + pr_info("Hyper-V: add [mem %#018Lx-%#018Lx]\n", e820_end, ram_end - 1);

Seems like the above message should include "e820" to make clear this is an update
to the e820 map.

> + e820__range_add(e820_end, ram_end - e820_end, E820_TYPE_RAM);
> + for (page = e820_end; page < ram_end; page += PAGE_SIZE)
> + pvalidate((unsigned long)__va(page), RMP_PG_SIZE_4K, true);
> + }
> + }

Does e820__update_table() need to be called once any range adds are complete?
I don't know, so I'm just asking.

> + }
> +
> hardlockup_detector_disable();
> }
>
> --
> 2.25.1