Re: [PATCH v6 7/8] x86/sev: Add full support for a segmented RMP table

From: Borislav Petkov
Date: Thu Dec 12 2024 - 04:12:51 EST


On Mon, Dec 02, 2024 at 02:50:52PM -0600, Tom Lendacky wrote:
> diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
> index 3272a73d3594..31d653db830a 100644
> --- a/arch/x86/virt/svm/sev.c
> +++ b/arch/x86/virt/svm/sev.c
> @@ -100,6 +100,10 @@ struct rmp_segment_desc {
> * a specific portion of memory. There can be up to 512 8-byte entries,
> * one pages worth.
> */
> +#define RST_ENTRY_MAPPED_SIZE(x) ((x) & GENMASK_ULL(19, 0))
> +#define RST_ENTRY_SEGMENT_BASE(x) ((x) & GENMASK_ULL(51, 20))
> +
> +#define RMP_SEGMENT_TABLE_SIZE SZ_4K

RST_SIZE as we abbreviate the segment table as RST so far...

> static struct rmp_segment_desc **rmp_segment_table __ro_after_init;
> static unsigned int rst_max_index __ro_after_init = 512;
>
> @@ -109,6 +113,9 @@ static u64 rmp_segment_mask;
> #define RST_ENTRY_INDEX(x) ((x) >> rmp_segment_shift)
> #define RMP_ENTRY_INDEX(x) ((u64)(PHYS_PFN((x) & rmp_segment_mask)))
>
> +static u64 rmp_cfg;
> +#define RMP_IS_SEGMENTED(x) ((x) & MSR_AMD64_SEG_RMP_ENABLED)

Please drop those macros.

if (rmp_cfg & MSR_AMD64_SEG_RMP_ENABLED)

is good enough.

> /* Mask to apply to a PFN to get the first PFN of a 2MB page */
> #define PFN_PMD_MASK GENMASK_ULL(63, PMD_SHIFT - PAGE_SHIFT)
>
> @@ -200,7 +207,49 @@ static void __init __snp_fixup_e820_tables(u64 pa)
> void __init snp_fixup_e820_tables(void)
> {
> __snp_fixup_e820_tables(probed_rmp_base);
> - __snp_fixup_e820_tables(probed_rmp_base + probed_rmp_size);
> +
> + if (RMP_IS_SEGMENTED(rmp_cfg)) {

That whole branch in a helper, pls, with a descriptive name what it does.

> + u64 pa, *rst, size, mapped_size;
> + unsigned int i;
> +
> + pa = probed_rmp_base;
> + pa += RMPTABLE_CPU_BOOKKEEPING_SZ;
> + pa += RMP_SEGMENT_TABLE_SIZE;

pa = A + B + C

> + __snp_fixup_e820_tables(pa);
> +
> + pa -= RMP_SEGMENT_TABLE_SIZE;

uff:
pa = probed_rmp_base + RMPTABLE_CPU_BOOKKEEPING_SZ;

__snp_fixup_e820_tables(pa + RMP_SEGMENT_TABLE_SIZE);

rst = early_memremap(pa, RMP_SEGMENT_TABLE_SIZE);

> + rst = early_memremap(pa, RMP_SEGMENT_TABLE_SIZE);
> + if (!rst)
> + return;
> +
> + for (i = 0; i < rst_max_index; i++) {
> + pa = RST_ENTRY_SEGMENT_BASE(rst[i]);
> + mapped_size = RST_ENTRY_MAPPED_SIZE(rst[i]);
> + if (!mapped_size)
> + continue;
> +
> + __snp_fixup_e820_tables(pa);
> +
> + /*
> + * Mapped size in GB. Mapped size is allowed to exceed
> + * the segment coverage size, but gets reduced to the
> + * segment coverage size.
> + */
> + mapped_size <<= 30;
> + if (mapped_size > rmp_segment_size)
> + mapped_size = rmp_segment_size;
> +
> + /* Calculate the RMP segment size (16 bytes/page mapped) */
> + size = PHYS_PFN(mapped_size);
> + size <<= 4;
> +
> + __snp_fixup_e820_tables(pa + size);
> + }
> +
> + early_memunmap(rst, RMP_SEGMENT_TABLE_SIZE);
> + } else {
> + __snp_fixup_e820_tables(probed_rmp_base + probed_rmp_size);
> + }
> }
>
> static bool __init clear_rmptable_bookkeeping(void)
> @@ -308,29 +357,17 @@ static bool __init alloc_rmp_segment_table(void)
> return true;
> }
>
> -/*
> - * Do the necessary preparations which are verified by the firmware as
> - * described in the SNP_INIT_EX firmware command description in the SNP
> - * firmware ABI spec.
> - */
> -static int __init snp_rmptable_init(void)
> +static bool __init contiguous_rmptable_setup(void)

setup_contiguous_rmptable() and setup_segmented_rmptable() - all natural. :)

> {
> - u64 max_rmp_pfn, calc_rmp_sz, rmptable_segment, rmptable_size, rmp_end, val;
> - unsigned int i;
> -
> - if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
> - return 0;
> -
> - if (!amd_iommu_snp_en)
> - goto nosnp;
> + u64 max_rmp_pfn, calc_rmp_sz, rmptable_segment, rmptable_size, rmp_end;
>
> if (!probed_rmp_size)
> - goto nosnp;
> + return false;

Lift that check into rmptable_setup(). Or should it be called setup_rmptable()
too? :)

>
> rmp_end = probed_rmp_base + probed_rmp_size - 1;
>
> /*
> - * Calculate the amount the memory that must be reserved by the BIOS to
> + * Calculate the amount of memory that must be reserved by the BIOS to
> * address the whole RAM, including the bookkeeping area. The RMP itself
> * must also be covered.
> */
> @@ -342,11 +379,11 @@ static int __init snp_rmptable_init(void)
> if (calc_rmp_sz > probed_rmp_size) {
> pr_err("Memory reserved for the RMP table does not cover full system RAM (expected 0x%llx got 0x%llx)\n",
> calc_rmp_sz, probed_rmp_size);
> - goto nosnp;
> + return false;
> }
>
> if (!alloc_rmp_segment_table())
> - goto nosnp;
> + return false;
>
> /* Map only the RMP entries */
> rmptable_segment = probed_rmp_base + RMPTABLE_CPU_BOOKKEEPING_SZ;
> @@ -354,9 +391,127 @@ static int __init snp_rmptable_init(void)
>
> if (!alloc_rmp_segment_desc(rmptable_segment, rmptable_size, 0)) {
> free_rmp_segment_table();
> - goto nosnp;
> + return false;
> }
>
> + return true;
> +}
> +
> +static bool __init segmented_rmptable_setup(void)
> +{
> + u64 rst_pa, *rst, pa, ram_pa_end, ram_pa_max;
> + unsigned int i, max_index;
> +
> + if (!probed_rmp_base)
> + return false;
> +
> + if (!alloc_rmp_segment_table())
> + return false;
> +
> + /* Map the RMP Segment Table */

Kinda obvious, right?

I mean, a variable called "rst_pa" should already explain what it is...

> + rst_pa = probed_rmp_base + RMPTABLE_CPU_BOOKKEEPING_SZ;
> + rst = memremap(rst_pa, RMP_SEGMENT_TABLE_SIZE, MEMREMAP_WB);
> + if (!rst) {
> + pr_err("Failed to map RMP segment table addr %#llx\n", rst_pa);
> + goto e_free;
> + }
> +
> + /* Get the address for the end of system RAM */
> + ram_pa_max = max_pfn << PAGE_SHIFT;

Ditto.

> +
> + /* Process each RMP segment */

Ditto.

> + max_index = 0;
> + ram_pa_end = 0;
> + for (i = 0; i < rst_max_index; i++) {
> + u64 rmp_segment, rmp_size, mapped_size;
> +
> + mapped_size = RST_ENTRY_MAPPED_SIZE(rst[i]);
> + if (!mapped_size)
> + continue;
> +
> + max_index = i;
> +
> + /*
> + * Mapped size in GB. Mapped size is allowed to exceed the
> + * segment coverage size, but gets reduced to the segment
> + * coverage size.
> + */
> + mapped_size <<= 30;
> + if (mapped_size > rmp_segment_size) {
> + pr_info("RMP segment %u mapped size (0x%llx) reduced to 0x%llx\n",
> + i, mapped_size, rmp_segment_size);
> + mapped_size = rmp_segment_size;
> + }
> +
> + rmp_segment = RST_ENTRY_SEGMENT_BASE(rst[i]);
> +
> + /* Calculate the RMP segment size (16 bytes/page mapped) */
> + rmp_size = PHYS_PFN(mapped_size);
> + rmp_size <<= 4;

rmp_size = PHYS_PFN(mapped_size) << 4;

> +
> + pa = (u64)i << rmp_segment_shift;
> +
> + /*
> + * Some segments may be for MMIO mapped above system RAM. These
> + * segments are used for Trusted I/O.
> + */
> + if (pa < ram_pa_max)
> + ram_pa_end = pa + mapped_size;
> +
> + if (!alloc_rmp_segment_desc(rmp_segment, rmp_size, pa))
> + goto e_unmap;
> +
> + pr_info("RMP segment %u physical address [%#llx - %#llx] covering [%#llx - %#llx]\n",
> + i, rmp_segment, rmp_segment + rmp_size - 1, pa, pa + mapped_size - 1);
> + }
> +
> + if (ram_pa_max > ram_pa_end) {
> + pr_err("Segmented RMP does not cover full system RAM (expected 0x%llx got 0x%llx)\n",
> + ram_pa_max, ram_pa_end);
> + goto e_unmap;
> + }
> +
> + /* Adjust the maximum index based on the found segments */
> + rst_max_index = max_index + 1;
> +
> + memunmap(rst);
> +
> + return true;
> +
> +e_unmap:
> + memunmap(rst);
> +
> +e_free:
> + free_rmp_segment_table();
> +
> + return false;
> +}
> +
> +static bool __init rmptable_setup(void)
> +{
> + return RMP_IS_SEGMENTED(rmp_cfg) ? segmented_rmptable_setup()
> + : contiguous_rmptable_setup();

How is that more readable than the most simple:

if (rmp_cfg & MSR_AMD64_SEG_RMP_ENABLED)
return setup_segmented_rmptable();
else
return setup_contiguous_rmptable();

?

> +}
> +
> +/*
> + * Do the necessary preparations which are verified by the firmware as
> + * described in the SNP_INIT_EX firmware command description in the SNP
> + * firmware ABI spec.
> + */
> +static int __init snp_rmptable_init(void)
> +{
> + unsigned int i;
> + u64 val;
> +
> + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
> + return 0;
> +
> + if (!amd_iommu_snp_en)
> + goto nosnp;
> +
> + if (!rmptable_setup())
> + goto nosnp;
> +
> /*
> * Check if SEV-SNP is already enabled, this can happen in case of
> * kexec boot.
> @@ -420,7 +575,7 @@ static void set_rmp_segment_info(unsigned int segment_shift)
>
> #define RMP_ADDR_MASK GENMASK_ULL(51, 13)
>
> -bool snp_probe_rmptable_info(void)
> +static bool probe_contiguous_rmptable_info(void)
> {
> u64 rmp_sz, rmp_base, rmp_end;
>
> @@ -453,6 +608,60 @@ bool snp_probe_rmptable_info(void)
> return true;
> }
>
> +static bool probe_segmented_rmptable_info(void)
> +{
> + unsigned int eax, ebx, segment_shift, segment_shift_min, segment_shift_max;
> + u64 rmp_base, rmp_end;
> +
> + rdmsrl(MSR_AMD64_RMP_BASE, rmp_base);
> + rdmsrl(MSR_AMD64_RMP_END, rmp_end);
> +
> + if (!(rmp_base & RMP_ADDR_MASK)) {
> + pr_err("Memory for the RMP table has not been reserved by BIOS\n");
> + return false;
> + }

Do them in the right order:

rdmsrl(MSR_AMD64_RMP_BASE, rmp_base);
if (!(rmp_base & RMP_ADDR_MASK)) {
pr_err("Memory for the RMP table has not been reserved by BIOS\n");
return false;
}

rdmsrl(MSR_AMD64_RMP_END, rmp_end);


> + WARN_ONCE(rmp_end & RMP_ADDR_MASK,
> + "Segmented RMP enabled but RMP_END MSR is non-zero\n");

Meaning?

Fatal error? Doesn't look like it. Sanity-check for BIOS testers?

> +
> + /* Obtain the min and max supported RMP segment size */
> + eax = cpuid_eax(0x80000025);
> + segment_shift_min = eax & GENMASK(5, 0);
> + segment_shift_max = (eax & GENMASK(11, 6)) >> 6;
> +
> + /* Verify the segment size is within the supported limits */
> + segment_shift = MSR_AMD64_RMP_SEGMENT_SHIFT(rmp_cfg);
> + if (segment_shift > segment_shift_max || segment_shift < segment_shift_min) {
> + pr_err("RMP segment size (%u) is not within advertised bounds (min=%u, max=%u)\n",
> + segment_shift, segment_shift_min, segment_shift_max);
> + return false;
> + }
> +
> + /* Override the max supported RST index if a hardware limit exists */
> + ebx = cpuid_ebx(0x80000025);
> + if (ebx & BIT(10))
> + rst_max_index = ebx & GENMASK(9, 0);
> +
> + set_rmp_segment_info(segment_shift);
> +
> + probed_rmp_base = rmp_base;
> + probed_rmp_size = 0;
> +
> + pr_info("Segmented RMP base table physical range [0x%016llx - 0x%016llx]\n",
> + rmp_base, rmp_base + RMPTABLE_CPU_BOOKKEEPING_SZ + RMP_SEGMENT_TABLE_SIZE);
> +
> + return true;
> +}
> +
> +bool snp_probe_rmptable_info(void)
> +{
> + if (cpu_feature_enabled(X86_FEATURE_SEGMENTED_RMP))
> + rdmsrl(MSR_AMD64_RMP_CFG, rmp_cfg);
> +
> + return RMP_IS_SEGMENTED(rmp_cfg) ? probe_segmented_rmptable_info()
> + : probe_contiguous_rmptable_info();

As above.

Thx.

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette