Re: [PATCH v3 7/8] x86/sev: Add full support for a segmented RMP table
From: Nikunj A. Dadhania
Date: Fri Oct 18 2024 - 02:32:22 EST
On 9/30/2024 8:52 PM, Tom Lendacky wrote:
> A segmented RMP table allows for improved locality of reference between
> the memory protected by the RMP and the RMP entries themselves.
>
> Add support to detect and initialize a segmented RMP table with multiple
> segments as configured by the system BIOS. While the RMPREAD instruction
> will be used to read an RMP entry in a segmented RMP, initialization and
> debugging capabilities will require the mapping of the segments.
>
> The RMP_CFG MSR indicates if segmented RMP support is enabled and, if
> enabled, the amount of memory that an RMP segment covers. When segmented
> RMP support is enabled, the RMP_BASE MSR points to the start of the RMP
> bookkeeping area, which is 16K in size. The RMP Segment Table (RST) is
> located immediately after the bookkeeping area and is 4K in size. The RST
> contains up to 512 8-byte entries that identify the location of the RMP
> segment and amount of memory mapped by the segment (which must be less
> than or equal to the configured segment size). The physical address that
> is covered by a segment is based on the segment size and the index of the
> segment in the RST. The RMP entry for a physical address is based on the
> offset within the segment.
>
> For example, if the segment size is 64GB (0x1000000000 or 1 << 36), then
> physical address 0x9000800000 is RST entry 9 (0x9000800000 >> 36) and
> RST entry 9 covers physical memory 0x9000000000 to 0x9FFFFFFFFF.
>
> The RMP entry index within the RMP segment is the physical address
> AND-ed with the segment mask, 64GB - 1 (0xFFFFFFFFF), and then
> right-shifted 12 bits or PHYS_PFN(0x9000800000 & 0xFFFFFFFFF), which
> is 0x800.
>
> CPUID 0x80000025_EBX[9:0] describes the number of RMP segments that can
> be cached by the hardware. Additionally, if CPUID 0x80000025_EBX[10] is
> set, then the number of actual RMP segments defined cannot exceed the
> number of RMP segments that can be cached and can be used as a maximum
> RST index.
In case EBX[10] is not set, we will need to iterate over all the 512 segment
entries?
>
> Signed-off-by: Tom Lendacky <thomas.lendacky@xxxxxxx>
> ---
> arch/x86/include/asm/cpufeatures.h | 1 +
> arch/x86/include/asm/msr-index.h | 9 +-
> arch/x86/virt/svm/sev.c | 231 ++++++++++++++++++++++++++---
> 3 files changed, 218 insertions(+), 23 deletions(-)
>
> diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
> index 93620a4c5b15..417cdc636a12 100644
> --- a/arch/x86/include/asm/cpufeatures.h
> +++ b/arch/x86/include/asm/cpufeatures.h
> @@ -448,6 +448,7 @@
> #define X86_FEATURE_SME_COHERENT (19*32+10) /* AMD hardware-enforced cache coherency */
> #define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" AMD SEV-ES full debug state swap support */
> #define X86_FEATURE_RMPREAD (19*32+21) /* RMPREAD instruction */
> +#define X86_FEATURE_SEGMENTED_RMP (19*32+23) /* Segmented RMP support */
> #define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */
>
> /* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */
> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
> index 3ae84c3b8e6d..8b57c4d1098f 100644
> --- a/arch/x86/include/asm/msr-index.h
> +++ b/arch/x86/include/asm/msr-index.h
> @@ -682,11 +682,14 @@
> #define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT)
> #define MSR_AMD64_SNP_RESV_BIT 18
> #define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT)
> -
> -#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
> -
Moved accidentally?
> #define MSR_AMD64_RMP_BASE 0xc0010132
> #define MSR_AMD64_RMP_END 0xc0010133
> +#define MSR_AMD64_RMP_CFG 0xc0010136
> +#define MSR_AMD64_SEG_RMP_ENABLED_BIT 0
> +#define MSR_AMD64_SEG_RMP_ENABLED BIT_ULL(MSR_AMD64_SEG_RMP_ENABLED_BIT)
> +#define MSR_AMD64_RMP_SEGMENT_SHIFT(x) (((x) & GENMASK_ULL(13, 8)) >> 8)
> +
> +#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
>
> #define MSR_SVSM_CAA 0xc001f000
>
> diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
> index ebfb924652f8..2f83772d3daa 100644
> --- a/arch/x86/virt/svm/sev.c
> +++ b/arch/x86/virt/svm/sev.c
> @@ -97,6 +97,10 @@ struct rmp_segment_desc {
> * a specific portion of memory. There can be up to 512 8-byte entries,
> * one pages worth.
> */
> +#define RST_ENTRY_MAPPED_SIZE(x) ((x) & GENMASK_ULL(19, 0))
> +#define RST_ENTRY_SEGMENT_BASE(x) ((x) & GENMASK_ULL(51, 20))
> +
> +#define RMP_SEGMENT_TABLE_SIZE SZ_4K
> static struct rmp_segment_desc **rmp_segment_table __ro_after_init;
> static unsigned int rst_max_index __ro_after_init = 512;
>
> @@ -107,6 +111,9 @@ static unsigned long rmp_segment_coverage_mask;
> #define RST_ENTRY_INDEX(x) ((x) >> rmp_segment_coverage_shift)
> #define RMP_ENTRY_INDEX(x) PHYS_PFN((x) & rmp_segment_coverage_mask)
>
> +static u64 rmp_cfg;
> +#define RMP_IS_SEGMENTED(x) ((x) & MSR_AMD64_SEG_RMP_ENABLED)
> +
> /* Mask to apply to a PFN to get the first PFN of a 2MB page */
> #define PFN_PMD_MASK GENMASK_ULL(63, PMD_SHIFT - PAGE_SHIFT)
>
> @@ -196,7 +203,42 @@ static void __init __snp_fixup_e820_tables(u64 pa)
<skipped the e820 bits>
> @@ -302,24 +344,12 @@ static bool __init alloc_rmp_segment_table(void)
> return true;
> }
>
> -/*
> - * Do the necessary preparations which are verified by the firmware as
> - * described in the SNP_INIT_EX firmware command description in the SNP
> - * firmware ABI spec.
> - */
> -static int __init snp_rmptable_init(void)
> +static bool __init contiguous_rmptable_setup(void)
> {
> - u64 max_rmp_pfn, calc_rmp_sz, rmptable_segment, rmptable_size, rmp_end, val;
> - unsigned int i;
> -
> - if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
> - return 0;
> -
> - if (!amd_iommu_snp_en)
> - goto nosnp;
> + u64 max_rmp_pfn, calc_rmp_sz, rmptable_segment, rmptable_size, rmp_end;
>
> if (!probed_rmp_size)
> - goto nosnp;
> + return false;
>
> rmp_end = probed_rmp_base + probed_rmp_size - 1;
>
If you dont mind, please fold the below comment update in contiguous_rmptable_setup()
found during review. If required, I can send a separate patch.
diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index 2f83772d3daa..d5a9f8164672 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -354,7 +354,7 @@ static bool __init contiguous_rmptable_setup(void)
rmp_end = probed_rmp_base + probed_rmp_size - 1;
/*
- * Calculate the amount the memory that must be reserved by the BIOS to
+ * Calculate the amount of memory that must be reserved by the BIOS to
* address the whole RAM, including the bookkeeping area. The RMP itself
* must also be covered.
*/
> @@ -336,11 +366,11 @@ static int __init snp_rmptable_init(void)
> if (calc_rmp_sz > probed_rmp_size) {
> pr_err("Memory reserved for the RMP table does not cover full system RAM (expected 0x%llx got 0x%llx)\n",
> calc_rmp_sz, probed_rmp_size);
> - goto nosnp;
> + return false;
> }
>
> if (!alloc_rmp_segment_table())
> - goto nosnp;
> + return false;
>
> /* Map only the RMP entries */
> rmptable_segment = probed_rmp_base + RMPTABLE_CPU_BOOKKEEPING_SZ;
> @@ -348,9 +378,116 @@ static int __init snp_rmptable_init(void)
>
> if (!alloc_rmp_segment_desc(rmptable_segment, rmptable_size, 0)) {
> free_rmp_segment_table();
> - goto nosnp;
> + return false;
> }
>
> + return true;
> +}
> +
> +static bool __init segmented_rmptable_setup(void)
> +{
> + u64 rst_pa, *rst, pa, ram_pa_end, ram_pa_max;
> + unsigned int i, max_index;
> +
> + if (!probed_rmp_base)
> + return false;
> +
> + if (!alloc_rmp_segment_table())
> + return false;
> +
> + /* Map the RMP Segment Table */
> + rst_pa = probed_rmp_base + RMPTABLE_CPU_BOOKKEEPING_SZ;
> + rst = memremap(rst_pa, RMP_SEGMENT_TABLE_SIZE, MEMREMAP_WB);
> + if (!rst) {
> + pr_err("Failed to map RMP segment table addr %#llx\n", rst_pa);
> + goto e_free;
> + }
> +
> + /* Get the address for the end of system RAM */
> + ram_pa_max = max_pfn << PAGE_SHIFT;
> +
> + /* Process each RMP segment */
> + max_index = 0;
> + ram_pa_end = 0;
> + for (i = 0; i < rst_max_index; i++) {
> + u64 rmp_segment, rmp_size, mapped_size;
> +
> + mapped_size = RST_ENTRY_MAPPED_SIZE(rst[i]);
> + if (!mapped_size)
> + continue;
> +
> + max_index = i;
> +
> + /* Mapped size in GB */
> + mapped_size *= (1ULL << 30);
> + if (mapped_size > rmp_segment_coverage_size)
> + mapped_size = rmp_segment_coverage_size;
This seems to be an error in BIOS RST programming, probably a print/warning
would help during debug.
> +
> + rmp_segment = RST_ENTRY_SEGMENT_BASE(rst[i]);
> +
> + rmp_size = PHYS_PFN(mapped_size);
> + rmp_size <<= 4;
A comment above this will help, as you are calculating 16 bytes/page.
> + pa = (u64)i << rmp_segment_coverage_shift;
> +
> + /* Some segments may be for MMIO mapped above system RAM */
Why will RST have MMIO mapped entries ?
> + if (pa < ram_pa_max)
> + ram_pa_end = pa + mapped_size;
> +
> + if (!alloc_rmp_segment_desc(rmp_segment, rmp_size, pa))
> + goto e_unmap;
> +
> + pr_info("RMP segment %u physical address [%#llx - %#llx] covering [%#llx - %#llx]\n",
> + i, rmp_segment, rmp_segment + rmp_size - 1, pa, pa + mapped_size - 1);
> + }
> +
> + if (ram_pa_max > ram_pa_end) {
> + pr_err("Segmented RMP does not cover full system RAM (expected 0x%llx got 0x%llx)\n",
> + ram_pa_max, ram_pa_end);
> + goto e_unmap;
> + }
> +
> + /* Adjust the maximum index based on the found segments */
> + rst_max_index = max_index + 1;
> +
> + memunmap(rst);
> +
> + return true;
> +
> +e_unmap:
> + memunmap(rst);
> +
> +e_free:
> + free_rmp_segment_table();
> +
> + return false;
> +}
> +
> +static bool __init rmptable_setup(void)
> +{
> + return RMP_IS_SEGMENTED(rmp_cfg) ? segmented_rmptable_setup()
> + : contiguous_rmptable_setup();
> +}
> +
> +/*
> + * Do the necessary preparations which are verified by the firmware as
> + * described in the SNP_INIT_EX firmware command description in the SNP
> + * firmware ABI spec.
> + */
> +static int __init snp_rmptable_init(void)
> +{
> + unsigned int i;
> + u64 val;
> +
> + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
> + return 0;
> +
> + if (!amd_iommu_snp_en)
> + goto nosnp;
> +
> + if (!rmptable_setup())
> + goto nosnp;
> +
> /*
> * Check if SEV-SNP is already enabled, this can happen in case of
> * kexec boot.
> @@ -418,7 +555,7 @@ static void set_rmp_segment_info(unsigned int segment_shift)
>
> #define RMP_ADDR_MASK GENMASK_ULL(51, 13)
>
> -bool snp_probe_rmptable_info(void)
> +static bool probe_contiguous_rmptable_info(void)
> {
> u64 rmp_sz, rmp_base, rmp_end;
>
> @@ -451,6 +588,60 @@ bool snp_probe_rmptable_info(void)
> return true;
> }
>
> +static bool probe_segmented_rmptable_info(void)
> +{
> + unsigned int eax, ebx, segment_shift, segment_shift_min, segment_shift_max;
> + u64 rmp_base, rmp_end;
> +
> + rdmsrl(MSR_AMD64_RMP_BASE, rmp_base);
> + rdmsrl(MSR_AMD64_RMP_END, rmp_end);
> +
> + if (!(rmp_base & RMP_ADDR_MASK)) {
> + pr_err("Memory for the RMP table has not been reserved by BIOS\n");
> + return false;
> + }
> +
> + WARN_ONCE(rmp_end & RMP_ADDR_MASK,
> + "Segmented RMP enabled but RMP_END MSR is non-zero\n");
> +
> + /* Obtain the min and max supported RMP segment size */
> + eax = cpuid_eax(0x80000025);
> + segment_shift_min = eax & GENMASK(5, 0);
> + segment_shift_max = (eax & GENMASK(11, 6)) >> 6;
> +
> + /* Verify the segment size is within the supported limits */
> + segment_shift = MSR_AMD64_RMP_SEGMENT_SHIFT(rmp_cfg);
> + if (segment_shift > segment_shift_max || segment_shift < segment_shift_min) {
> + pr_err("RMP segment size (%u) is not within advertised bounds (min=%u, max=%u)\n",
> + segment_shift, segment_shift_min, segment_shift_max);
> + return false;
> + }
> +
> + /* Override the max supported RST index if a hardware limit exists */
> + ebx = cpuid_ebx(0x80000025);
> + if (ebx & BIT(10))
> + rst_max_index = ebx & GENMASK(9, 0);
> +
> + set_rmp_segment_info(segment_shift);
> +
> + probed_rmp_base = rmp_base;
> + probed_rmp_size = 0;
> +
> + pr_info("RMP segment table physical address [0x%016llx - 0x%016llx]\n",
> + rmp_base, rmp_base + RMPTABLE_CPU_BOOKKEEPING_SZ + RMP_SEGMENT_TABLE_SIZE);
> +
> + return true;
> +}
> +
> +bool snp_probe_rmptable_info(void)
> +{
> + if (cpu_feature_enabled(X86_FEATURE_SEGMENTED_RMP))
> + rdmsrl(MSR_AMD64_RMP_CFG, rmp_cfg);
> +
> + return RMP_IS_SEGMENTED(rmp_cfg) ? probe_segmented_rmptable_info()
> + : probe_contiguous_rmptable_info();
> +}
> +
> static struct rmpentry_raw *__get_rmpentry(unsigned long pfn)
> {
> struct rmp_segment_desc *desc;