[PATCH v6 7/8] x86/sev: Add full support for a segmented RMP table

From: Tom Lendacky
Date: Mon Dec 02 2024 - 15:52:48 EST


A segmented RMP table allows for improved locality of reference between
the memory protected by the RMP and the RMP entries themselves.

Add support to detect and initialize a segmented RMP table with multiple
segments as configured by the system BIOS. While the RMPREAD instruction
will be used to read an RMP entry in a segmented RMP, initialization and
debugging capabilities will require the mapping of the segments.

The RMP_CFG MSR indicates if segmented RMP support is enabled and, if
enabled, the amount of memory that an RMP segment covers. When segmented
RMP support is enabled, the RMP_BASE MSR points to the start of the RMP
bookkeeping area, which is 16K in size. The RMP Segment Table (RST) is
located immediately after the bookkeeping area and is 4K in size. The RST
contains up to 512 8-byte entries that identify the location of the RMP
segment and the amount of memory mapped by the segment (which must be less
than or equal to the configured segment size; the kernel reduces a larger
value to the segment size). The physical address that
is covered by a segment is based on the segment size and the index of the
segment in the RST. The RMP entry for a physical address is based on the
offset within the segment.

For example, if the segment size is 64GB (0x1000000000 or 1 << 36), then
physical address 0x9000800000 is RST entry 9 (0x9000800000 >> 36) and
RST entry 9 covers physical memory 0x9000000000 to 0x9FFFFFFFFF.

The RMP entry index within the RMP segment is the physical address
AND-ed with the segment mask, 64GB - 1 (0xFFFFFFFFF), and then
right-shifted by 12 bits, i.e. PHYS_PFN(0x9000800000 & 0xFFFFFFFFF),
which is 0x800.

CPUID 0x80000025_EBX[9:0] describes the number of RMP segments that can
be cached by the hardware. Additionally, if CPUID 0x80000025_EBX[10] is
set, then the number of RMP segments actually defined cannot exceed the
number of RMP segments that can be cached, so that number can be used
as the upper limit for the RST index.

Signed-off-by: Tom Lendacky <thomas.lendacky@xxxxxxx>
Reviewed-by: Nikunj A Dadhania <nikunj@xxxxxxx>
Reviewed-by: Neeraj Upadhyay <Neeraj.Upadhyay@xxxxxxx>
---
arch/x86/include/asm/cpufeatures.h | 1 +
arch/x86/include/asm/msr-index.h | 8 +-
arch/x86/virt/svm/sev.c | 251 ++++++++++++++++++++++++++---
3 files changed, 236 insertions(+), 24 deletions(-)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 5535edc6e8d7..6a6db7cd97cb 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -452,6 +452,7 @@
#define X86_FEATURE_SME_COHERENT (19*32+10) /* AMD hardware-enforced cache coherency */
#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" AMD SEV-ES full debug state swap support */
#define X86_FEATURE_RMPREAD (19*32+21) /* RMPREAD instruction */
+#define X86_FEATURE_SEGMENTED_RMP (19*32+23) /* Segmented RMP support */
#define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */

/* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 3ae84c3b8e6d..3f3e2bc99162 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -644,6 +644,7 @@
#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
#define MSR_AMD64_SVM_AVIC_DOORBELL 0xc001011b
#define MSR_AMD64_VM_PAGE_FLUSH 0xc001011e
+#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
#define MSR_AMD64_SEV_ES_GHCB 0xc0010130
#define MSR_AMD64_SEV 0xc0010131
#define MSR_AMD64_SEV_ENABLED_BIT 0
@@ -682,11 +683,12 @@
#define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT)
#define MSR_AMD64_SNP_RESV_BIT 18
#define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT)
-
-#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
-
#define MSR_AMD64_RMP_BASE 0xc0010132
#define MSR_AMD64_RMP_END 0xc0010133
+#define MSR_AMD64_RMP_CFG 0xc0010136
+#define MSR_AMD64_SEG_RMP_ENABLED_BIT 0
+#define MSR_AMD64_SEG_RMP_ENABLED BIT_ULL(MSR_AMD64_SEG_RMP_ENABLED_BIT)
+#define MSR_AMD64_RMP_SEGMENT_SHIFT(x) (((x) & GENMASK_ULL(13, 8)) >> 8)

#define MSR_SVSM_CAA 0xc001f000

diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index 3272a73d3594..31d653db830a 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -100,6 +100,10 @@ struct rmp_segment_desc {
* a specific portion of memory. There can be up to 512 8-byte entries,
* one pages worth.
*/
+#define RST_ENTRY_MAPPED_SIZE(x) ((x) & GENMASK_ULL(19, 0))
+#define RST_ENTRY_SEGMENT_BASE(x) ((x) & GENMASK_ULL(51, 20))
+
+#define RMP_SEGMENT_TABLE_SIZE SZ_4K
static struct rmp_segment_desc **rmp_segment_table __ro_after_init;
static unsigned int rst_max_index __ro_after_init = 512;

@@ -109,6 +113,9 @@ static u64 rmp_segment_mask;
#define RST_ENTRY_INDEX(x) ((x) >> rmp_segment_shift)
#define RMP_ENTRY_INDEX(x) ((u64)(PHYS_PFN((x) & rmp_segment_mask)))

+static u64 rmp_cfg;
+#define RMP_IS_SEGMENTED(x) ((x) & MSR_AMD64_SEG_RMP_ENABLED)
+
/* Mask to apply to a PFN to get the first PFN of a 2MB page */
#define PFN_PMD_MASK GENMASK_ULL(63, PMD_SHIFT - PAGE_SHIFT)

@@ -200,7 +207,49 @@ static void __init __snp_fixup_e820_tables(u64 pa)
void __init snp_fixup_e820_tables(void)
{
__snp_fixup_e820_tables(probed_rmp_base);
- __snp_fixup_e820_tables(probed_rmp_base + probed_rmp_size);
+
+ if (RMP_IS_SEGMENTED(rmp_cfg)) {
+ u64 pa, *rst, size, mapped_size;
+ unsigned int i;
+
+ pa = probed_rmp_base;
+ pa += RMPTABLE_CPU_BOOKKEEPING_SZ;
+ pa += RMP_SEGMENT_TABLE_SIZE;
+ __snp_fixup_e820_tables(pa);
+
+ pa -= RMP_SEGMENT_TABLE_SIZE;
+ rst = early_memremap(pa, RMP_SEGMENT_TABLE_SIZE);
+ if (!rst)
+ return;
+
+ for (i = 0; i < rst_max_index; i++) {
+ pa = RST_ENTRY_SEGMENT_BASE(rst[i]);
+ mapped_size = RST_ENTRY_MAPPED_SIZE(rst[i]);
+ if (!mapped_size)
+ continue;
+
+ __snp_fixup_e820_tables(pa);
+
+ /*
+ * Mapped size in GB. Mapped size is allowed to exceed
+ * the segment coverage size, but gets reduced to the
+ * segment coverage size.
+ */
+ mapped_size <<= 30;
+ if (mapped_size > rmp_segment_size)
+ mapped_size = rmp_segment_size;
+
+ /* Calculate the RMP segment size (16 bytes/page mapped) */
+ size = PHYS_PFN(mapped_size);
+ size <<= 4;
+
+ __snp_fixup_e820_tables(pa + size);
+ }
+
+ early_memunmap(rst, RMP_SEGMENT_TABLE_SIZE);
+ } else {
+ __snp_fixup_e820_tables(probed_rmp_base + probed_rmp_size);
+ }
}

static bool __init clear_rmptable_bookkeeping(void)
@@ -308,29 +357,17 @@ static bool __init alloc_rmp_segment_table(void)
return true;
}

-/*
- * Do the necessary preparations which are verified by the firmware as
- * described in the SNP_INIT_EX firmware command description in the SNP
- * firmware ABI spec.
- */
-static int __init snp_rmptable_init(void)
+static bool __init contiguous_rmptable_setup(void)
{
- u64 max_rmp_pfn, calc_rmp_sz, rmptable_segment, rmptable_size, rmp_end, val;
- unsigned int i;
-
- if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
- return 0;
-
- if (!amd_iommu_snp_en)
- goto nosnp;
+ u64 max_rmp_pfn, calc_rmp_sz, rmptable_segment, rmptable_size, rmp_end;

if (!probed_rmp_size)
- goto nosnp;
+ return false;

rmp_end = probed_rmp_base + probed_rmp_size - 1;

/*
- * Calculate the amount the memory that must be reserved by the BIOS to
+ * Calculate the amount of memory that must be reserved by the BIOS to
* address the whole RAM, including the bookkeeping area. The RMP itself
* must also be covered.
*/
@@ -342,11 +379,11 @@ static int __init snp_rmptable_init(void)
if (calc_rmp_sz > probed_rmp_size) {
pr_err("Memory reserved for the RMP table does not cover full system RAM (expected 0x%llx got 0x%llx)\n",
calc_rmp_sz, probed_rmp_size);
- goto nosnp;
+ return false;
}

if (!alloc_rmp_segment_table())
- goto nosnp;
+ return false;

/* Map only the RMP entries */
rmptable_segment = probed_rmp_base + RMPTABLE_CPU_BOOKKEEPING_SZ;
@@ -354,9 +391,127 @@ static int __init snp_rmptable_init(void)

if (!alloc_rmp_segment_desc(rmptable_segment, rmptable_size, 0)) {
free_rmp_segment_table();
- goto nosnp;
+ return false;
}

+ return true;
+}
+
+static bool __init segmented_rmptable_setup(void)
+{
+ u64 rst_pa, *rst, pa, ram_pa_end, ram_pa_max;
+ unsigned int i, max_index;
+
+ if (!probed_rmp_base)
+ return false;
+
+ if (!alloc_rmp_segment_table())
+ return false;
+
+ /* Map the RMP Segment Table (RST), which follows the bookkeeping area */
+ rst_pa = probed_rmp_base + RMPTABLE_CPU_BOOKKEEPING_SZ;
+ rst = memremap(rst_pa, RMP_SEGMENT_TABLE_SIZE, MEMREMAP_WB);
+ if (!rst) {
+ pr_err("Failed to map RMP segment table addr %#llx\n", rst_pa);
+ goto e_free;
+ }
+
+ /* Get the address for the end of system RAM */
+ ram_pa_max = max_pfn << PAGE_SHIFT;
+
+ /* Process each RST entry; entries with a zero mapped size are skipped */
+ max_index = 0;
+ ram_pa_end = 0;
+ for (i = 0; i < rst_max_index; i++) {
+ u64 rmp_segment, rmp_size, mapped_size;
+
+ mapped_size = RST_ENTRY_MAPPED_SIZE(rst[i]);
+ if (!mapped_size)
+ continue;
+
+ max_index = i;
+
+ /*
+ * Mapped size in GB. Mapped size is allowed to exceed the
+ * segment coverage size, but gets reduced to the segment
+ * coverage size.
+ */
+ mapped_size <<= 30;
+ if (mapped_size > rmp_segment_size) {
+ pr_info("RMP segment %u mapped size (0x%llx) reduced to 0x%llx\n",
+ i, mapped_size, rmp_segment_size);
+ mapped_size = rmp_segment_size;
+ }
+
+ rmp_segment = RST_ENTRY_SEGMENT_BASE(rst[i]);
+
+ /* Calculate the RMP segment size (16 bytes/page mapped) */
+ rmp_size = PHYS_PFN(mapped_size);
+ rmp_size <<= 4;
+
+ pa = (u64)i << rmp_segment_shift;
+
+ /*
+ * Some segments may cover MMIO above system RAM (used for Trusted I/O).
+ * Only segments starting below the end of RAM count toward RAM coverage.
+ */
+ if (pa < ram_pa_max)
+ ram_pa_end = pa + mapped_size;
+
+ if (!alloc_rmp_segment_desc(rmp_segment, rmp_size, pa))
+ goto e_unmap;
+
+ pr_info("RMP segment %u physical address [%#llx - %#llx] covering [%#llx - %#llx]\n",
+ i, rmp_segment, rmp_segment + rmp_size - 1, pa, pa + mapped_size - 1);
+ }
+
+ if (ram_pa_max > ram_pa_end) {
+ pr_err("Segmented RMP does not cover full system RAM (expected 0x%llx got 0x%llx)\n",
+ ram_pa_max, ram_pa_end);
+ goto e_unmap;
+ }
+
+ /* Trim the RST index limit to one past the highest populated entry */
+ rst_max_index = max_index + 1;
+
+ memunmap(rst);
+
+ return true;
+
+e_unmap:
+ memunmap(rst);
+
+e_free:
+ free_rmp_segment_table();
+
+ return false;
+}
+
+static bool __init rmptable_setup(void)
+{
+ return RMP_IS_SEGMENTED(rmp_cfg) ? segmented_rmptable_setup()
+ : contiguous_rmptable_setup();
+}
+
+/*
+ * Do the necessary preparations which are verified by the firmware as
+ * described in the SNP_INIT_EX firmware command description in the SNP
+ * firmware ABI spec.
+ */
+static int __init snp_rmptable_init(void)
+{
+ unsigned int i;
+ u64 val;
+
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
+ return 0;
+
+ if (!amd_iommu_snp_en)
+ goto nosnp;
+
+ if (!rmptable_setup())
+ goto nosnp;
+
/*
* Check if SEV-SNP is already enabled, this can happen in case of
* kexec boot.
@@ -420,7 +575,7 @@ static void set_rmp_segment_info(unsigned int segment_shift)

#define RMP_ADDR_MASK GENMASK_ULL(51, 13)

-bool snp_probe_rmptable_info(void)
+static bool probe_contiguous_rmptable_info(void)
{
u64 rmp_sz, rmp_base, rmp_end;

@@ -453,6 +608,60 @@ bool snp_probe_rmptable_info(void)
return true;
}

+static bool probe_segmented_rmptable_info(void)
+{
+ unsigned int eax, ebx, segment_shift, segment_shift_min, segment_shift_max;
+ u64 rmp_base, rmp_end;
+
+ rdmsrl(MSR_AMD64_RMP_BASE, rmp_base);
+ rdmsrl(MSR_AMD64_RMP_END, rmp_end);
+
+ if (!(rmp_base & RMP_ADDR_MASK)) {
+ pr_err("Memory for the RMP table has not been reserved by BIOS\n");
+ return false;
+ }
+
+ WARN_ONCE(rmp_end & RMP_ADDR_MASK,
+ "Segmented RMP enabled but RMP_END MSR is non-zero\n");
+
+ /* CPUID 0x80000025 EAX[5:0] / EAX[11:6]: min / max supported segment shift */
+ eax = cpuid_eax(0x80000025);
+ segment_shift_min = eax & GENMASK(5, 0);
+ segment_shift_max = (eax & GENMASK(11, 6)) >> 6;
+
+ /* Verify the segment size is within the supported limits */
+ segment_shift = MSR_AMD64_RMP_SEGMENT_SHIFT(rmp_cfg);
+ if (segment_shift > segment_shift_max || segment_shift < segment_shift_min) {
+ pr_err("RMP segment size (%u) is not within advertised bounds (min=%u, max=%u)\n",
+ segment_shift, segment_shift_min, segment_shift_max);
+ return false;
+ }
+
+ /* If EBX[10] is set, EBX[9:0] overrides the default RST index limit */
+ ebx = cpuid_ebx(0x80000025);
+ if (ebx & BIT(10))
+ rst_max_index = ebx & GENMASK(9, 0);
+
+ set_rmp_segment_info(segment_shift);
+
+ probed_rmp_base = rmp_base;
+ probed_rmp_size = 0;
+
+ pr_info("Segmented RMP base table physical range [0x%016llx - 0x%016llx]\n",
+ rmp_base, rmp_base + RMPTABLE_CPU_BOOKKEEPING_SZ + RMP_SEGMENT_TABLE_SIZE - 1);
+
+ return true;
+}
+
+bool snp_probe_rmptable_info(void)
+{
+ if (cpu_feature_enabled(X86_FEATURE_SEGMENTED_RMP))
+ rdmsrl(MSR_AMD64_RMP_CFG, rmp_cfg);
+
+ return RMP_IS_SEGMENTED(rmp_cfg) ? probe_segmented_rmptable_info()
+ : probe_contiguous_rmptable_info();
+}
+
static struct rmpentry_raw *get_raw_rmpentry(u64 pfn)
{
u64 paddr, rst_index, segment_index;
--
2.46.2