[PATCH v3 2/2] x86/sev: Add callback to apply RMP table fixups for kexec

From: Ashish Kalra
Date: Thu Apr 25 2024 - 20:43:58 EST


From: Ashish Kalra <ashish.kalra@xxxxxxx>

Handle cases where the BIOS places the RMP table such that it is not
2M aligned. The kexec kernel could then try to allocate memory from
within the partially covered 2M chunk, which causes a fatal RMP fault.

The kexec failure is illustrated below from the kernel logs:

[ 0.000000] SEV-SNP: RMP table physical range [0x0000007ffe800000 - 0x000000807f0fffff]
[ 0.000000] BIOS-provided physical RAM map:
[ 0.000000] BIOS-e820: [mem 0x0000000000000000-0x000000000008efff] usable
[ 0.000000] BIOS-e820: [mem 0x000000000008f000-0x000000000008ffff] ACPI NVS
[ 0.000000] BIOS-e820: [mem 0x0000000000090000-0x000000000009ffff] usable
[ 0.000000] BIOS-e820: [mem 0x0000000000100000-0x000000005a14afff] usable
[ 0.000000] BIOS-e820: [mem 0x000000005a14b000-0x000000005a34afff] reserved
[ 0.000000] BIOS-e820: [mem 0x000000005a34b000-0x0000000067acefff] usable
[ 0.000000] BIOS-e820: [mem 0x0000000067acf000-0x000000006dfcefff] reserved
[ 0.000000] BIOS-e820: [mem 0x000000006dfcf000-0x000000006edfefff] ACPI NVS
[ 0.000000] BIOS-e820: [mem 0x000000006edff000-0x000000006effefff] ACPI data
[ 0.000000] BIOS-e820: [mem 0x000000006efff000-0x000000006effffff] usable
[ 0.000000] BIOS-e820: [mem 0x000000006f000000-0x000000006f00afff] ACPI NVS
[ 0.000000] BIOS-e820: [mem 0x000000006f00b000-0x000000006fffffff] usable
[ 0.000000] BIOS-e820: [mem 0x0000000070000000-0x000000008fffffff] reserved
[ 0.000000] BIOS-e820: [mem 0x00000000aa000000-0x00000000aaffffff] reserved
[ 0.000000] BIOS-e820: [mem 0x00000000c5000000-0x00000000c5ffffff] reserved
[ 0.000000] BIOS-e820: [mem 0x00000000e0000000-0x00000000e0ffffff] reserved
[ 0.000000] BIOS-e820: [mem 0x00000000fd000000-0x00000000ffffffff] reserved
[ 0.000000] BIOS-e820: [mem 0x0000000100000000-0x000000407fcfffff] usable
[ 0.000000] BIOS-e820: [mem 0x000000407fd00000-0x000000407fffffff] reserved
[ 0.000000] BIOS-e820: [mem 0x0000004080000000-0x0000007ffe7fffff] usable
[ 0.000000] BIOS-e820: [mem 0x0000007ffe800000-0x000000807f0fffff] reserved
[ 0.000000] BIOS-e820: [mem 0x000000807f100000-0x000000807f1fefff] usable

As seen here in the e820 memory map, the end of the RMP table range is not aligned to
2MB, and the remainder of that 2MB chunk is not reserved but is marked as usable RAM.

Subsequently, kexec -s (KEXEC_FILE_LOAD syscall) loads its purgatory code,
boot_param, command line and other setup data into this RAM region, as seen in the
kexec logs below, which leads to a fatal RMP fault during kexec boot.

[ 173.113085] Loaded purgatory at 0x807f1fa000
[ 173.113099] Loaded boot_param, command line and misc at 0x807f1f8000 bufsz=0x1350 memsz=0x2000
[ 173.113107] Loaded 64bit kernel at 0x7ffae00000 bufsz=0xd06200 memsz=0x3894000
[ 173.113291] Loaded initrd at 0x7ff6c89000 bufsz=0x4176014 memsz=0x4176014
[ 173.113296] E820 memmap:
[ 173.113298] 0000000000000000-000000000008efff (1)
[ 173.113300] 000000000008f000-000000000008ffff (4)
[ 173.113302] 0000000000090000-000000000009ffff (1)
[ 173.113303] 0000000000100000-000000005a14afff (1)
[ 173.113305] 000000005a14b000-000000005a34afff (2)
[ 173.113306] 000000005a34b000-0000000067acefff (1)
[ 173.113308] 0000000067acf000-000000006dfcefff (2)
[ 173.113309] 000000006dfcf000-000000006edfefff (4)
[ 173.113311] 000000006edff000-000000006effefff (3)
[ 173.113312] 000000006efff000-000000006effffff (1)
[ 173.113314] 000000006f000000-000000006f00afff (4)
[ 173.113315] 000000006f00b000-000000006fffffff (1)
[ 173.113317] 0000000070000000-000000008fffffff (2)
[ 173.113318] 00000000aa000000-00000000aaffffff (2)
[ 173.113319] 00000000c5000000-00000000c5ffffff (2)
[ 173.113321] 00000000e0000000-00000000e0ffffff (2)
[ 173.113322] 00000000fd000000-00000000ffffffff (2)
[ 173.113324] 0000000100000000-000000407fcfffff (1)
[ 173.113325] 000000407fd00000-000000407fffffff (2)
[ 173.113327] 0000004080000000-0000007ffe7fffff (1)
[ 173.113328] 0000007ffe800000-000000807f0fffff (2)
[ 173.113330] 000000807f100000-000000807f1fefff (1)
[ 173.113331] 000000807f1ff000-000000807fffffff (2)
[ 173.690528] nr_segments = 4
[ 173.690533] segment[0]: buf=0x00000000e626d1a2 bufsz=0x4000 mem=0x807f1fa000 memsz=0x5000
[ 173.690546] segment[1]: buf=0x0000000029c67bd6 bufsz=0x1350 mem=0x807f1f8000 memsz=0x2000
[ 173.690552] segment[2]: buf=0x0000000045c60183 bufsz=0xd06200 mem=0x7ffae00000 memsz=0x3894000
[ 173.697994] segment[3]: buf=0x000000006e54f08d bufsz=0x4176014 mem=0x7ff6c89000 memsz=0x4177000
[ 173.708672] kexec_file_load: type:0, start:0x807f1fa150 head:0x1184d0002 flags:0x0

Check whether the start and end of the RMP table physical range are
aligned to 2MB. If either is not, mark any RAM within the 2MB chunk
containing that boundary as reserved in all three e820 tables.

Fixes: c3b86e61b756 ("x86/cpufeatures: Enable/unmask SEV-SNP CPU feature")
Signed-off-by: Ashish Kalra <ashish.kalra@xxxxxxx>
---
arch/x86/include/asm/sev.h | 2 ++
arch/x86/mm/mem_encrypt.c | 13 ++++++++++++
arch/x86/virt/svm/sev.c | 42 ++++++++++++++++++++++++++++++++++++++
3 files changed, 57 insertions(+)

diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 7f57382afee4..24300927a476 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -269,6 +269,7 @@ int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid, bool immut
int rmp_make_shared(u64 pfn, enum pg_level level);
void snp_leak_pages(u64 pfn, unsigned int npages);
void kdump_sev_callback(void);
+void snp_rmptable_e820_fixup(void);
#else
static inline bool snp_probe_rmptable_info(void) { return false; }
static inline int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) { return -ENODEV; }
@@ -282,6 +283,7 @@ static inline int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 as
static inline int rmp_make_shared(u64 pfn, enum pg_level level) { return -ENODEV; }
static inline void snp_leak_pages(u64 pfn, unsigned int npages) {}
static inline void kdump_sev_callback(void) { }
+static inline void snp_rmptable_e820_fixup(void) {}
#endif

#endif
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 6f3b3e028718..d88c942dd311 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -102,6 +102,19 @@ void __init mem_encrypt_setup_arch(void)
phys_addr_t total_mem = memblock_phys_mem_size();
unsigned long size;

+ /*
+ * Apply the RMP table e820 fixups here. This must happen during
+ * setup_arch(), after e820__memory_setup() has set up the e820
+ * tables, and this function is an appropriate place to apply
+ * memory encryption platform specific quirks.
+ *
+ * The fixups cannot be done from snp_init(): snp_init() is called
+ * from sme_enable() in startup_64(), which runs before setup_arch(),
+ * when the e820 tables have not been set up yet.
+ */
+ if (cc_platform_has(CC_ATTR_HOST_SEV_SNP))
+ snp_rmptable_e820_fixup();
+
if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
return;

diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index ab0e8448bb6e..1b4b99b26bec 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -163,6 +163,48 @@ bool snp_probe_rmptable_info(void)
return true;
}

+static void __init __snp_e820_tables_fixup(u64 pa)
+{
+ if (IS_ALIGNED(pa, PMD_SIZE))
+ return;
+
+ /*
+ * pa is the start or end address of the RMP table and is not
+ * 2MB aligned at this point. Any RAM in the 2MB chunk that
+ * contains pa is changed to reserved in all three e820 tables:
+ *
+ *  - e820_table, as it is converted to kernel memory resources
+ *    and used by the KEXEC_FILE_LOAD syscall to load kexec segments;
+ *  - e820_table_firmware, as it is exposed to sysfs and used by
+ *    the KEXEC_LOAD syscall to load kexec segments;
+ *  - e820_table_kexec, as it is passed to the kexec-ed kernel.
+ */
+ pa = ALIGN_DOWN(pa, PMD_SIZE);
+ if (e820__mapped_any(pa, pa + PMD_SIZE, E820_TYPE_RAM)) {
+ pr_info("Reserving start/end of RMP table on a 2MB boundary [0x%016llx]\n", pa);
+ e820__range_update(pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED);
+ e820__range_update_table(e820_table_kexec, pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED);
+ e820__range_update_table(e820_table_firmware, pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED);
+ }
+}
+
+/*
+ * Callback to apply RMP table fixups to the e820 tables. It must
+ * be called after e820__memory_setup(), once the e820 tables are
+ * populated, and before e820__reserve_resources(): after that
+ * point the e820 map has been converted to the standard Linux
+ * memory resources and modifying it has no effect.
+ *
+ * Handles the case where the BIOS places the RMP table so that
+ * its start or end is not 2M aligned; the kexec kernel could then
+ * allocate from within that 2M chunk, causing a fatal RMP fault.
+ */
+void __init snp_rmptable_e820_fixup(void)
+{
+ __snp_e820_tables_fixup(probed_rmp_base);
+ __snp_e820_tables_fixup(probed_rmp_base + probed_rmp_size);
+}
+
/*
* Do the necessary preparations which are verified by the firmware as
* described in the SNP_INIT_EX firmware command description in the SNP
--
2.34.1