Re: [RFC PATCH V2 01/18] x86/sev: Pvalidate memory gab for decompressing kernel

From: Tianyu Lan
Date: Thu Dec 08 2022 - 08:04:27 EST


On 12/6/2022 5:16 PM, Gupta, Pankaj wrote:
Hi Tianyu,

Some minor nits below.

Thanks a lot to review. I will change these in the next version.

Pvalidate needed pages for decompressing kernel. The E820_TYPE_RAM
entry includes only validated memory. The kernel expects that the
RAM entry's addr is fixed while the entry size is to be extended
to cover addresses to the start of next entry. This patch increases
the RAM entry size to cover all possilble memory addresses until

s/possilble/possible

init_size.

Signed-off-by: Tianyu Lan <tiala@xxxxxxxxxxxxx>
---
  arch/x86/boot/compressed/head_64.S |  8 +++
  arch/x86/boot/compressed/sev.c     | 84 ++++++++++++++++++++++++++++++
  2 files changed, 92 insertions(+)

diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index d33f060900d2..818edaf5d0cf 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -348,6 +348,14 @@ SYM_CODE_START(startup_64)
      cld
      cli
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+    /* pvalidate memory on demand if SNP is enabled. */
+    pushq    %rsi
+    movq    %rsi, %rdi
+    call     pvalidate_for_startup_64
+    popq    %rsi
+#endif
+
      /* Setup data segments. */
      xorl    %eax, %eax
      movl    %eax, %ds
diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
index 960968f8bf75..3a5a1ab16095 100644
--- a/arch/x86/boot/compressed/sev.c
+++ b/arch/x86/boot/compressed/sev.c
@@ -12,8 +12,10 @@
   */
  #include "misc.h"
+#include <asm/msr-index.h>
  #include <asm/pgtable_types.h>
  #include <asm/sev.h>
+#include <asm/svm.h>
  #include <asm/trapnr.h>
  #include <asm/trap_pf.h>
  #include <asm/msr-index.h>
@@ -21,6 +23,7 @@
  #include <asm/ptrace.h>
  #include <asm/svm.h>
  #include <asm/cpuid.h>
+#include <asm/e820/types.h>
  #include "error.h"
  #include "../msr.h"
@@ -117,6 +120,22 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
  /* Include code for early handlers */
  #include "../../kernel/sev-shared.c"
+/* Check SEV-SNP via MSR */
+static bool sev_snp_runtime_check(void)
+{
+    unsigned long low, high;
+    u64 val;
+
+    asm volatile("rdmsr\n" : "=a" (low), "=d" (high) :
+            "c" (MSR_AMD64_SEV));
+
+    val = (high << 32) | low;
+    if (val & MSR_AMD64_SEV_SNP_ENABLED)
+        return true;
+
+    return false;
+}
+
  static inline bool sev_snp_enabled(void)
  {
      return sev_status & MSR_AMD64_SEV_SNP_ENABLED;
@@ -456,3 +475,68 @@ void sev_prep_identity_maps(unsigned long top_level_pgt)
      sev_verify_cbit(top_level_pgt);
  }
+
+static void extend_e820_on_demand(struct boot_e820_entry *e820_entry,
+                  u64 needed_ram_end)
+{
+    u64 end, paddr;
+    unsigned long eflags;
+    int rc;
+
+    if (!e820_entry)
+        return;
+
+    /* Validated memory must be aligned by PAGE_SIZE. */
+    end = ALIGN(e820_entry->addr + e820_entry->size, PAGE_SIZE);
+    if (needed_ram_end > end && e820_entry->type == E820_TYPE_RAM) {

maybe "if check" can be used to return from here, would read code better?

+        for (paddr = end; paddr < needed_ram_end; paddr += PAGE_SIZE)
Maybe we could reuse 'pvalidate_pages' here?

+            rc = pvalidate(paddr, RMP_PG_SIZE_4K, true);
+            if (rc) {
+                error("Failed to validate address.n");

Would it make sense to print the cause of failure here?

+                return;
+            }
+        }
+        e820_entry->size = needed_ram_end - e820_entry->addr;
+    }
+}
+
+/*
+ * Explicitly pvalidate needed pages for decompressing the kernel.
+ * The E820_TYPE_RAM entry includes only validated memory. The kernel
+ * expects that the RAM entry's addr is fixed while the entry size is to be
+ * extended to cover addresses to the start of next entry.
+ * The function increases the RAM entry size to cover all possible memory
+ * addresses until init_size.
+ * For example,  init_end = 0x4000000,
+ * [RAM: 0x0 - 0x0],                       M[RAM: 0x0 - 0xa0000]
+ * [RSVD: 0xa0000 - 0x10000]                [RSVD: 0xa0000 - 0x10000]
+ * [ACPI: 0x10000 - 0x20000]      ==>       [ACPI: 0x10000 - 0x20000]
+ * [RSVD: 0x800000 - 0x900000]              [RSVD: 0x800000 - 0x900000]
+ * [RAM: 0x1000000 - 0x2000000]            M[RAM: 0x1000000 - 0x2001000]
+ * [RAM: 0x2001000 - 0x2007000]            M[RAM: 0x2001000 - 0x4000000]
+ * Other RAM memory after init_end is pvalidated by ms_hyperv_init_platform
+ */
+__visible void pvalidate_for_startup_64(struct boot_params *boot_params)
+{
+    struct boot_e820_entry *e820_entry;
+    u64 init_end =
+        boot_params->hdr.pref_address + boot_params->hdr.init_size;
+    u8 i, nr_entries = boot_params->e820_entries;
+    u64 needed_end;

Could not very well interpret the name 'needed_end'.

+
+    if (!sev_snp_runtime_check())
+        return;
+
+    for (i = 0; i < nr_entries; ++i) {
+        /* Pvalidate memory holes in e820 RAM entries. */

Pvalidate and pvalidate names are used interchangeably in comments.

+        e820_entry = &boot_params->e820_table[i];
+        if (i < nr_entries - 1) {
+            needed_end = boot_params->e820_table[i + 1].addr;
+            if (needed_end < e820_entry->addr)
+                error("e820 table is not sorted.\n");
+        } else {
+            needed_end = init_end;
+        }
+        extend_e820_on_demand(e820_entry, needed_end);
+    }
+}