[PATCH v5 04/13] x86/sev: Perform PVALIDATE using the SVSM when not at VMPL0
From: Tom Lendacky
Date: Wed Jun 05 2024 - 11:20:36 EST
The PVALIDATE instruction can only be performed at VMPL0. If an SVSM is
present, it will be running at VMPL0 while the guest itself runs at VMPL1
or a lower privilege level.

In that case, use the SVSM_CORE_PVALIDATE call to perform memory
validation instead of issuing the PVALIDATE instruction directly.

The validation of a single 4K page is now explicitly identified as such
in the function name, pvalidate_4k_page(). The pvalidate_pages() function
is used for validating one or more pages of either 4K or 2M size. Each
function, however, determines whether it can issue the PVALIDATE directly
or whether the SVSM needs to be invoked.
Signed-off-by: Tom Lendacky <thomas.lendacky@xxxxxxx>
---
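
Note for reviewers (context only, not part of this patch): the non-SVSM
path continues to use the existing pvalidate() helper from
arch/x86/include/asm/sev.h, which wraps the instruction roughly as
follows:

    static __always_inline unsigned long
    pvalidate(unsigned long vaddr, bool rmp_psize, bool validate)
    {
            bool no_rmpupdate;
            int rc;

            /* Emit the opcode directly; the "pvalidate" mnemonic needs binutils 2.36+ */
            asm volatile(".byte 0xF2, 0x0F, 0x01, 0xFF\n\t"
                         CC_SET(c)
                         : CC_OUT(c) (no_rmpupdate), "=a"(rc)
                         : "a"(vaddr), "c"(rmp_psize), "d"(validate)
                         : "memory", "cc");

            /* CF set: no change was made to the RMP entry */
            if (no_rmpupdate)
                    return PVALIDATE_FAIL_NOUPDATE;

            return rc;
    }

The SVSM path replaces only how the validation is issued; the surrounding
page state change flow is unchanged.
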
arch/x86/boot/compressed/sev.c | 45 +++++-
arch/x86/include/asm/sev.h | 26 ++++
arch/x86/kernel/sev-shared.c | 250 +++++++++++++++++++++++++++++++--
arch/x86/kernel/sev.c | 30 ++--
4 files changed, 325 insertions(+), 26 deletions(-)
diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
index 927b71495122..1cc3106a3ba7 100644
--- a/arch/x86/boot/compressed/sev.c
+++ b/arch/x86/boot/compressed/sev.c
@@ -129,6 +129,34 @@ static bool fault_in_kernel_space(unsigned long address)
/* Include code for early handlers */
#include "../../kernel/sev-shared.c"
+static struct svsm_ca *svsm_get_caa(void)
+{
+ return boot_svsm_caa;
+}
+
+static u64 svsm_get_caa_pa(void)
+{
+ return boot_svsm_caa_pa;
+}
+
+static int svsm_perform_call_protocol(struct svsm_call *call)
+{
+ struct ghcb *ghcb;
+ int ret;
+
+ if (boot_ghcb)
+ ghcb = boot_ghcb;
+ else
+ ghcb = NULL;
+
+ do {
+ ret = ghcb ? svsm_perform_ghcb_protocol(ghcb, call)
+ : svsm_perform_msr_protocol(call);
+ } while (ret == -EAGAIN);
+
+ return ret;
+}
+
bool sev_snp_enabled(void)
{
return sev_status & MSR_AMD64_SEV_SNP_ENABLED;
@@ -145,8 +173,8 @@ static void __page_state_change(unsigned long paddr, enum psc_op op)
* If private -> shared then invalidate the page before requesting the
* state change in the RMP table.
*/
- if (op == SNP_PAGE_STATE_SHARED && pvalidate(paddr, RMP_PG_SIZE_4K, 0))
- sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
+ if (op == SNP_PAGE_STATE_SHARED)
+ pvalidate_4k_page(paddr, paddr, false);
/* Issue VMGEXIT to change the page state in RMP table. */
sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op));
@@ -161,8 +189,8 @@ static void __page_state_change(unsigned long paddr, enum psc_op op)
* Now that page state is changed in the RMP table, validate it so that it is
* consistent with the RMP entry.
*/
- if (op == SNP_PAGE_STATE_PRIVATE && pvalidate(paddr, RMP_PG_SIZE_4K, 1))
- sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
+ if (op == SNP_PAGE_STATE_PRIVATE)
+ pvalidate_4k_page(paddr, paddr, true);
}
void snp_set_page_private(unsigned long paddr)
@@ -255,6 +283,15 @@ void sev_es_shutdown_ghcb(void)
if (!sev_es_check_cpu_features())
error("SEV-ES CPU Features missing.");
+ /*
+ * This is used to determine whether to use the GHCB MSR protocol or
+ * the GHCB shared page to perform a GHCB request. Since the GHCB page
+ * is being changed to encrypted, it can't be used to perform GHCB
+ * requests. Clear the boot_ghcb variable so that the GHCB MSR protocol
+ * is used to change the GHCB page over to an encrypted page.
+ */
+ boot_ghcb = NULL;
+
/*
* GHCB Page must be flushed from the cache and mapped encrypted again.
* Otherwise the running kernel will see strange cache effects when
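
For reference, the MSR-protocol page state request that
__page_state_change() issues via sev_es_wr_ghcb_msr() uses the existing
encoding from arch/x86/include/asm/sev-common.h, which is unchanged by
this patch and looks roughly like:

    #define GHCB_MSR_PSC_REQ                0x014
    #define GHCB_MSR_PSC_REQ_GFN(gfn, op)                           \
            /* GHCBData[55:52] - page operation */                  \
            ((((u64)((op) & 0xf)) << 52) |                          \
            /* GHCBData[51:12] - guest frame number */              \
            ((u64)((gfn) & GENMASK_ULL(39, 0)) << 12) |             \
            /* GHCBData[11:0] - PSC request code */                 \
            GHCB_MSR_PSC_REQ)

with the response status read back via GHCB_MSR_PSC_RESP_VAL(), the high
32 bits of the returned value. This is also why clearing boot_ghcb in
sev_es_shutdown_ghcb() is safe: svsm_perform_call_protocol() then falls
back to svsm_perform_msr_protocol(), and the page state change request
itself never depended on the shared GHCB page.
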
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 3abc2d759db7..01e3866c4d61 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -187,6 +187,31 @@ struct svsm_ca {
#define SVSM_ERR_INVALID_PARAMETER 0x80000005
#define SVSM_ERR_INVALID_REQUEST 0x80000006
#define SVSM_ERR_BUSY 0x80000007
+#define SVSM_PVALIDATE_FAIL_SIZEMISMATCH 0x80001006
+
+/*
+ * The SVSM PVALIDATE related structures
+ */
+struct svsm_pvalidate_entry {
+ u64 page_size : 2,
+ action : 1,
+ ignore_cf : 1,
+ rsvd : 8,
+ pfn : 52;
+};
+
+struct svsm_pvalidate_call {
+ u16 num_entries;
+ u16 cur_index;
+
+ u8 rsvd1[4];
+
+ struct svsm_pvalidate_entry entry[];
+};
+
+#define SVSM_PVALIDATE_MAX_COUNT ((sizeof_field(struct svsm_ca, svsm_buffer) - \
+ offsetof(struct svsm_pvalidate_call, entry)) / \
+ sizeof(struct svsm_pvalidate_entry))
/*
* SVSM protocol structure
@@ -207,6 +232,7 @@ struct svsm_call {
#define SVSM_CORE_CALL(x) ((0ULL << 32) | (x))
#define SVSM_CORE_REMAP_CA 0
+#define SVSM_CORE_PVALIDATE 1
#ifdef CONFIG_AMD_MEM_ENCRYPT
extern void __sev_es_ist_enter(struct pt_regs *regs);
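
For intuition, the arithmetic behind SVSM_PVALIDATE_MAX_COUNT, assuming
the struct svsm_ca layout introduced earlier in this series (an 8-byte
header followed by an svsm_buffer of PAGE_SIZE - 8 = 4088 bytes):

    capacity = (sizeof_field(struct svsm_ca, svsm_buffer)
                - offsetof(struct svsm_pvalidate_call, entry))
               / sizeof(struct svsm_pvalidate_entry)
             = (4088 - 8) / 8
             = 510 entries per SVSM call

which is the "510 entries at a time" figure quoted in the
svsm_pval_pages() comment in sev-shared.c below.
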
diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c
index 00173deefc46..c274fa826ef0 100644
--- a/arch/x86/kernel/sev-shared.c
+++ b/arch/x86/kernel/sev-shared.c
@@ -40,6 +40,10 @@ static u8 snp_vmpl __ro_after_init;
static struct svsm_ca *boot_svsm_caa __ro_after_init;
static u64 boot_svsm_caa_pa __ro_after_init;
+static struct svsm_ca *svsm_get_caa(void);
+static u64 svsm_get_caa_pa(void);
+static int svsm_perform_call_protocol(struct svsm_call *call);
+
/* I/O parameters for CPUID-related helpers */
struct cpuid_leaf {
u32 fn;
@@ -1216,19 +1220,97 @@ static void __head setup_cpuid_table(const struct cc_blob_sev_info *cc_info)
}
}
-static void pvalidate_pages(struct snp_psc_desc *desc)
+static void svsm_pval_terminate(struct svsm_pvalidate_call *pc, int ret, u64 svsm_ret)
+{
+ unsigned int page_size;
+ bool action;
+ u64 pfn;
+
+ pfn = pc->entry[pc->cur_index].pfn;
+ action = pc->entry[pc->cur_index].action;
+ page_size = pc->entry[pc->cur_index].page_size;
+
+ WARN(1, "PVALIDATE failure: pfn 0x%llx, action=%u, size=%u - ret=%d, svsm_ret=0x%llx\n",
+ pfn, action, page_size, ret, svsm_ret);
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
+}
+
+static void pval_terminate(u64 pfn, bool action, unsigned int page_size, int ret)
+{
+ WARN(1, "PVALIDATE failure: pfn 0x%llx, action=%u, size=%u - ret=%d\n",
+ pfn, action, page_size, ret);
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
+}
+
+static void svsm_pval_4k_page(unsigned long paddr, bool validate)
+{
+ struct svsm_pvalidate_call *pc;
+ struct svsm_call call = {};
+ unsigned long flags;
+ u64 pc_pa;
+ int ret;
+
+ /*
+ * This can be called very early in the boot, so use native functions in
+ * order to avoid paravirt issues.
+ */
+ flags = native_local_irq_save();
+
+ call.caa = svsm_get_caa();
+
+ pc = (struct svsm_pvalidate_call *)call.caa->svsm_buffer;
+ pc_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer);
+
+ pc->num_entries = 1;
+ pc->cur_index = 0;
+ pc->entry[0].page_size = RMP_PG_SIZE_4K;
+ pc->entry[0].action = validate;
+ pc->entry[0].ignore_cf = 0;
+ pc->entry[0].pfn = paddr >> PAGE_SHIFT;
+
+ /* Protocol 0, Call ID 1 */
+ call.rax = SVSM_CORE_CALL(SVSM_CORE_PVALIDATE);
+ call.rcx = pc_pa;
+
+ ret = svsm_perform_call_protocol(&call);
+ if (ret)
+ svsm_pval_terminate(pc, ret, call.rax_out);
+
+ native_local_irq_restore(flags);
+}
+
+static void pvalidate_4k_page(unsigned long vaddr, unsigned long paddr, bool validate)
+{
+ int ret;
+
+ /*
+ * This can be called very early in the boot, so use rip-relative
+ * references as needed.
+ */
+ if (RIP_REL_REF(snp_vmpl)) {
+ svsm_pval_4k_page(paddr, validate);
+ } else {
+ ret = pvalidate(vaddr, RMP_PG_SIZE_4K, validate);
+ if (ret)
+ pval_terminate(PHYS_PFN(paddr), validate, RMP_PG_SIZE_4K, ret);
+ }
+}
+
+static void pval_pages(struct snp_psc_desc *desc)
{
struct psc_entry *e;
unsigned long vaddr;
unsigned int size;
unsigned int i;
bool validate;
+ u64 pfn;
int rc;
for (i = 0; i <= desc->hdr.end_entry; i++) {
e = &desc->entries[i];
- vaddr = (unsigned long)pfn_to_kaddr(e->gfn);
+ pfn = e->gfn;
+ vaddr = (unsigned long)pfn_to_kaddr(pfn);
size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K;
validate = e->operation == SNP_PAGE_STATE_PRIVATE;
@@ -1236,20 +1318,170 @@ static void pvalidate_pages(struct snp_psc_desc *desc)
if (rc == PVALIDATE_FAIL_SIZEMISMATCH && size == RMP_PG_SIZE_2M) {
unsigned long vaddr_end = vaddr + PMD_SIZE;
- for (; vaddr < vaddr_end; vaddr += PAGE_SIZE) {
+ for (; vaddr < vaddr_end; vaddr += PAGE_SIZE, pfn++) {
rc = pvalidate(vaddr, RMP_PG_SIZE_4K, validate);
if (rc)
- break;
+ pval_terminate(pfn, validate, RMP_PG_SIZE_4K, rc);
}
- }
-
- if (rc) {
- WARN(1, "Failed to validate address 0x%lx ret %d", vaddr, rc);
- sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
+ } else if (rc) {
+ pval_terminate(pfn, validate, size, rc);
}
}
}
+static u64 svsm_build_ca_from_pfn_range(u64 pfn, u64 pfn_end, bool action,
+ struct svsm_pvalidate_call *pc)
+{
+ struct svsm_pvalidate_entry *pe;
+
+ /* Nothing in the CA yet */
+ pc->num_entries = 0;
+ pc->cur_index = 0;
+
+ pe = &pc->entry[0];
+
+ while (pfn < pfn_end) {
+ pe->page_size = RMP_PG_SIZE_4K;
+ pe->action = action;
+ pe->ignore_cf = 0;
+ pe->pfn = pfn;
+
+ pe++;
+ pfn++;
+
+ pc->num_entries++;
+ if (pc->num_entries == SVSM_PVALIDATE_MAX_COUNT)
+ break;
+ }
+
+ return pfn;
+}
+
+static void svsm_build_ca_from_psc_desc(struct snp_psc_desc *desc,
+ struct svsm_pvalidate_call *pc)
+{
+ struct svsm_pvalidate_entry *pe;
+ unsigned int desc_entry;
+ struct psc_entry *e;
+
+ desc_entry = desc->hdr.cur_entry;
+
+ /* Nothing in the CA yet */
+ pc->num_entries = 0;
+ pc->cur_index = 0;
+
+ pe = &pc->entry[0];
+ e = &desc->entries[desc_entry];
+
+ while (desc_entry <= desc->hdr.end_entry) {
+ pe->page_size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K;
+ pe->action = e->operation == SNP_PAGE_STATE_PRIVATE;
+ pe->ignore_cf = 0;
+ pe->pfn = e->gfn;
+
+ pe++;
+ e++;
+
+ desc_entry++;
+ pc->num_entries++;
+ if (pc->num_entries == SVSM_PVALIDATE_MAX_COUNT)
+ break;
+ }
+
+ desc->hdr.cur_entry = desc_entry;
+}
+
+static void svsm_pval_pages(struct snp_psc_desc *desc)
+{
+ struct svsm_pvalidate_entry pv_4k[VMGEXIT_PSC_MAX_ENTRY];
+ unsigned int i, pv_4k_count = 0;
+ struct svsm_pvalidate_call *pc;
+ struct svsm_call call = {};
+ unsigned long flags;
+ bool action;
+ u64 pc_pa;
+ int ret;
+
+ /*
+ * This can be called very early in the boot, so use native functions in
+ * order to avoid paravirt issues.
+ */
+ flags = native_local_irq_save();
+
+ /*
+ * The SVSM calling area (CA) can support processing 510 entries at a
+ * time. Loop through the Page State Change descriptor until the CA is
+ * full or the last entry in the descriptor is reached, at which time
+ * the SVSM is invoked. This repeats until all entries in the descriptor
+ * are processed.
+ */
+ call.caa = svsm_get_caa();
+
+ pc = (struct svsm_pvalidate_call *)call.caa->svsm_buffer;
+ pc_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer);
+
+ /* Protocol 0, Call ID 1 */
+ call.rax = SVSM_CORE_CALL(SVSM_CORE_PVALIDATE);
+ call.rcx = pc_pa;
+
+ while (desc->hdr.cur_entry <= desc->hdr.end_entry) {
+ svsm_build_ca_from_psc_desc(desc, pc);
+
+ do {
+ ret = svsm_perform_call_protocol(&call);
+ if (ret) {
+ /*
+ * Check if the entry failed because of an RMP mismatch (a
+ * PVALIDATE at 2M was requested, but the page is mapped in
+ * the RMP as 4K).
+ */
+ if (call.rax_out == SVSM_PVALIDATE_FAIL_SIZEMISMATCH &&
+ pc->entry[pc->cur_index].page_size == RMP_PG_SIZE_2M) {
+ /* Save this entry for post-processing at 4K */
+ pv_4k[pv_4k_count++] = pc->entry[pc->cur_index];
+
+ /* Skip to the next one unless at the end of the list */
+ pc->cur_index++;
+ if (pc->cur_index < pc->num_entries)
+ ret = -EAGAIN;
+ else
+ ret = 0;
+ }
+ }
+ } while (ret == -EAGAIN);
+
+ if (ret)
+ svsm_pval_terminate(pc, ret, call.rax_out);
+ }
+
+ /* Process any entries that failed to be validated at 2M and validate them at 4K */
+ for (i = 0; i < pv_4k_count; i++) {
+ u64 pfn, pfn_end;
+
+ action = pv_4k[i].action;
+ pfn = pv_4k[i].pfn;
+ pfn_end = pfn + 512;
+
+ while (pfn < pfn_end) {
+ pfn = svsm_build_ca_from_pfn_range(pfn, pfn_end, action, pc);
+
+ ret = svsm_perform_call_protocol(&call);
+ if (ret)
+ svsm_pval_terminate(pc, ret, call.rax_out);
+ }
+ }
+
+ native_local_irq_restore(flags);
+}
+
+static void pvalidate_pages(struct snp_psc_desc *desc)
+{
+ if (snp_vmpl)
+ svsm_pval_pages(desc);
+ else
+ pval_pages(desc);
+}
+
static int vmgexit_psc(struct ghcb *ghcb, struct snp_psc_desc *desc)
{
int cur_entry, end_entry, ret = 0;
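
A note on the 2M fallback above: one 2M entry expands to
PMD_SIZE / PAGE_SIZE = 512 4K pages, while the calling area holds at most
SVSM_PVALIDATE_MAX_COUNT = 510 entries, so each saved pv_4k[] entry is
necessarily processed in two SVSM calls. A sketch of the loop with
hypothetical values:

    u64 pfn = pv_4k[i].pfn;                 /* say, 0x200 */
    u64 pfn_end = pfn + 512;                /* 0x400 */

    /* Pass 1: CA filled with 510 entries, returns pfn = 0x3fe */
    pfn = svsm_build_ca_from_pfn_range(pfn, pfn_end, action, pc);
    ret = svsm_perform_call_protocol(&call);

    /* Pass 2: the remaining 2 entries, returns pfn = pfn_end */
    pfn = svsm_build_ca_from_pfn_range(pfn, pfn_end, action, pc);
    ret = svsm_perform_call_protocol(&call);
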
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 6bab3244a3b9..b5c18ed4c572 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -626,6 +626,17 @@ static struct svsm_ca *svsm_get_caa(void)
: RIP_REL_REF(boot_svsm_caa);
}
+static u64 svsm_get_caa_pa(void)
+{
+ /*
+ * Use rip-relative references when called early in the boot. If
+ * cas_initialized is set, then it is late in the boot and there is no
+ * need to worry about rip-relative references.
+ */
+ return RIP_REL_REF(sev_cfg).cas_initialized ? this_cpu_read(svsm_caa_pa)
+ : RIP_REL_REF(boot_svsm_caa_pa);
+}
+
static noinstr void __sev_put_ghcb(struct ghcb_state *state)
{
struct sev_es_runtime_data *data;
@@ -798,7 +809,6 @@ early_set_pages_state(unsigned long vaddr, unsigned long paddr,
{
unsigned long paddr_end;
u64 val;
- int ret;
vaddr = vaddr & PAGE_MASK;
@@ -806,12 +816,9 @@ early_set_pages_state(unsigned long vaddr, unsigned long paddr,
paddr_end = paddr + (npages << PAGE_SHIFT);
while (paddr < paddr_end) {
- if (op == SNP_PAGE_STATE_SHARED) {
- /* Page validation must be rescinded before changing to shared */
- ret = pvalidate(vaddr, RMP_PG_SIZE_4K, false);
- if (WARN(ret, "Failed to validate address 0x%lx ret %d", paddr, ret))
- goto e_term;
- }
+ /* Page validation must be rescinded before changing to shared */
+ if (op == SNP_PAGE_STATE_SHARED)
+ pvalidate_4k_page(vaddr, paddr, false);
/*
* Use the MSR protocol because this function can be called before
@@ -833,12 +840,9 @@ early_set_pages_state(unsigned long vaddr, unsigned long paddr,
paddr, GHCB_MSR_PSC_RESP_VAL(val)))
goto e_term;
- if (op == SNP_PAGE_STATE_PRIVATE) {
- /* Page validation must be performed after changing to private */
- ret = pvalidate(vaddr, RMP_PG_SIZE_4K, true);
- if (WARN(ret, "Failed to validate address 0x%lx ret %d", paddr, ret))
- goto e_term;
- }
+ /* Page validation must be performed after changing to private */
+ if (op == SNP_PAGE_STATE_PRIVATE)
+ pvalidate_4k_page(vaddr, paddr, true);
vaddr += PAGE_SIZE;
paddr += PAGE_SIZE;
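
For context on the RIP_REL_REF() usage above (and in pvalidate_4k_page()):
before relocations are applied early in boot, absolute references to
globals may resolve to the wrong address, so the accessors go through the
existing rip_rel_ptr() helper from arch/x86/include/asm/asm.h, which is
roughly:

    static __always_inline __pure void *rip_rel_ptr(void *p)
    {
            asm("leaq %c1(%%rip), %0" : "=r"(p) : "i"(p));

            return p;
    }

    #define RIP_REL_REF(var)        (*(typeof(&(var)))rip_rel_ptr(&(var)))

Once sev_cfg.cas_initialized is set, boot is far enough along that the
per-CPU svsm_caa_pa can simply be read with this_cpu_read().
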
--
2.43.2