[Part1 PATCH v6 14/17] x86: Add support for changing memory encryption attribute in early boot

From: Brijesh Singh
Date: Mon Oct 16 2017 - 11:36:39 EST


Some KVM-specific custom MSRs share the guest physical address with the
hypervisor in early boot. When SEV is active, the shared physical address
must be mapped with memory encryption attribute cleared so that both
hypervisor and guest can access the data.

Add APIs to change the memory encryption attribute in early boot code.

Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: "H. Peter Anvin" <hpa@xxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxx>
Cc: x86@xxxxxxxxxx
Cc: linux-kernel@xxxxxxxxxxxxxxx
Cc: Tom Lendacky <thomas.lendacky@xxxxxxx>
Signed-off-by: Brijesh Singh <brijesh.singh@xxxxxxx>
---

Changes since v5:

early_set_memory_enc_dec() is enhanced to perform encrypt/decrypt and change
the C bit in one go. The changes shields the caller from having to check
the C bit status before changing it and also shield the OS from converting a
page blindly.

Boris,

I removed your R-b since I was not sure if you are okay with the above changes.
please let me know if you are okay with the changes. thanks

arch/x86/include/asm/mem_encrypt.h | 8 +++
arch/x86/mm/mem_encrypt.c | 131 +++++++++++++++++++++++++++++++++++++
2 files changed, 139 insertions(+)

diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index 2b024741bce9..3ba68c92be1b 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -42,6 +42,9 @@ void __init sme_early_init(void);
void __init sme_encrypt_kernel(void);
void __init sme_enable(struct boot_params *bp);

+int __init early_set_memory_decrypted(resource_size_t paddr, unsigned long size);
+int __init early_set_memory_encrypted(resource_size_t paddr, unsigned long size);
+
/* Architecture __weak replacement functions */
void __init mem_encrypt_init(void);

@@ -70,6 +73,11 @@ static inline void __init sme_enable(struct boot_params *bp) { }
static inline bool sme_active(void) { return false; }
static inline bool sev_active(void) { return false; }

+static inline int __init
+early_set_memory_decrypted(resource_size_t paddr, unsigned long size) { return 0; }
+static inline int __init
+early_set_memory_encrypted(resource_size_t paddr, unsigned long size) { return 0; }
+
#endif /* CONFIG_AMD_MEM_ENCRYPT */

/*
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 2336c3922227..b671e91e6a1f 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -30,6 +30,8 @@
#include <asm/msr.h>
#include <asm/cmdline.h>

+#include "mm_internal.h"
+
static char sme_cmdline_arg[] __initdata = "mem_encrypt";
static char sme_cmdline_on[] __initdata = "on";
static char sme_cmdline_off[] __initdata = "off";
@@ -260,6 +262,135 @@ static void sev_free(struct device *dev, size_t size, void *vaddr,
swiotlb_free_coherent(dev, size, vaddr, dma_handle);
}

+static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
+{
+ pgprot_t old_prot, new_prot;
+ unsigned long pfn, pa, size;
+ pte_t new_pte;
+
+ switch (level) {
+ case PG_LEVEL_4K:
+ pfn = pte_pfn(*kpte);
+ old_prot = pte_pgprot(*kpte);
+ break;
+ case PG_LEVEL_2M:
+ pfn = pmd_pfn(*(pmd_t *)kpte);
+ old_prot = pmd_pgprot(*(pmd_t *)kpte);
+ break;
+ case PG_LEVEL_1G:
+ pfn = pud_pfn(*(pud_t *)kpte);
+ old_prot = pud_pgprot(*(pud_t *)kpte);
+ break;
+ default:
+ return;
+ }
+
+ new_prot = old_prot;
+ if (enc)
+ pgprot_val(new_prot) |= _PAGE_ENC;
+ else
+ pgprot_val(new_prot) &= ~_PAGE_ENC;
+
+ /* if prot is same then do nothing */
+ if (pgprot_val(old_prot) == pgprot_val(new_prot))
+ return;
+
+ pa = pfn << page_level_shift(level);
+ size = page_level_size(level);
+
+ /*
+ * We are going to perform in-place encrypt/decrypt and change the
+ * physical page attribute from C=1 to C=0 or vice versa. Flush the
+ * caches to ensure that data gets accessed with correct C-bit.
+ */
+ clflush_cache_range(__va(pa), size);
+
+ /* encrypt/decrypt the contents in-place */
+ if (enc)
+ sme_early_encrypt(pa, size);
+ else
+ sme_early_decrypt(pa, size);
+
+ /* change the page encryption mask */
+ new_pte = pfn_pte(pfn, new_prot);
+ set_pte_atomic(kpte, new_pte);
+}
+
+static int __init early_set_memory_enc_dec(resource_size_t paddr,
+ unsigned long size, bool enc)
+{
+ unsigned long vaddr, vaddr_end, vaddr_next;
+ unsigned long psize, pmask;
+ int split_page_size_mask;
+ pte_t *kpte;
+ int level, ret;
+
+ vaddr = (unsigned long)__va(paddr);
+ vaddr_next = vaddr;
+ vaddr_end = vaddr + size;
+
+ for (; vaddr < vaddr_end; vaddr = vaddr_next) {
+ kpte = lookup_address(vaddr, &level);
+ if (!kpte || pte_none(*kpte)) {
+ ret = 1;
+ goto out;
+ }
+
+ if (level == PG_LEVEL_4K) {
+ __set_clr_pte_enc(kpte, level, enc);
+ vaddr_next = (vaddr & PAGE_MASK) + PAGE_SIZE;
+ continue;
+ }
+
+ psize = page_level_size(level);
+ pmask = page_level_mask(level);
+
+ /*
+ * Check, whether we can change the large page in one go.
+ * We request a split, when the address is not aligned and
+ * the number of pages to set/clear encryption bit is smaller
+ * than the number of pages in the large page.
+ */
+ if (vaddr == (vaddr & pmask) &&
+ ((vaddr_end - vaddr) >= psize)) {
+ __set_clr_pte_enc(kpte, level, enc);
+ vaddr_next = (vaddr & pmask) + psize;
+ continue;
+ }
+
+ /*
+ * virtual address is part of large page, create the page table
+ * mapping to use smaller pages (4K or 2M). If virtual address
+ * is part of 2M page the we request spliting the large page
+ * into 4K, similarly 1GB large page is requested to split into
+ * 2M pages.
+ */
+ if (level == PG_LEVEL_2M)
+ split_page_size_mask = 0;
+ else
+ split_page_size_mask = 1 << PG_LEVEL_2M;
+
+ kernel_physical_mapping_init(__pa(vaddr & pmask),
+ __pa((vaddr_end & pmask) + psize),
+ split_page_size_mask);
+ }
+
+ ret = 0;
+out:
+ __flush_tlb_all();
+ return ret;
+}
+
+int __init early_set_memory_decrypted(resource_size_t paddr, unsigned long size)
+{
+ return early_set_memory_enc_dec(paddr, size, false);
+}
+
+int __init early_set_memory_encrypted(resource_size_t paddr, unsigned long size)
+{
+ return early_set_memory_enc_dec(paddr, size, true);
+}
+
/*
* SME and SEV are very similar but they are not the same, so there are
* times that the kernel will need to distinguish between SME and SEV. The
--
2.9.5