[PATCH v3 3/4] x86/mm: add .data..decrypted section to hold shared variables

From: Brijesh Singh
Date: Wed Aug 29 2018 - 14:25:31 EST


kvmclock defines a few static variables which are shared with the
hypervisor during kvmclock initialization.

When SEV is active, memory is encrypted with a guest-specific key, and
if the guest OS wants to share a memory region with the hypervisor then
it must clear the C-bit before sharing it. Currently, we use
kernel_physical_mapping_init() to split large pages before clearing the
C-bit on shared pages. But it fails when called from the kvmclock
initialization (mainly because the memblock allocator is not ready that
early during boot).

Add a __decrypted section attribute which can be used when defining
such shared variables. The so-defined variables will be placed in the
.data..decrypted section. This section is mapped with C=0 early
during boot; we also ensure that the initialized values are updated
to match C=0 (i.e. perform an in-place decryption). The
.data..decrypted section is PMD-aligned and sized so that we avoid
the need to split the large pages when mapping the section.

sme_encrypt_kernel() was used to perform the in-place encryption
of the Linux kernel and initrd when SME is active. The routine has been
enhanced to decrypt the .data..decrypted section for both the SME and
SEV cases.

Signed-off-by: Brijesh Singh <brijesh.singh@xxxxxxx>
Cc: Tom Lendacky <thomas.lendacky@xxxxxxx>
Cc: kvm@xxxxxxxxxxxxxxx
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxx>
Cc: "H. Peter Anvin" <hpa@xxxxxxxxx>
Cc: linux-kernel@xxxxxxxxxxxxxxx
Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
Cc: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
Cc: kvm@xxxxxxxxxxxxxxx
Cc: "Radim Krčmář" <rkrcmar@xxxxxxxxxx>
---
arch/x86/include/asm/mem_encrypt.h | 6 +++
arch/x86/kernel/head64.c | 11 +++++
arch/x86/kernel/vmlinux.lds.S | 17 +++++++
arch/x86/mm/mem_encrypt_identity.c | 94 ++++++++++++++++++++++++++++++++------
4 files changed, 113 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index c064383..802b2eb 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -52,6 +52,8 @@ void __init mem_encrypt_init(void);
bool sme_active(void);
bool sev_active(void);

+#define __decrypted __attribute__((__section__(".data..decrypted")))
+
#else /* !CONFIG_AMD_MEM_ENCRYPT */

#define sme_me_mask 0ULL
@@ -77,6 +79,8 @@ early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0;
static inline int __init
early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; }

+#define __decrypted
+
#endif /* CONFIG_AMD_MEM_ENCRYPT */

/*
@@ -88,6 +92,8 @@ early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0;
#define __sme_pa(x) (__pa(x) | sme_me_mask)
#define __sme_pa_nodebug(x) (__pa_nodebug(x) | sme_me_mask)

+extern char __start_data_decrypted[], __end_data_decrypted[];
+
#endif /* __ASSEMBLY__ */

#endif /* __X86_MEM_ENCRYPT_H__ */
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 8047379..af39d68 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -112,6 +112,7 @@ static bool __head check_la57_support(unsigned long physaddr)
unsigned long __head __startup_64(unsigned long physaddr,
struct boot_params *bp)
{
+ unsigned long vaddr, vaddr_end;
unsigned long load_delta, *p;
unsigned long pgtable_flags;
pgdval_t *pgd;
@@ -234,6 +235,16 @@ unsigned long __head __startup_64(unsigned long physaddr,
/* Encrypt the kernel and related (if SME is active) */
sme_encrypt_kernel(bp);

+ /* Clear the memory encryption mask from the .data..decrypted section. */
+ if (mem_encrypt_active()) {
+ vaddr = (unsigned long)__start_data_decrypted;
+ vaddr_end = (unsigned long)__end_data_decrypted;
+ for (; vaddr < vaddr_end; vaddr += PMD_SIZE) {
+ i = pmd_index(vaddr);
+ pmd[i] -= sme_get_me_mask();
+ }
+ }
+
/*
* Return the SME encryption mask (if SME is active) to be used as a
* modifier for the initial pgdir entry programmed into CR3.
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 8bde0a4..78d3169 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -89,6 +89,21 @@ PHDRS {
note PT_NOTE FLAGS(0); /* ___ */
}

+/*
+ * This section contains data which will be mapped as decrypted. Memory
+ * encryption operates on a page basis. Make this section PMD-aligned
+ * to avoid splitting the pages while mapping the section early.
+ *
+ * Note: We use a separate section so that only this section gets
+ * decrypted to avoid exposing more than we wish.
+ */
+#define DATA_DECRYPTED \
+ . = ALIGN(PMD_SIZE); \
+ __start_data_decrypted = .; \
+ *(.data..decrypted); \
+ . = ALIGN(PMD_SIZE); \
+ __end_data_decrypted = .; \
+
SECTIONS
{
#ifdef CONFIG_X86_32
@@ -171,6 +186,8 @@ SECTIONS
/* rarely changed data like cpu maps */
READ_MOSTLY_DATA(INTERNODE_CACHE_BYTES)

+ DATA_DECRYPTED
+
/* End of data section */
_edata = .;
} :data
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index 7659e65..08e70ba 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -51,6 +51,8 @@
(_PAGE_PAT | _PAGE_PWT))

#define PMD_FLAGS_ENC (PMD_FLAGS_LARGE | _PAGE_ENC)
+#define PMD_FLAGS_ENC_WP ((PMD_FLAGS_ENC & ~_PAGE_CACHE_MASK) | \
+ (_PAGE_PAT | _PAGE_PWT))

#define PTE_FLAGS (__PAGE_KERNEL_EXEC & ~_PAGE_GLOBAL)

@@ -59,6 +61,8 @@
(_PAGE_PAT | _PAGE_PWT))

#define PTE_FLAGS_ENC (PTE_FLAGS | _PAGE_ENC)
+#define PTE_FLAGS_ENC_WP ((PTE_FLAGS_ENC & ~_PAGE_CACHE_MASK) | \
+ (_PAGE_PAT | _PAGE_PWT))

struct sme_populate_pgd_data {
void *pgtable_area;
@@ -231,6 +235,11 @@ static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd)
__sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC);
}

+static void __init sme_map_range_encrypted_wp(struct sme_populate_pgd_data *ppd)
+{
+ __sme_map_range(ppd, PMD_FLAGS_ENC_WP, PTE_FLAGS_ENC_WP);
+}
+
static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd)
{
__sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC);
@@ -378,7 +387,10 @@ static void __init build_workarea_map(struct boot_params *bp,
ppd->paddr = workarea_start;
ppd->vaddr = workarea_start;
ppd->vaddr_end = workarea_end;
- sme_map_range_decrypted(ppd);
+ if (sev_active())
+ sme_map_range_encrypted(ppd);
+ else
+ sme_map_range_decrypted(ppd);

/* Flush the TLB - no globals so cr3 is enough */
native_write_cr3(__native_read_cr3());
@@ -435,16 +447,27 @@ static void __init build_workarea_map(struct boot_params *bp,
sme_map_range_decrypted_wp(ppd);
}

- /* Add decrypted workarea mappings to both kernel mappings */
+ /*
+ * When SEV is active, the kernel is already encrypted, hence map
+ * the workarea as encrypted. When SME is active, the kernel is
+ * not yet encrypted, hence add decrypted workarea mappings to
+ * both kernel mappings.
+ */
ppd->paddr = workarea_start;
ppd->vaddr = workarea_start;
ppd->vaddr_end = workarea_end;
- sme_map_range_decrypted(ppd);
+ if (sev_active())
+ sme_map_range_encrypted(ppd);
+ else
+ sme_map_range_decrypted(ppd);

ppd->paddr = workarea_start;
ppd->vaddr = workarea_start + decrypted_base;
ppd->vaddr_end = workarea_end + decrypted_base;
- sme_map_range_decrypted(ppd);
+ if (sev_active())
+ sme_map_range_encrypted(ppd);
+ else
+ sme_map_range_decrypted(ppd);

wa->kernel_start = kernel_start;
wa->kernel_end = kernel_end;
@@ -487,28 +510,69 @@ static void __init teardown_workarea_map(struct sme_workarea_data *wa,
native_write_cr3(__native_read_cr3());
}

+static void __init decrypt_shared_data(struct sme_workarea_data *wa,
+ struct sme_populate_pgd_data *ppd)
+{
+ unsigned long decrypted_start, decrypted_end, decrypted_len;
+
+ /* Physical addresses of decrypted data section */
+ decrypted_start = __pa_symbol(__start_data_decrypted);
+ decrypted_end = ALIGN(__pa_symbol(__end_data_decrypted), PMD_PAGE_SIZE);
+ decrypted_len = decrypted_end - decrypted_start;
+
+ if (!decrypted_len)
+ return;
+
+ /* Add decrypted mapping for the section (identity) */
+ ppd->paddr = decrypted_start;
+ ppd->vaddr = decrypted_start;
+ ppd->vaddr_end = decrypted_end;
+ sme_map_range_decrypted(ppd);
+
+ /* Add encrypted-wp mapping for the section (non-identity) */
+ ppd->paddr = decrypted_start;
+ ppd->vaddr = decrypted_start + wa->decrypted_base;
+ ppd->vaddr_end = decrypted_end + wa->decrypted_base;
+ sme_map_range_encrypted_wp(ppd);
+
+ /* Perform in-place decryption */
+ sme_encrypt_execute(decrypted_start,
+ decrypted_start + wa->decrypted_base,
+ decrypted_len, wa->workarea_start,
+ (unsigned long)ppd->pgd);
+
+ ppd->vaddr = decrypted_start + wa->decrypted_base;
+ ppd->vaddr_end = decrypted_end + wa->decrypted_base;
+ sme_clear_pgd(ppd);
+}
+
void __init sme_encrypt_kernel(struct boot_params *bp)
{
struct sme_populate_pgd_data ppd;
struct sme_workarea_data wa;

- if (!sme_active())
+ if (!mem_encrypt_active())
return;

build_workarea_map(bp, &wa, &ppd);

- /* When SEV is active, encrypt kernel and initrd */
- sme_encrypt_execute(wa.kernel_start,
- wa.kernel_start + wa.decrypted_base,
- wa.kernel_len, wa.workarea_start,
- (unsigned long)ppd.pgd);
-
- if (wa.initrd_len)
- sme_encrypt_execute(wa.initrd_start,
- wa.initrd_start + wa.decrypted_base,
- wa.initrd_len, wa.workarea_start,
+ /* When SME is active, encrypt kernel and initrd */
+ if (sme_active()) {
+ sme_encrypt_execute(wa.kernel_start,
+ wa.kernel_start + wa.decrypted_base,
+ wa.kernel_len, wa.workarea_start,
(unsigned long)ppd.pgd);

+ if (wa.initrd_len)
+ sme_encrypt_execute(wa.initrd_start,
+ wa.initrd_start + wa.decrypted_base,
+ wa.initrd_len, wa.workarea_start,
+ (unsigned long)ppd.pgd);
+ }
+
+ /* Decrypt the contents of .data..decrypted section */
+ decrypt_shared_data(&wa, &ppd);
+
teardown_workarea_map(&wa, &ppd);
}

--
2.7.4