[tip:x86/urgent] x86/mm: Add .bss..decrypted section to hold shared variables

From: tip-bot for Brijesh Singh
Date: Sat Sep 15 2018 - 14:52:18 EST


Commit-ID: b3f0907c71e006e12fde74ea9a745b6096b6f90f
Gitweb: https://git.kernel.org/tip/b3f0907c71e006e12fde74ea9a745b6096b6f90f
Author: Brijesh Singh <brijesh.singh@xxxxxxx>
AuthorDate: Fri, 14 Sep 2018 08:45:58 -0500
Committer: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
CommitDate: Sat, 15 Sep 2018 20:48:45 +0200

x86/mm: Add .bss..decrypted section to hold shared variables

kvmclock defines a few static variables which are shared with the
hypervisor during kvmclock initialization.

When SEV is active, guest memory is encrypted with a guest-specific key,
and if the guest OS wants to share a memory region with the hypervisor
then it must clear the C-bit (the encryption bit carried in the page
table entry) before sharing it.
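
As background, since the C-bit lives in the physical-address bits of a
page table entry, "clearing the C-bit" amounts to masking sme_me_mask
out of the entry's value. A minimal sketch (hypothetical helper name,
not part of this patch):

    /* Map a 2M page shared (C=0): drop the encryption mask from the PMD. */
    static pmd_t pmd_make_shared(pmd_t pmd)
    {
            return __pmd(pmd_val(pmd) & ~sme_me_mask);
    }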

Currently, kernel_physical_mapping_init() is used to split large pages
before clearing the C-bit on shared pages, but it fails when called from
the kvmclock initialization code, mainly because the memblock allocator
is not ready that early during boot.

Add a __bss_decrypted section attribute which can be used when defining
such shared variables. The so-defined variables are placed in the
.bss..decrypted section, which is mapped with C=0 early during boot.
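
As an illustration, a consumer such as kvmclock (converted in the
follow-up patch of this series) would define its hypervisor-shared data
roughly like this (sketch only; HVC_BOOT_ARRAY_SIZE comes from that
follow-up patch):

    /* Placed in .bss..decrypted, hence mapped with C=0 and readable
     * by the hypervisor in the clear.
     */
    static struct pvclock_vsyscall_time_info
            hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __bss_decrypted __aligned(PAGE_SIZE);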

The .bss..decrypted section occupies a big chunk of memory that is
unused when memory encryption is not active; free it back to the system
in that case.

Suggested-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Signed-off-by: Brijesh Singh <brijesh.singh@xxxxxxx>
Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Tom Lendacky <thomas.lendacky@xxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxx>
Cc: "H. Peter Anvin" <hpa@xxxxxxxxx>
Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
Cc: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
Cc: Radim Krčmář <rkrcmar@xxxxxxxxxx>
Cc: kvm@xxxxxxxxxxxxxxx
Link: https://lkml.kernel.org/r/1536932759-12905-2-git-send-email-brijesh.singh@xxxxxxx

---
 arch/x86/include/asm/mem_encrypt.h |  7 +++++++
 arch/x86/kernel/head64.c           | 16 ++++++++++++++++
 arch/x86/kernel/vmlinux.lds.S      | 19 +++++++++++++++++++
 arch/x86/mm/init.c                 |  4 ++++
 arch/x86/mm/mem_encrypt.c          | 24 ++++++++++++++++++++++++
 5 files changed, 70 insertions(+)

diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index c0643831706e..616f8e637bc3 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -48,10 +48,13 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size);

/* Architecture __weak replacement functions */
void __init mem_encrypt_init(void);
+void __init mem_encrypt_free_decrypted_mem(void);

bool sme_active(void);
bool sev_active(void);

+#define __bss_decrypted __attribute__((__section__(".bss..decrypted")))
+
#else /* !CONFIG_AMD_MEM_ENCRYPT */

#define sme_me_mask 0ULL
@@ -77,6 +80,8 @@ early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0;
static inline int __init
early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; }

+#define __bss_decrypted
+
#endif /* CONFIG_AMD_MEM_ENCRYPT */

/*
@@ -88,6 +93,8 @@ early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0;
#define __sme_pa(x) (__pa(x) | sme_me_mask)
#define __sme_pa_nodebug(x) (__pa_nodebug(x) | sme_me_mask)

+extern char __start_bss_decrypted[], __end_bss_decrypted[], __start_bss_decrypted_unused[];
+
#endif /* __ASSEMBLY__ */

#endif /* __X86_MEM_ENCRYPT_H__ */
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 8047379e575a..c16af27eb23f 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -112,6 +112,7 @@ static bool __head check_la57_support(unsigned long physaddr)
unsigned long __head __startup_64(unsigned long physaddr,
                                  struct boot_params *bp)
{
+       unsigned long vaddr, vaddr_end;
        unsigned long load_delta, *p;
        unsigned long pgtable_flags;
        pgdval_t *pgd;
@@ -234,6 +235,21 @@ unsigned long __head __startup_64(unsigned long physaddr,
        /* Encrypt the kernel and related (if SME is active) */
        sme_encrypt_kernel(bp);

+       /*
+        * Clear the memory encryption mask from the .bss..decrypted section.
+        * The bss section will be memset to zero later in the initialization so
+        * there is no need to zero it after changing the memory encryption
+        * attribute.
+        */
+       if (mem_encrypt_active()) {
+               vaddr = (unsigned long)__start_bss_decrypted;
+               vaddr_end = (unsigned long)__end_bss_decrypted;
+               for (; vaddr < vaddr_end; vaddr += PMD_SIZE) {
+                       i = pmd_index(vaddr);
+                       pmd[i] -= sme_get_me_mask();
+               }
+       }
+
        /*
         * Return the SME encryption mask (if SME is active) to be used as a
         * modifier for the initial pgdir entry programmed into CR3.
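
Note that subtracting sme_get_me_mask() works here because the early
page tables map the whole kernel image, including .bss..decrypted, with
the encryption mask set, so the subtraction is equivalent to clearing
the bit. An equivalent, more explicit form of the loop (sketch only,
hypothetical function name):

    static void __head sketch_clear_c_bit(pmdval_t *pmd, unsigned long vaddr,
                                          unsigned long vaddr_end)
    {
            unsigned long i;

            for (; vaddr < vaddr_end; vaddr += PMD_SIZE) {
                    i = pmd_index(vaddr);
                    pmd[i] &= ~sme_get_me_mask(); /* same as -= when the bit is set */
            }
    }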
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 8bde0a419f86..5dd3317d761f 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -65,6 +65,23 @@ jiffies_64 = jiffies;
#define ALIGN_ENTRY_TEXT_BEGIN . = ALIGN(PMD_SIZE);
#define ALIGN_ENTRY_TEXT_END . = ALIGN(PMD_SIZE);

+/*
+ * This section contains data which will be mapped as decrypted. Memory
+ * encryption operates on a page basis. Make this section PMD-aligned
+ * to avoid splitting the pages while mapping the section early.
+ *
+ * Note: We use a separate section so that only this section gets
+ * decrypted to avoid exposing more than we wish.
+ */
+#define BSS_DECRYPTED                                   \
+        . = ALIGN(PMD_SIZE);                            \
+        __start_bss_decrypted = .;                      \
+        *(.bss..decrypted);                             \
+        . = ALIGN(PAGE_SIZE);                           \
+        __start_bss_decrypted_unused = .;               \
+        . = ALIGN(PMD_SIZE);                            \
+        __end_bss_decrypted = .;                        \
+
#else

#define X86_ALIGN_RODATA_BEGIN
@@ -74,6 +91,7 @@ jiffies_64 = jiffies;

#define ALIGN_ENTRY_TEXT_BEGIN
#define ALIGN_ENTRY_TEXT_END
+#define BSS_DECRYPTED

#endif

@@ -355,6 +373,7 @@ SECTIONS
                __bss_start = .;
                *(.bss..page_aligned)
                *(.bss)
+               BSS_DECRYPTED
                . = ALIGN(PAGE_SIZE);
                __bss_stop = .;
        }
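
With the x86-64 defaults (PMD_SIZE = 2 MiB, PAGE_SIZE = 4 KiB) and, say,
a single page of shared variables, the section lays out as follows
(illustrative):

    __start_bss_decrypted          <- PMD-aligned, C=0 mapping starts
        [ 4 KiB of shared variables ]
    __start_bss_decrypted_unused   <- page-aligned, first unused byte
        [ ~2044 KiB of padding ]
    __end_bss_decrypted            <- PMD-aligned, C=0 mapping ends

The range between __start_bss_decrypted_unused and __end_bss_decrypted
is the "big chunk of memory" that mem_encrypt_free_decrypted_mem()
below hands back when it is not needed.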
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 7a8fc26c1115..faca978ebf9d 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -815,10 +815,14 @@ void free_kernel_image_pages(void *begin, void *end)
        set_memory_np_noalias(begin_ul, len_pages);
}

+void __weak mem_encrypt_free_decrypted_mem(void) { }
+
void __ref free_initmem(void)
{
        e820__reallocate_tables();

+       mem_encrypt_free_decrypted_mem();
+
        free_kernel_image_pages(&__init_begin, &__init_end);
}
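
The __weak stub above keeps configurations without memory encryption
support building and working: when CONFIG_AMD_MEM_ENCRYPT=y, the strong
definition added to arch/x86/mm/mem_encrypt.c below overrides it at
link time. Schematically (two separate translation units):

    /* arch/x86/mm/init.c: default no-op */
    void __weak mem_encrypt_free_decrypted_mem(void) { }

    /* arch/x86/mm/mem_encrypt.c: strong definition; the linker picks
     * it over the weak stub whenever this file is built in.
     */
    void __init mem_encrypt_free_decrypted_mem(void) { /* real work */ }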

diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index b2de398d1fd3..006f373f54ab 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -348,6 +348,30 @@ bool sev_active(void)
EXPORT_SYMBOL(sev_active);

/* Architecture __weak replacement functions */
+void __init mem_encrypt_free_decrypted_mem(void)
+{
+       unsigned long vaddr, vaddr_end, npages;
+       int r;
+
+       vaddr = (unsigned long)__start_bss_decrypted_unused;
+       vaddr_end = (unsigned long)__end_bss_decrypted;
+       npages = (vaddr_end - vaddr) >> PAGE_SHIFT;
+
+       /*
+        * The unused memory range was mapped decrypted, change the encryption
+        * attribute from decrypted to encrypted before freeing it.
+        */
+       if (mem_encrypt_active()) {
+               r = set_memory_encrypted(vaddr, npages);
+               if (r) {
+                       pr_warn("failed to free unused decrypted pages\n");
+                       return;
+               }
+       }
+
+       free_init_pages("unused decrypted", vaddr, vaddr_end);
+}
+
void __init mem_encrypt_init(void)
{
        if (!sme_me_mask)