[PATCH v7 5/5] x86/kvm: Avoid dynamic allocation of pvclock data when SEV is active
From: Brijesh Singh
Date: Mon Sep 10 2018 - 17:50:30 EST
In the preparatory stage of CPU hotplug, the per-CPU pvclock data pointer
is assigned either an element of the static array or dynamically allocated
memory. Currently, the dynamically allocated memory is not mapped
decrypted. However, when SEV is active this memory range must be mapped
decrypted.
The C-bit determines the encryption status of a 4K page hence a full 4K
page allocation would be required to store a single 32-byte pvclock
variable. This could waste a fairly sizeable amount of memory since each
CPU will perform a separate 4K allocation.
Instead, define a second static array which will be used when SEV is
active. This array will be put in the .data..decrypted section so that it
is mapped decrypted during boot.
The .data..decrypted section contains a large chunk of memory that is
currently unused. Since the second array is needed only when memory
encryption is active, free it when memory encryption is not active.
Signed-off-by: Brijesh Singh <brijesh.singh@xxxxxxx>
Suggested-by: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
Acked-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
Cc: Tom Lendacky <thomas.lendacky@xxxxxxx>
Cc: kvm@xxxxxxxxxxxxxxx
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxx>
Cc: "H. Peter Anvin" <hpa@xxxxxxxxx>
Cc: linux-kernel@xxxxxxxxxxxxxxx
Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
Cc: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
Cc: "Radim Krčmář" <rkrcmar@xxxxxxxxxx>
---
arch/x86/include/asm/mem_encrypt.h | 4 ++++
arch/x86/kernel/kvmclock.c | 14 ++++++++++++++
arch/x86/kernel/vmlinux.lds.S | 3 +++
arch/x86/mm/init.c | 3 +++
arch/x86/mm/mem_encrypt.c | 10 ++++++++++
5 files changed, 34 insertions(+)
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index 802b2eb..3f2a5e3 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -48,11 +48,13 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size);
/* Architecture __weak replacement functions */
void __init mem_encrypt_init(void);
+void __init mem_encrypt_free_decrypted_mem(void);
bool sme_active(void);
bool sev_active(void);
#define __decrypted __attribute__((__section__(".data..decrypted")))
+#define __decrypted_aux __attribute__((__section__(".data..decrypted.aux")))
#else /* !CONFIG_AMD_MEM_ENCRYPT */
@@ -80,6 +82,7 @@ static inline int __init
early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; }
#define __decrypted
+#define __decrypted_aux
#endif /* CONFIG_AMD_MEM_ENCRYPT */
@@ -93,6 +96,7 @@ early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0;
#define __sme_pa_nodebug(x) (__pa_nodebug(x) | sme_me_mask)
extern char __start_data_decrypted[], __end_data_decrypted[];
+extern char __start_data_decrypted_aux[];
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 0b3110b..9d8bad5 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -65,6 +65,15 @@ static struct pvclock_vsyscall_time_info
static struct pvclock_wall_clock wall_clock __decrypted;
static DEFINE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu);
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+/*
+ * The auxiliary array will be used when SEV is active. In non-SEV case,
+ * it will be freed by mem_encrypt_free_decrypted_mem().
+ */
+static struct pvclock_vsyscall_time_info
+ hv_clock_aux[NR_CPUS] __decrypted_aux;
+#endif
+
static inline struct pvclock_vcpu_time_info *this_cpu_pvti(void)
{
return &this_cpu_read(hv_clock_per_cpu)->pvti;
@@ -269,6 +278,11 @@ static int kvmclock_setup_percpu(unsigned int cpu)
/* Use the static page for the first CPUs, allocate otherwise */
if (cpu < HVC_BOOT_ARRAY_SIZE)
p = &hv_clock_boot[cpu];
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ /* Use the static page from auxiliary array instead of allocating it. */
+ else if (sev_active())
+ p = &hv_clock_aux[cpu - HVC_BOOT_ARRAY_SIZE];
+#endif
else
p = kzalloc(sizeof(*p), GFP_KERNEL);
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index ae8153e..b78e117 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -77,6 +77,9 @@ jiffies_64 = jiffies;
. = ALIGN(PMD_SIZE); \
__start_data_decrypted = .; \
*(.data..decrypted); \
+ . = ALIGN(PAGE_SIZE); \
+ __start_data_decrypted_aux = .; \
+ *(.data..decrypted.aux); \
. = ALIGN(PMD_SIZE); \
__end_data_decrypted = .; \
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 7a8fc26..b3cc33d 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -815,9 +815,12 @@ void free_kernel_image_pages(void *begin, void *end)
set_memory_np_noalias(begin_ul, len_pages);
}
+void __weak mem_encrypt_free_decrypted_mem(void) { }
+
void __ref free_initmem(void)
{
e820__reallocate_tables();
+ mem_encrypt_free_decrypted_mem();
free_kernel_image_pages(&__init_begin, &__init_end);
}
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index b2de398..f1ab7f5 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -348,6 +348,16 @@ bool sev_active(void)
EXPORT_SYMBOL(sev_active);
/* Architecture __weak replacement functions */
+void __init mem_encrypt_free_decrypted_mem(void)
+{
+ if (mem_encrypt_active())
+ return;
+
+ free_init_pages("unused decrypted",
+ (unsigned long)__start_data_decrypted_aux,
+ (unsigned long)__end_data_decrypted);
+}
+
void __init mem_encrypt_init(void)
{
if (!sme_me_mask)
--
2.7.4