[PATCH v2 2/2] x86/KASLR/64: Determine kernel text mapping size at runtime
From: Baoquan He
Date: Fri Dec 09 2016 - 09:42:27 EST
The x86_64 kernel takes KERNEL_IMAGE_SIZE as the kernel text mapping size,
and it is fixed at compile time: it changes from 512M to 1G whenever
CONFIG_RANDOMIZE_BASE is enabled, even if the kernel option "nokaslr" is
specified explicitly.
This is wrong behaviour. CONFIG_RANDOMIZE_BASE should only decide whether
the KASLR code is compiled in. If the user specifies "nokaslr", the kernel
should behave as if no KASLR code were compiled in at all.
So in this patch, define a new macro, KERNEL_MAPPING_SIZE, to represent the
size of the kernel text mapping area, and let KERNEL_IMAGE_SIZE limit only
the size of the kernel run-time image. The size of the kernel text mapping
area is then determined at runtime: even with the KASLR code compiled in,
if "nokaslr" is specified the kernel mapping size stays at 512M.
Signed-off-by: Baoquan He <bhe@xxxxxxxxxx>
Acked-by: Kees Cook <keescook@xxxxxxxxxxxx>
---
v1->v2:
- Move the declaration of kernel_mapping_size out of the #ifdef CONFIG_RANDOMIZE_MEMORY
  scope in <asm/kaslr.h> to fix a compile error when CONFIG_RANDOMIZE_MEMORY is
  disabled.
- Define _kernel_mapping_size as a static variable inside boot/compressed/kaslr.c.
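Not part of the patch, just a summary for reviewers: the core runtime
decision is the one made in the head64.c hunk below (shown here out of
context, using the same names and calls as the patch):

	/* Default to the 512M text mapping used when KASLR is off. */
	unsigned long kernel_mapping_size = KERNEL_IMAGE_SIZE;

	/*
	 * Widen the text mapping to 1G only when KASLR is compiled in
	 * and "nokaslr" was not given on the kernel command line.
	 */
	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) &&
	    !cmdline_find_option_bool(boot_command_line, "nokaslr"))
		kernel_mapping_size = KERNEL_MAPPING_SIZE_EXT;

MODULES_VADDR then becomes __START_KERNEL_map + KERNEL_MAPPING_SIZE, so the
modules area keeps its full 1.5G whenever the text mapping stays at 512M.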
arch/x86/boot/compressed/kaslr.c | 20 +++++++++++++++-----
arch/x86/include/asm/kaslr.h | 1 +
arch/x86/include/asm/page_64_types.h | 19 +++++++++++--------
arch/x86/include/asm/pgtable_64_types.h | 2 +-
arch/x86/kernel/head64.c | 11 ++++++-----
arch/x86/kernel/head_64.S | 3 ++-
arch/x86/mm/dump_pagetables.c | 3 ++-
arch/x86/mm/physaddr.c | 6 +++---
8 files changed, 41 insertions(+), 24 deletions(-)
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index a66854d..823f294 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -22,6 +22,12 @@
static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
+/*
+ * By default, the size of the kernel text mapping equals KERNEL_IMAGE_SIZE.
+ * On x86_64 it may be extended to 1G if KASLR is enabled.
+ */
+static unsigned long _kernel_mapping_size = KERNEL_IMAGE_SIZE;
+
static unsigned long rotate_xor(unsigned long hash, const void *area,
size_t size)
{
@@ -311,7 +317,7 @@ static void process_e820_entry(struct e820entry *entry,
return;
/* On 32-bit, ignore entries entirely above our maximum. */
- if (IS_ENABLED(CONFIG_X86_32) && entry->addr >= KERNEL_IMAGE_SIZE)
+ if (IS_ENABLED(CONFIG_X86_32) && entry->addr >= _kernel_mapping_size)
return;
/* Ignore entries entirely below our minimum. */
@@ -341,8 +347,8 @@ static void process_e820_entry(struct e820entry *entry,
/* On 32-bit, reduce region size to fit within max size. */
if (IS_ENABLED(CONFIG_X86_32) &&
- region.start + region.size > KERNEL_IMAGE_SIZE)
- region.size = KERNEL_IMAGE_SIZE - region.start;
+ region.start + region.size > _kernel_mapping_size)
+ region.size = _kernel_mapping_size - region.start;
/* Return if region can't contain decompressed kernel */
if (region.size < image_size)
@@ -408,9 +414,9 @@ static unsigned long find_random_virt_addr(unsigned long minimum,
/*
* There are how many CONFIG_PHYSICAL_ALIGN-sized slots
* that can hold image_size within the range of minimum to
- * KERNEL_IMAGE_SIZE?
+ * _kernel_mapping_size?
*/
- slots = (KERNEL_IMAGE_SIZE - minimum - image_size) /
+ slots = (_kernel_mapping_size - minimum - image_size) /
CONFIG_PHYSICAL_ALIGN + 1;
random_addr = kaslr_get_random_long("Virtual") % slots;
@@ -438,6 +444,10 @@ void choose_random_location(unsigned long input,
return;
}
+#ifdef CONFIG_X86_64
+ _kernel_mapping_size = KERNEL_MAPPING_SIZE_EXT;
+#endif
+
boot_params->hdr.loadflags |= KASLR_FLAG;
/* Prepare to add new identity pagetables on demand. */
diff --git a/arch/x86/include/asm/kaslr.h b/arch/x86/include/asm/kaslr.h
index 1052a79..093935d 100644
--- a/arch/x86/include/asm/kaslr.h
+++ b/arch/x86/include/asm/kaslr.h
@@ -2,6 +2,7 @@
#define _ASM_KASLR_H_
unsigned long kaslr_get_random_long(const char *purpose);
+extern unsigned long kernel_mapping_size;
#ifdef CONFIG_RANDOMIZE_MEMORY
extern unsigned long page_offset_base;
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 62a20ea..b8e79d7 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -49,18 +49,21 @@
#define __PHYSICAL_MASK_SHIFT 46
#define __VIRTUAL_MASK_SHIFT 47
+
+/*
+ * Kernel image size is limited to 512 MB. The kernel code+data+bss
+ * must not be bigger than that.
+ */
+#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024)
+
/*
- * Kernel image size is limited to 1GiB due to the fixmap living in the
- * next 1GiB (see level2_kernel_pgt in arch/x86/kernel/head_64.S). Use
- * 512MiB by default, leaving 1.5GiB for modules once the page tables
+ * Kernel mapping size is limited to 1GiB due to the fixmap living in
+ * the next 1GiB (see level2_kernel_pgt in arch/x86/kernel/head_64.S).
+ * Use 512MiB by default, leaving 1.5GiB for modules once the page tables
* are fully set up. If kernel ASLR is configured, it can extend the
* kernel page table mapping, reducing the size of the modules area.
*/
#define KERNEL_MAPPING_SIZE_EXT (1024 * 1024 * 1024)
-#if defined(CONFIG_RANDOMIZE_BASE)
-#define KERNEL_IMAGE_SIZE KERNEL_MAPPING_SIZE_EXT
-#else
-#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024)
-#endif
+#define KERNEL_MAPPING_SIZE kernel_mapping_size
#endif /* _ASM_X86_PAGE_64_DEFS_H */
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 3a26420..a357050 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -66,7 +66,7 @@ typedef struct { pteval_t pte; } pte_t;
#define VMEMMAP_START __VMEMMAP_BASE
#endif /* CONFIG_RANDOMIZE_MEMORY */
#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
-#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
+#define MODULES_VADDR (__START_KERNEL_map + KERNEL_MAPPING_SIZE)
#define MODULES_END _AC(0xffffffffff000000, UL)
#define MODULES_LEN (MODULES_END - MODULES_VADDR)
#define ESPFIX_PGD_ENTRY _AC(-2, UL)
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 54a2372..46d2bd2 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -28,6 +28,7 @@
#include <asm/bootparam_utils.h>
#include <asm/microcode.h>
#include <asm/kasan.h>
+#include <asm/cmdline.h>
/*
* Manage page tables very early on.
@@ -36,6 +37,7 @@ extern pgd_t early_level4_pgt[PTRS_PER_PGD];
extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
static unsigned int __initdata next_early_pgt = 2;
pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
+unsigned long kernel_mapping_size = KERNEL_IMAGE_SIZE;
/* Wipe all early page tables except for the kernel symbol map */
static void __init reset_early_page_tables(void)
@@ -138,12 +140,7 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
* Build-time sanity checks on the kernel image and module
* area mappings. (these are purely build-time and produce no code)
*/
- BUILD_BUG_ON(MODULES_VADDR < __START_KERNEL_map);
- BUILD_BUG_ON(MODULES_VADDR - __START_KERNEL_map < KERNEL_IMAGE_SIZE);
- BUILD_BUG_ON(MODULES_LEN + KERNEL_IMAGE_SIZE > 2*PUD_SIZE);
BUILD_BUG_ON((__START_KERNEL_map & ~PMD_MASK) != 0);
- BUILD_BUG_ON((MODULES_VADDR & ~PMD_MASK) != 0);
- BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
(__START_KERNEL & PGDIR_MASK)));
BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);
@@ -165,6 +162,10 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
copy_bootdata(__va(real_mode_data));
+ if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) &&
+ !cmdline_find_option_bool(boot_command_line, "nokaslr"))
+ kernel_mapping_size = KERNEL_MAPPING_SIZE_EXT;
+
/*
* Load microcode early on BSP.
*/
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index c4b40e7c9..8bbb29e 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -461,7 +461,8 @@ NEXT_PAGE(level2_kernel_pgt)
* 512M if no kaslr, 1G if kaslr enabled. Later cleanup_highmap will
* clean up those unused entries.
*
- * The module area starts after kernel mapping area.
+ * The module area starts after the kernel mapping area; see MODULES_VADDR.
+ * It varies with KERNEL_MAPPING_SIZE.
*/
PMDS(0, __PAGE_KERNEL_LARGE_EXEC,
PTRS_PER_PMD)
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index ea9c49a..412c3f5 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -82,7 +82,7 @@ static struct addr_marker address_markers[] = {
{ EFI_VA_END, "EFI Runtime Services" },
# endif
{ __START_KERNEL_map, "High Kernel Mapping" },
- { MODULES_VADDR, "Modules" },
+ { 0/*MODULES_VADDR*/, "Modules" },
{ MODULES_END, "End Modules" },
#else
{ PAGE_OFFSET, "Kernel Mapping" },
@@ -442,6 +442,7 @@ static int __init pt_dump_init(void)
address_markers[LOW_KERNEL_NR].start_address = PAGE_OFFSET;
address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
address_markers[VMEMMAP_START_NR].start_address = VMEMMAP_START;
+ address_markers[MODULES_VADDR_NR].start_address = MODULES_VADDR;
#endif
#ifdef CONFIG_X86_32
address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
diff --git a/arch/x86/mm/physaddr.c b/arch/x86/mm/physaddr.c
index cfc3b91..c0b70fc 100644
--- a/arch/x86/mm/physaddr.c
+++ b/arch/x86/mm/physaddr.c
@@ -18,7 +18,7 @@ unsigned long __phys_addr(unsigned long x)
if (unlikely(x > y)) {
x = y + phys_base;
- VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE);
+ VIRTUAL_BUG_ON(y >= KERNEL_MAPPING_SIZE);
} else {
x = y + (__START_KERNEL_map - PAGE_OFFSET);
@@ -35,7 +35,7 @@ unsigned long __phys_addr_symbol(unsigned long x)
unsigned long y = x - __START_KERNEL_map;
/* only check upper bounds since lower bounds will trigger carry */
- VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE);
+ VIRTUAL_BUG_ON(y >= KERNEL_MAPPING_SIZE);
return y + phys_base;
}
@@ -50,7 +50,7 @@ bool __virt_addr_valid(unsigned long x)
if (unlikely(x > y)) {
x = y + phys_base;
- if (y >= KERNEL_IMAGE_SIZE)
+ if (y >= KERNEL_MAPPING_SIZE)
return false;
} else {
x = y + (__START_KERNEL_map - PAGE_OFFSET);
--
2.5.5