Re: [RFC PATCH] kaslr: get ACPI SRAT table to avoid movable memory
From: Chao Fan
Date: Fri Aug 18 2017 - 05:05:58 EST
Hi all,
Here is my POC mail:
https://www.spinics.net/lists/kernel/msg2571811.html
Since there was no reply, I made this RFC PATCH.
I ran it in a QEMU guest. It can get and print the mem_affinity.
But I have no physical machine available right now.
If there is something wrong, please let me know.
If someone has a better method to handle the movable memory,
please tell me.
Thanks,
Chao Fan
On Fri, Aug 18, 2017 at 04:58:20PM +0800, Chao Fan wrote:
>KASLR should choose the memory region of an immovable node to extract the kernel.
>So get the ACPI SRAT table and store the memory regions of movable nodes, which
>KASLR should avoid.
>
>Signed-off-by: Chao Fan <fanc.fnst@xxxxxxxxxxxxxx>
>---
> arch/x86/boot/compressed/kaslr.c | 231 +++++++++++++++++++++++++++++++++++++++
> arch/x86/boot/compressed/misc.h | 27 +++++
> 2 files changed, 258 insertions(+)
>
>diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
>index 7de23bb279ce..3b8c111b8a84 100644
>--- a/arch/x86/boot/compressed/kaslr.c
>+++ b/arch/x86/boot/compressed/kaslr.c
>@@ -45,6 +45,11 @@
> #define STATIC
> #include <linux/decompress/mm.h>
>
>+#include <linux/efi.h>
>+#include <linux/acpi.h>
>+#include <linux/numa.h>
>+#include <asm/efi.h>
>+
> extern unsigned long get_cmd_line_ptr(void);
>
> /* Simplified build-specific string for starting entropy. */
>@@ -94,6 +99,18 @@ static bool memmap_too_large;
> /* Store memory limit specified by "mem=nn[KMG]" or "memmap=nn[KMG]" */
> unsigned long long mem_limit = ULLONG_MAX;
>
>+/* Store the max numbers of acpi tables */
>+#define ACPI_MAX_TABLES 128
>+
>+/* Store the movable memory */
>+static struct {
>+ u64 start;
>+ u64 end;
>+} movable_mem[MAX_NUMNODES*2];
>+
>+/* Store the num of movable mem affinity */
>+static int num_movable_ma;
>+
>
> enum mem_avoid_index {
> MEM_AVOID_ZO_RANGE = 0,
>@@ -257,6 +274,180 @@ static int handle_mem_memmap(void)
> return 0;
> }
>
>+static void handle_movable_node(void)
>+{
>+ struct acpi_table_desc table_descs[ACPI_MAX_TABLES];
>+ struct acpi_table_header *table_header;
>+ struct acpi_srat_mem_affinity *ma;
>+ struct acpi_subtable_header *asth;
>+ acpi_physical_address root_table;
>+ acpi_physical_address acpi_table;
>+ acpi_physical_address rsdp_addr;
>+ struct acpi_table_header *th;
>+ efi_system_table_t *systab;
>+ unsigned long table_size;
>+ unsigned long table_end;
>+ bool use_rsdt = false;
>+ bool acpi_20 = false;
>+ bool efi_64 = false;
>+ void *config_tables;
>+ int size, total_size;
>+ u32 table_entry_size;
>+ struct efi_info *e;
>+ u8 *table_entry;
>+ u32 table_count;
>+ char *args;
>+ char *sig;
>+ u32 len;
>+ int i, j;
>+
>+ args = (char *)get_cmd_line_ptr();
>+ if (!strstr(args, "movable_node"))
>+ return;
>+
>+ e = &boot_params->efi_info;
>+ sig = (char *)&e->efi_loader_signature;
>+
>+ if (!strncmp(sig, EFI64_LOADER_SIGNATURE, 4))
>+ efi_64 = true;
>+ else if (!strncmp(sig, EFI32_LOADER_SIGNATURE, 4))
>+ efi_64 = false;
>+ else {
>+ debug_putstr("Wrong efi loader signature.\n");
>+ return;
>+ }
>+
>+ // Get systab from boot params
>+#ifdef CONFIG_X86_32
>+ if (e->efi_systab_hi || e->efi_memmap_hi) {
>+ debug_putstr("Table located above 4GB, disabling EFI.\n");
>+ return;
>+ }
>+ systab = (efi_system_table_t *)e->efi_systab;
>+#else
>+ systab = (efi_system_table_t *)(e->efi_systab |
>+ ((__u64)e->efi_systab_hi<<32));
>+#endif
>+
>+ // Get efi tables from systab
>+ size = efi_64 ? sizeof(efi_config_table_64_t) :
>+ sizeof(efi_config_table_32_t);
>+ total_size = systab->nr_tables * size;
>+
>+ for (i = 0; i < systab->nr_tables; i++) {
>+ efi_guid_t guid;
>+ unsigned long table;
>+
>+ config_tables = (void *)(systab->tables + size * i);
>+ if (efi_64) {
>+ efi_config_table_64_t *tmp_table;
>+
>+ tmp_table = (efi_config_table_64_t *)config_tables;
>+ guid = tmp_table->guid;
>+ table = tmp_table->table;
>+#ifndef CONFIG_64BIT
>+ if (table >> 32) {
>+ debug_putstr
>+ ("Table located above 4G, disabling EFI.\n");
>+ return -EINVAL;
>+ }
>+#endif
>+ } else {
>+ efi_config_table_32_t *tmp_table;
>+
>+ tmp_table = (efi_config_table_32_t *)config_tables;
>+ guid = tmp_table->guid;
>+ table = tmp_table->table;
>+ }
>+
>+ // Get rsdp from efi tables
>+ if (!(efi_guidcmp(guid, ACPI_TABLE_GUID)) && !acpi_20) {
>+ rsdp_addr = (acpi_physical_address)table;
>+ acpi_20 = false;
>+ } else if (!(efi_guidcmp(guid, ACPI_20_TABLE_GUID))) {
>+ rsdp_addr = (acpi_physical_address)table;
>+ acpi_20 = true;
>+ }
>+ }
>+
>+ // Get rsdt or xsdt from rsdp
>+ if (strstr(args, "acpi=rsdt"))
>+ use_rsdt = true;
>+
>+ if (!(use_rsdt) && (acpi_20) &&
>+ ((((struct acpi_table_rsdp *)rsdp_addr)->revision) > 1)) {
>+ root_table = ((struct acpi_table_rsdp *)
>+ rsdp_addr)->xsdt_physical_address;
>+ table_entry_size = ACPI_XSDT_ENTRY_SIZE;
>+ } else {
>+ root_table = ((struct acpi_table_rsdp *)
>+ rsdp_addr)->rsdt_physical_address;
>+ table_entry_size = ACPI_RSDT_ENTRY_SIZE;
>+ }
>+
>+ // Get acpi root table from rsdt or xsdt
>+ th = (struct acpi_table_header *)root_table;
>+ len = th->length;
>+ table_count = (u32)((len - sizeof(struct acpi_table_header)) /
>+ table_entry_size);
>+ table_entry = ACPI_ADD_PTR(u8, th, sizeof(struct acpi_table_header));
>+
>+ for (i = 0; i < table_count; i++) {
>+ u64 address64;
>+
>+ memset(&table_descs[i], 0, sizeof(struct acpi_table_desc));
>+ if (table_entry_size == ACPI_RSDT_ENTRY_SIZE)
>+ acpi_table = ((acpi_physical_address)
>+ (*ACPI_CAST_PTR(u32, table_entry)));
>+ else {
>+ ACPI_MOVE_64_TO_64(&address64, table_entry);
>+ acpi_table = (acpi_physical_address) address64;
>+ }
>+
>+ if (acpi_table) {
>+ table_descs[i].address = acpi_table;
>+ table_descs[i].length =
>+ sizeof(struct acpi_table_header);
>+ table_descs[i].pointer =
>+ (struct acpi_table_header *)acpi_table;
>+ for (j = 0; j < 4; j++)
>+ table_descs[i].signature.ascii[j] =
>+ ((struct acpi_table_header *)
>+ acpi_table)->signature[j];
>+ }
>+
>+ if (!strncmp(table_descs[i].signature.ascii, "SRAT", 4)) {
>+ table_header = table_descs[i].pointer;
>+ break;
>+ }
>+
>+ table_entry += table_entry_size;
>+ }
>+
>+ // Get acpi srat mem affinity from acpi root table
>+ table_size = sizeof(struct acpi_table_srat);
>+ table_end = (unsigned long)table_header + table_header->length;
>+ asth = (struct acpi_subtable_header *)
>+ ((unsigned long)table_header + table_size);
>+ j = 0;
>+
>+ while (((unsigned long)asth) +
>+ sizeof(struct acpi_subtable_header) < table_end) {
>+ if (asth->type == 1) {
>+ ma = (struct acpi_srat_mem_affinity *)asth;
>+ if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) {
>+ movable_mem[j].start = ma->base_address;
>+ movable_mem[j].end = ma->base_address +
>+ ma->length - 1;
>+ j++;
>+ }
>+ }
>+ asth = (struct acpi_subtable_header *)
>+ ((unsigned long)asth + asth->length);
>+ }
>+ num_movable_ma = j;
>+}
>+
> /*
> * In theory, KASLR can put the kernel anywhere in the range of [16M, 64T).
> * The mem_avoid array is used to store the ranges that need to be avoided
>@@ -380,6 +571,11 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
> /* Mark the memmap regions we need to avoid */
> handle_mem_memmap();
>
>+#ifdef CONFIG_EFI
>+ /* Mark the hotplug SB regions we need choose */
>+ handle_movable_node();
>+#endif
>+
> #ifdef CONFIG_X86_VERBOSE_BOOTUP
> /* Make sure video RAM can be used. */
> add_identity_map(0, PMD_SIZE);
>@@ -481,6 +677,36 @@ static unsigned long slots_fetch_random(void)
> return 0;
> }
>
>+static int check_movable_memory(struct mem_vector *entry)
>+{
>+ int i;
>+ unsigned long long start;
>+ unsigned long long end;
>+
>+ start = entry->start;
>+ end = entry->start + entry->size - 1;
>+
>+ if (num_movable_ma == 0)
>+ return 0;
>+
>+ for (i = 0; i < num_movable_ma; i++) {
>+ if ((start >= movable_mem[i].start) &&
>+ (start <= movable_mem[i].end))
>+ return 1;
>+
>+ if ((end >= movable_mem[i].start) &&
>+ (end <= movable_mem[i].end))
>+ return 1;
>+
>+ if (start > movable_mem[i].end)
>+ continue;
>+
>+ if (end < movable_mem[i].start)
>+ break;
>+ }
>+ return 0;
>+}
>+
> static void process_mem_region(struct mem_vector *entry,
> unsigned long minimum,
> unsigned long image_size)
>@@ -502,6 +728,11 @@ static void process_mem_region(struct mem_vector *entry,
> end = min(entry->size + entry->start, mem_limit);
> if (entry->start >= end)
> return;
>+
>+ /* Ignore the memory region of movable_node */
>+ if (check_movable_memory(entry))
>+ return;
>+
> cur_entry.start = entry->start;
> cur_entry.size = end - entry->start;
>
>diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
>index 766a5211f827..5f514959b2f1 100644
>--- a/arch/x86/boot/compressed/misc.h
>+++ b/arch/x86/boot/compressed/misc.h
>@@ -109,3 +109,30 @@ static inline void console_init(void)
> #endif
>
> #endif
>+
>+#ifdef ACPI_BIG_ENDIAN
>+#define ACPI_MOVE_64_TO_64(d, s) \
>+{((u8 *)(void *)(d))[0] = ((u8 *)(void *)(s))[7]; \
>+((u8 *)(void *)(d))[1] = ((u8 *)(void *)(s))[6]; \
>+((u8 *)(void *)(d))[2] = ((u8 *)(void *)(s))[5]; \
>+((u8 *)(void *)(d))[3] = ((u8 *)(void *)(s))[4]; \
>+((u8 *)(void *)(d))[4] = ((u8 *)(void *)(s))[3]; \
>+((u8 *)(void *)(d))[5] = ((u8 *)(void *)(s))[2]; \
>+((u8 *)(void *)(d))[6] = ((u8 *)(void *)(s))[1]; \
>+((u8 *)(void *)(d))[7] = ((u8 *)(void *)(s))[0]; }
>+#else
>+#ifndef ACPI_MISALIGNMENT_NOT_SUPPORTED
>+#define ACPI_MOVE_64_TO_64(d, s) \
>+{*(u64 *)(void *)(d) = *(u64 *)(void *)(s)}
>+#else
>+#define ACPI_MOVE_64_TO_64(d, s) \
>+{((u8 *)(void *)(d))[0] = ((u8 *)(void *)(s))[0]; \
>+((u8 *)(void *)(d))[1] = ((u8 *)(void *)(s))[1]; \
>+((u8 *)(void *)(d))[2] = ((u8 *)(void *)(s))[2]; \
>+((u8 *)(void *)(d))[3] = ((u8 *)(void *)(s))[3]; \
>+((u8 *)(void *)(d))[4] = ((u8 *)(void *)(s))[4]; \
>+((u8 *)(void *)(d))[5] = ((u8 *)(void *)(s))[5]; \
>+((u8 *)(void *)(d))[6] = ((u8 *)(void *)(s))[6]; \
>+((u8 *)(void *)(d))[7] = ((u8 *)(void *)(s))[7]; }
>+#endif
>+#endif
>--
>2.13.4
>