[RFC v2 15/26] mm/asi: Initialize the ASI page-table with core mappings

From: Alexandre Chartre
Date: Thu Jul 11 2019 - 10:28:00 EST


Core mappings are the minimal mappings we need to be able to
enter isolation and handle an isolation abort or exit. This
includes the kernel code, the GDT and the percpu ASI sessions.
We also need a stack so we map the current stack when entering
isolation and unmap it on exit/abort.

Optionally, additional mappings can be added, such as the stack canary
or the percpu offset, to be able to use get_cpu_var()/this_cpu_ptr()
when isolation is active.

Signed-off-by: Alexandre Chartre <alexandre.chartre@xxxxxxxxxx>
---
arch/x86/include/asm/asi.h | 9 ++++-
arch/x86/mm/asi.c | 75 +++++++++++++++++++++++++++++++++++++++---
arch/x86/mm/asi_pagetable.c | 30 ++++++++++++----
3 files changed, 99 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/asi.h b/arch/x86/include/asm/asi.h
index cf5d198..1ac8fd3 100644
--- a/arch/x86/include/asm/asi.h
+++ b/arch/x86/include/asm/asi.h
@@ -11,6 +11,13 @@
#include <asm/pgtable.h>
#include <linux/xarray.h>

+/*
+ * asi_create() map flags. Flags are used to map optional data
+ * when creating an ASI.
+ */
+#define ASI_MAP_STACK_CANARY 0x01 /* map stack canary */
+#define ASI_MAP_CPU_PTR 0x02 /* for get_cpu_var()/this_cpu_ptr() */
+
enum page_table_level {
PGT_LEVEL_PTE,
PGT_LEVEL_PMD,
@@ -73,7 +80,7 @@ struct asi_session {
void asi_init_range_mapping(struct asi *asi);
void asi_fini_range_mapping(struct asi *asi);

-extern struct asi *asi_create(void);
+extern struct asi *asi_create(int map_flags);
extern void asi_destroy(struct asi *asi);
extern int asi_enter(struct asi *asi);
extern void asi_exit(struct asi *asi);
diff --git a/arch/x86/mm/asi.c b/arch/x86/mm/asi.c
index 25633a6..f049438 100644
--- a/arch/x86/mm/asi.c
+++ b/arch/x86/mm/asi.c
@@ -19,6 +19,17 @@
/* ASI sessions, one per cpu */
DEFINE_PER_CPU_PAGE_ALIGNED(struct asi_session, cpu_asi_session);

+struct asi_map_option {
+ int flag;
+ void *ptr;
+ size_t size;
+};
+
+struct asi_map_option asi_map_percpu_options[] = {
+ { ASI_MAP_STACK_CANARY, &fixed_percpu_data, sizeof(fixed_percpu_data) },
+ { ASI_MAP_CPU_PTR, &this_cpu_off, sizeof(this_cpu_off) },
+};
+
static void asi_log_fault(struct asi *asi, struct pt_regs *regs,
unsigned long error_code, unsigned long address)
{
@@ -85,16 +96,55 @@ bool asi_fault(struct pt_regs *regs, unsigned long error_code,
return true;
}

-static int asi_init_mapping(struct asi *asi)
+static int asi_init_mapping(struct asi *asi, int flags)
{
+ struct asi_map_option *option;
+ int i, err;
+
+ /*
+ * Map the kernel.
+ *
+ * XXX We should check if we can map only kernel text, i.e. map with
+ * size = _etext - _text
+ */
+ err = asi_map(asi, (void *)__START_KERNEL_map, KERNEL_IMAGE_SIZE);
+ if (err)
+ return err;
+
/*
- * TODO: Populate the ASI page-table with minimal mappings so
- * that we can at least enter isolation and abort.
+ * Map the cpu_entry_area because we need the GDT to be mapped.
+ * Not sure we need anything else from cpu_entry_area.
*/
+ err = asi_map_range(asi, (void *)CPU_ENTRY_AREA_PER_CPU, P4D_SIZE,
+ PGT_LEVEL_P4D);
+ if (err)
+ return err;
+
+ /*
+ * Map the percpu ASI sessions. This is used by interrupt handlers
+ * to figure out if we have entered isolation and switch back to
+ * the kernel address space.
+ */
+ err = ASI_MAP_CPUVAR(asi, cpu_asi_session);
+ if (err)
+ return err;
+
+ /*
+ * Optional percpu mappings.
+ */
+ for (i = 0; i < ARRAY_SIZE(asi_map_percpu_options); i++) {
+ option = &asi_map_percpu_options[i];
+ if (flags & option->flag) {
+ err = asi_map_percpu(asi, option->ptr, option->size);
+ if (err)
+ return err;
+ }
+ }
+
return 0;
}

-struct asi *asi_create(void)
+struct asi *asi_create(int map_flags)
{
struct page *page;
struct asi *asi;
@@ -115,7 +165,7 @@ struct asi *asi_create(void)
spin_lock_init(&asi->fault_lock);
asi_init_backend(asi);

- err = asi_init_mapping(asi);
+ err = asi_init_mapping(asi, map_flags);
if (err)
goto error;

@@ -159,6 +209,7 @@ int asi_enter(struct asi *asi)
struct asi *current_asi;
struct asi_session *asi_session;
unsigned long original_cr3;
+ int err;

state = this_cpu_read(cpu_asi_session.state);
/*
@@ -190,6 +241,13 @@ int asi_enter(struct asi *asi)
WARN_ON(asi_session->abort_depth > 0);

/*
+ * We need a stack to run with isolation, so map the current stack.
+ */
+ err = asi_map(asi, current->stack, PAGE_SIZE << THREAD_SIZE_ORDER);
+ if (err)
+ goto err_clear_asi;
+
+ /*
* Instructions ordering is important here because we should be
* able to deal with any interrupt/exception which will abort
* the isolation and restore CR3 to its original value:
@@ -211,7 +269,7 @@ int asi_enter(struct asi *asi)
if (!original_cr3) {
WARN_ON(1);
err = -EINVAL;
- goto err_clear_asi;
+ goto err_unmap_stack;
}
asi_session->original_cr3 = original_cr3;

@@ -228,6 +286,8 @@ int asi_enter(struct asi *asi)

return 0;

+err_unmap_stack:
+ asi_unmap(asi, current->stack);
err_clear_asi:
asi_session->asi = NULL;
asi_session->task = NULL;
@@ -284,6 +344,9 @@ void asi_exit(struct asi *asi)
* exit isolation before abort_depth reaches 0.
*/
asi_session->abort_depth = 0;
+
+ /* unmap stack */
+ asi_unmap(asi, current->stack);
}
EXPORT_SYMBOL(asi_exit);

diff --git a/arch/x86/mm/asi_pagetable.c b/arch/x86/mm/asi_pagetable.c
index f1ee65b..bcc95f2 100644
--- a/arch/x86/mm/asi_pagetable.c
+++ b/arch/x86/mm/asi_pagetable.c
@@ -710,12 +710,20 @@ int asi_map_range(struct asi *asi, void *ptr, size_t size,
map_addr = round_down(addr, page_dir_size);
map_end = round_up(end, page_dir_size);

- pr_debug("ASI %p: MAP %px/%lx/%d -> %lx-%lx\n", asi, ptr, size, level,
- map_addr, map_end);
- if (map_addr < addr)
- pr_debug("ASI %p: MAP LEAK %lx-%lx\n", asi, map_addr, addr);
- if (map_end > end)
- pr_debug("ASI %p: MAP LEAK %lx-%lx\n", asi, end, map_end);
+ /*
+ * Don't log info for the current stack because it is mapped/unmapped
+ * every time we enter/exit isolation.
+ */
+ if (ptr != current->stack) {
+ pr_debug("ASI %p: MAP %px/%lx/%d -> %lx-%lx\n",
+ asi, ptr, size, level, map_addr, map_end);
+ if (map_addr < addr)
+ pr_debug("ASI %p: MAP LEAK %lx-%lx\n",
+ asi, map_addr, addr);
+ if (map_end > end)
+ pr_debug("ASI %p: MAP LEAK %lx-%lx\n",
+ asi, end, map_end);
+ }

spin_lock_irqsave(&asi->lock, flags);

@@ -989,8 +997,14 @@ void asi_unmap(struct asi *asi, void *ptr)

addr = (unsigned long)range_mapping->ptr;
end = addr + range_mapping->size;
- pr_debug("ASI %p: UNMAP %px/%lx/%d\n", asi, ptr,
- range_mapping->size, range_mapping->level);
+ /*
+ * Don't log info for the current stack because it is mapped/unmapped
+ * every time we enter/exit isolation.
+ */
+ if (ptr != current->stack) {
+ pr_debug("ASI %p: UNMAP %px/%lx/%d\n", asi, ptr,
+ range_mapping->size, range_mapping->level);
+ }
list_del(&range_mapping->list);
asi_unmap_overlap(asi, range_mapping);
kfree(range_mapping);
--
1.7.1