[patch 47/60] x86/ldt: Map LDT entries into fixmap

From: Thomas Gleixner
Date: Mon Dec 04 2017 - 11:52:25 EST


From: Thomas Gleixner <tglx@xxxxxxxxxxxxx>

The LDT is not commonly used on 64-bit, so the overhead of populating the
fixmap entries on context switch for the rare users of the LDT syscall is a
reasonable trade-off compared to having extra, dynamically managed mapping
space per process.

Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
---
arch/x86/include/asm/mmu_context.h | 44 ++++--------------
arch/x86/kernel/ldt.c | 87 +++++++++++++++++++++++++++++++------
2 files changed, 84 insertions(+), 47 deletions(-)

--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -45,13 +45,17 @@ static inline void load_mm_cr4(struct mm
*/
struct ldt_struct {
/*
- * Xen requires page-aligned LDTs with special permissions. This is
- * needed to prevent us from installing evil descriptors such as
+ * Xen requires page-aligned LDTs with special permissions. This
+ * is needed to prevent us from installing evil descriptors such as
* call gates. On native, we could merge the ldt_struct and LDT
- * allocations, but it's not worth trying to optimize.
+ * allocations, but it's not worth trying to optimize and it does
+ * not work with page table isolation enabled, which requires
+ * page-aligned LDT entries as well.
*/
- struct desc_struct *entries_va;
- unsigned int nr_entries;
+ struct desc_struct *entries_va;
+ phys_addr_t entries_pa;
+ unsigned int nr_entries;
+ unsigned int order;
};

/*
@@ -59,6 +63,7 @@ struct ldt_struct {
*/
int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm);
void destroy_context_ldt(struct mm_struct *mm);
+void load_mm_ldt(struct mm_struct *mm);
#else /* CONFIG_MODIFY_LDT_SYSCALL */
static inline int init_new_context_ldt(struct task_struct *tsk,
struct mm_struct *mm)
@@ -66,38 +71,11 @@ static inline int init_new_context_ldt(s
return 0;
}
static inline void destroy_context_ldt(struct mm_struct *mm) {}
-#endif
-
static inline void load_mm_ldt(struct mm_struct *mm)
{
-#ifdef CONFIG_MODIFY_LDT_SYSCALL
- struct ldt_struct *ldt;
-
- /* READ_ONCE synchronizes with smp_store_release */
- ldt = READ_ONCE(mm->context.ldt);
-
- /*
- * Any change to mm->context.ldt is followed by an IPI to all
- * CPUs with the mm active. The LDT will not be freed until
- * after the IPI is handled by all such CPUs. This means that,
- * if the ldt_struct changes before we return, the values we see
- * will be safe, and the new values will be loaded before we run
- * any user code.
- *
- * NB: don't try to convert this to use RCU without extreme care.
- * We would still need IRQs off, because we don't want to change
- * the local LDT after an IPI loaded a newer value than the one
- * that we can see.
- */
-
- if (unlikely(ldt))
- set_ldt(ldt->entries_va, ldt->nr_entries);
- else
- clear_LDT();
-#else
clear_LDT();
-#endif
}
+#endif

static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
{
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -22,6 +22,7 @@
#include <asm/desc.h>
#include <asm/mmu_context.h>
#include <asm/syscalls.h>
+#include <asm/fixmap.h>

static void refresh_ldt_segments(void)
{
@@ -42,6 +43,61 @@ static void refresh_ldt_segments(void)
#endif
}

+#ifdef CONFIG_KERNEL_PAGE_TABLE_ISOLATION
+
+#define LDT_EPP (PAGE_SIZE / LDT_ENTRY_SIZE)
+
+static void set_ldt_and_map(struct ldt_struct *ldt)
+{
+ phys_addr_t pa = ldt->entries_pa;
+ void *fixva;
+ int idx, i;
+
+ if (!static_cpu_has_bug(X86_BUG_CPU_SECURE_MODE_KPTI)) {
+ set_ldt(ldt->entries_va, ldt->nr_entries);
+ return;
+ }
+
+ idx = get_cpu_entry_area_index(smp_processor_id(), ldt_entries);
+ fixva = (void *) __fix_to_virt(idx);
+ for (i = 0; i < ldt->nr_entries; idx--, i += LDT_EPP, pa += PAGE_SIZE)
+ __set_fixmap(idx, pa, PAGE_KERNEL);
+ set_ldt(fixva, ldt->nr_entries);
+}
+#else
+static void set_ldt_and_map(struct ldt_struct *ldt)
+{
+ set_ldt(ldt->entries_va, ldt->nr_entries);
+}
+#endif
+
+void load_mm_ldt(struct mm_struct *mm)
+{
+ struct ldt_struct *ldt;
+
+ /* READ_ONCE synchronizes with smp_store_release */
+ ldt = READ_ONCE(mm->context.ldt);
+
+ /*
+ * Any change to mm->context.ldt is followed by an IPI to all
+ * CPUs with the mm active. The LDT will not be freed until
+ * after the IPI is handled by all such CPUs. This means that,
+ * if the ldt_struct changes before we return, the values we see
+ * will be safe, and the new values will be loaded before we run
+ * any user code.
+ *
+ * NB: don't try to convert this to use RCU without extreme care.
+ * We would still need IRQs off, because we don't want to change
+ * the local LDT after an IPI loaded a newer value than the one
+ * that we can see.
+ */
+
+ if (unlikely(ldt))
+ set_ldt_and_map(ldt);
+ else
+ clear_LDT();
+}
+
/* context.lock is held for us, so we don't need any locking. */
static void flush_ldt(void *__mm)
{
@@ -52,26 +108,35 @@ static void flush_ldt(void *__mm)
return;

pc = &mm->context;
- set_ldt(pc->ldt->entries_va, pc->ldt->nr_entries);
+ set_ldt_and_map(pc->ldt);

refresh_ldt_segments();
}

+static void __free_ldt_struct(struct ldt_struct *ldt)
+{
+ free_pages((unsigned long)ldt->entries_va, ldt->order);
+ kfree(ldt);
+}
+
/* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */
static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries)
{
struct ldt_struct *new_ldt;
unsigned int alloc_size;
+ struct page *page;
+ int order;

if (num_entries > LDT_ENTRIES)
return NULL;

- new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL);
+ new_ldt = kzalloc(sizeof(struct ldt_struct), GFP_KERNEL);
if (!new_ldt)
return NULL;

BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct));
alloc_size = num_entries * LDT_ENTRY_SIZE;
+ order = get_order(alloc_size);

/*
* Xen is very picky: it requires a page-aligned LDT that has no
@@ -79,16 +144,14 @@ static struct ldt_struct *alloc_ldt_stru
* Keep it simple: zero the whole allocation and never allocate less
* than PAGE_SIZE.
*/
- if (alloc_size > PAGE_SIZE)
- new_ldt->entries_va = vzalloc(alloc_size);
- else
- new_ldt->entries_va = (void *)get_zeroed_page(GFP_KERNEL);
-
- if (!new_ldt->entries_va) {
+ page = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
+ if (!page) {
kfree(new_ldt);
return NULL;
}
-
+ new_ldt->entries_va = page_address(page);
+ new_ldt->entries_pa = virt_to_phys(new_ldt->entries_va);
+ new_ldt->order = order;
new_ldt->nr_entries = num_entries;
return new_ldt;
}
@@ -116,11 +179,7 @@ static void free_ldt_struct(struct ldt_s
return;

paravirt_free_ldt(ldt->entries_va, ldt->nr_entries);
- if (ldt->nr_entries * LDT_ENTRY_SIZE > PAGE_SIZE)
- vfree_atomic(ldt->entries_va);
- else
- free_page((unsigned long)ldt->entries_va);
- kfree(ldt);
+ __free_ldt_struct(ldt);
}

/*