[PATCH 3.4 056/107] x86/ldt: Make modify_ldt synchronous

From: lizf
Date: Wed Mar 16 2016 - 04:28:25 EST


From: Andy Lutomirski <luto@xxxxxxxxxx>

3.4.111-rc1 review patch. If anyone has any objections, please let me know.

------------------


commit 37868fe113ff2ba814b3b4eb12df214df555f8dc upstream.

modify_ldt() has questionable locking and does not synchronize
threads. Improve it: redesign the locking and synchronize all
threads' LDTs using an IPI on all modifications.

This will dramatically slow down modify_ldt in multithreaded
programs, but there shouldn't be any multithreaded programs that
care about modify_ldt's performance in the first place.

This fixes some fallout from the CVE-2015-5157 fixes.

Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxx>
Reviewed-by: Borislav Petkov <bp@xxxxxxx>
Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Cc: Andy Lutomirski <luto@xxxxxxxxxxxxxx>
Cc: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxxxx>
Cc: Brian Gerst <brgerst@xxxxxxxxx>
Cc: Denys Vlasenko <dvlasenk@xxxxxxxxxx>
Cc: H. Peter Anvin <hpa@xxxxxxxxx>
Cc: Jan Beulich <jbeulich@xxxxxxxx>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Sasha Levin <sasha.levin@xxxxxxxxxx>
Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: security@xxxxxxxxxx <security@xxxxxxxxxx>
Cc: xen-devel <xen-devel@xxxxxxxxxxxxx>
Link: http://lkml.kernel.org/r/4c6978476782160600471bd865b318db34c7b628.1438291540.git.luto@xxxxxxxxxx
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
[bwh: Backported to 3.2:
- Adjust context
- Drop comment changes in switch_mm()
- Drop changes to get_segment_base() in arch/x86/kernel/cpu/perf_event.c
- Open-code lockless_dereference(), smp_store_release(), on_each_cpu_mask()]
Signed-off-by: Ben Hutchings <ben@xxxxxxxxxxxxxxx>
[lizf: Backported to 3.4: adjust context]
Signed-off-by: Zefan Li <lizefan@xxxxxxxxxx>
---
arch/x86/include/asm/desc.h | 15 ---
arch/x86/include/asm/mmu.h | 3 +-
arch/x86/include/asm/mmu_context.h | 49 ++++++-
arch/x86/kernel/cpu/common.c | 4 +-
arch/x86/kernel/ldt.c | 267 ++++++++++++++++++++-----------------
arch/x86/kernel/process_64.c | 4 +-
arch/x86/kernel/step.c | 6 +-
arch/x86/power/cpu.c | 3 +-
8 files changed, 205 insertions(+), 146 deletions(-)

diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index fa9c8c7..d34c94f 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -279,21 +279,6 @@ static inline void clear_LDT(void)
set_ldt(NULL, 0);
}

-/*
- * load one particular LDT into the current CPU
- */
-static inline void load_LDT_nolock(mm_context_t *pc)
-{
- set_ldt(pc->ldt, pc->size);
-}
-
-static inline void load_LDT(mm_context_t *pc)
-{
- preempt_disable();
- load_LDT_nolock(pc);
- preempt_enable();
-}
-
static inline unsigned long get_desc_base(const struct desc_struct *desc)
{
return (unsigned)(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 5f55e69..926f672 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -9,8 +9,7 @@
* we put the segment information here.
*/
typedef struct {
- void *ldt;
- int size;
+ struct ldt_struct *ldt;

#ifdef CONFIG_X86_64
/* True if mm supports a task running in 32 bit compatibility mode. */
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 6902152..ce4ea94 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -16,6 +16,51 @@ static inline void paravirt_activate_mm(struct mm_struct *prev,
#endif /* !CONFIG_PARAVIRT */

/*
+ * ldt_structs can be allocated, used, and freed, but they are never
+ * modified while live.
+ */
+struct ldt_struct {
+ /*
+ * Xen requires page-aligned LDTs with special permissions. This is
+ * needed to prevent us from installing evil descriptors such as
+ * call gates. On native, we could merge the ldt_struct and LDT
+ * allocations, but it's not worth trying to optimize.
+ */
+ struct desc_struct *entries;
+ int size;
+};
+
+static inline void load_mm_ldt(struct mm_struct *mm)
+{
+ struct ldt_struct *ldt;
+
+ /* smp_read_barrier_depends synchronizes with barrier in install_ldt */
+ ldt = ACCESS_ONCE(mm->context.ldt);
+ smp_read_barrier_depends();
+
+ /*
+ * Any change to mm->context.ldt is followed by an IPI to all
+ * CPUs with the mm active. The LDT will not be freed until
+ * after the IPI is handled by all such CPUs. This means that,
+ * if the ldt_struct changes before we return, the values we see
+ * will be safe, and the new values will be loaded before we run
+ * any user code.
+ *
+ * NB: don't try to convert this to use RCU without extreme care.
+ * We would still need IRQs off, because we don't want to change
+ * the local LDT after an IPI loaded a newer value than the one
+ * that we can see.
+ */
+
+ if (unlikely(ldt))
+ set_ldt(ldt->entries, ldt->size);
+ else
+ clear_LDT();
+
+ DEBUG_LOCKS_WARN_ON(preemptible());
+}
+
+/*
* Used for LDT copy/destruction.
*/
int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
@@ -52,7 +97,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
* load the LDT, if the LDT is different:
*/
if (unlikely(prev->context.ldt != next->context.ldt))
- load_LDT_nolock(&next->context);
+ load_mm_ldt(next);
}
#ifdef CONFIG_SMP
else {
@@ -65,7 +110,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
* to make sure to use no freed page tables.
*/
load_cr3(next->pgd);
- load_LDT_nolock(&next->context);
+ load_mm_ldt(next);
}
}
#endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 114db0f..b190a62 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1254,7 +1254,7 @@ void __cpuinit cpu_init(void)
load_sp0(t, &current->thread);
set_tss_desc(cpu, t);
load_TR_desc();
- load_LDT(&init_mm.context);
+ load_mm_ldt(&init_mm);

clear_all_debug_regs();
dbg_restore_debug_regs();
@@ -1302,7 +1302,7 @@ void __cpuinit cpu_init(void)
load_sp0(t, thread);
set_tss_desc(cpu, t);
load_TR_desc();
- load_LDT(&init_mm.context);
+ load_mm_ldt(&init_mm);

t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);

diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index c37886d..fba5131 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -12,6 +12,7 @@
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/smp.h>
+#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>

@@ -20,82 +21,87 @@
#include <asm/mmu_context.h>
#include <asm/syscalls.h>

-#ifdef CONFIG_SMP
+/* context.lock is held for us, so we don't need any locking. */
static void flush_ldt(void *current_mm)
{
- if (current->active_mm == current_mm)
- load_LDT(&current->active_mm->context);
+ mm_context_t *pc;
+
+ if (current->active_mm != current_mm)
+ return;
+
+ pc = &current->active_mm->context;
+ set_ldt(pc->ldt->entries, pc->ldt->size);
}
-#endif

-static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
+/* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */
+static struct ldt_struct *alloc_ldt_struct(int size)
{
- void *oldldt, *newldt;
- int oldsize;
-
- if (mincount <= pc->size)
- return 0;
- oldsize = pc->size;
- mincount = (mincount + (PAGE_SIZE / LDT_ENTRY_SIZE - 1)) &
- (~(PAGE_SIZE / LDT_ENTRY_SIZE - 1));
- if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE)
- newldt = vmalloc(mincount * LDT_ENTRY_SIZE);
+ struct ldt_struct *new_ldt;
+ int alloc_size;
+
+ if (size > LDT_ENTRIES)
+ return NULL;
+
+ new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL);
+ if (!new_ldt)
+ return NULL;
+
+ BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct));
+ alloc_size = size * LDT_ENTRY_SIZE;
+
+ /*
+ * Xen is very picky: it requires a page-aligned LDT that has no
+ * trailing nonzero bytes in any page that contains LDT descriptors.
+ * Keep it simple: zero the whole allocation and never allocate less
+ * than PAGE_SIZE.
+ */
+ if (alloc_size > PAGE_SIZE)
+ new_ldt->entries = vzalloc(alloc_size);
else
- newldt = (void *)__get_free_page(GFP_KERNEL);
-
- if (!newldt)
- return -ENOMEM;
+ new_ldt->entries = kzalloc(PAGE_SIZE, GFP_KERNEL);

- if (oldsize)
- memcpy(newldt, pc->ldt, oldsize * LDT_ENTRY_SIZE);
- oldldt = pc->ldt;
- memset(newldt + oldsize * LDT_ENTRY_SIZE, 0,
- (mincount - oldsize) * LDT_ENTRY_SIZE);
+ if (!new_ldt->entries) {
+ kfree(new_ldt);
+ return NULL;
+ }

- paravirt_alloc_ldt(newldt, mincount);
+ new_ldt->size = size;
+ return new_ldt;
+}

-#ifdef CONFIG_X86_64
- /* CHECKME: Do we really need this ? */
- wmb();
-#endif
- pc->ldt = newldt;
- wmb();
- pc->size = mincount;
- wmb();
-
- if (reload) {
-#ifdef CONFIG_SMP
- preempt_disable();
- load_LDT(pc);
- if (!cpumask_equal(mm_cpumask(current->mm),
- cpumask_of(smp_processor_id())))
- smp_call_function(flush_ldt, current->mm, 1);
- preempt_enable();
-#else
- load_LDT(pc);
-#endif
- }
- if (oldsize) {
- paravirt_free_ldt(oldldt, oldsize);
- if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE)
- vfree(oldldt);
- else
- put_page(virt_to_page(oldldt));
- }
- return 0;
+/* After calling this, the LDT is immutable. */
+static void finalize_ldt_struct(struct ldt_struct *ldt)
+{
+ paravirt_alloc_ldt(ldt->entries, ldt->size);
}

-static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
+/* context.lock is held */
+static void install_ldt(struct mm_struct *current_mm,
+ struct ldt_struct *ldt)
{
- int err = alloc_ldt(new, old->size, 0);
- int i;
+ /* Synchronizes with smp_read_barrier_depends in load_mm_ldt. */
+ barrier();
+ ACCESS_ONCE(current_mm->context.ldt) = ldt;
+
+ /* Activate the LDT for all CPUs using current_mm. */
+ smp_call_function_many(mm_cpumask(current_mm), flush_ldt, current_mm,
+ true);
+ local_irq_disable();
+ flush_ldt(current_mm);
+ local_irq_enable();
+}

- if (err < 0)
- return err;
+static void free_ldt_struct(struct ldt_struct *ldt)
+{
+ if (likely(!ldt))
+ return;

- for (i = 0; i < old->size; i++)
- write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE);
- return 0;
+ paravirt_free_ldt(ldt->entries, ldt->size);
+ if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE)
+ vfree(ldt->entries);
+ else
+ kfree(ldt->entries);
+ kfree(ldt);
}

/*
@@ -104,17 +110,37 @@ static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
*/
int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
+ struct ldt_struct *new_ldt;
struct mm_struct *old_mm;
int retval = 0;

mutex_init(&mm->context.lock);
- mm->context.size = 0;
old_mm = current->mm;
- if (old_mm && old_mm->context.size > 0) {
- mutex_lock(&old_mm->context.lock);
- retval = copy_ldt(&mm->context, &old_mm->context);
- mutex_unlock(&old_mm->context.lock);
+ if (!old_mm) {
+ mm->context.ldt = NULL;
+ return 0;
+ }
+
+ mutex_lock(&old_mm->context.lock);
+ if (!old_mm->context.ldt) {
+ mm->context.ldt = NULL;
+ goto out_unlock;
}
+
+ new_ldt = alloc_ldt_struct(old_mm->context.ldt->size);
+ if (!new_ldt) {
+ retval = -ENOMEM;
+ goto out_unlock;
+ }
+
+ memcpy(new_ldt->entries, old_mm->context.ldt->entries,
+ new_ldt->size * LDT_ENTRY_SIZE);
+ finalize_ldt_struct(new_ldt);
+
+ mm->context.ldt = new_ldt;
+
+out_unlock:
+ mutex_unlock(&old_mm->context.lock);
return retval;
}

@@ -125,53 +151,47 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
*/
void destroy_context(struct mm_struct *mm)
{
- if (mm->context.size) {
-#ifdef CONFIG_X86_32
- /* CHECKME: Can this ever happen ? */
- if (mm == current->active_mm)
- clear_LDT();
-#endif
- paravirt_free_ldt(mm->context.ldt, mm->context.size);
- if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE)
- vfree(mm->context.ldt);
- else
- put_page(virt_to_page(mm->context.ldt));
- mm->context.size = 0;
- }
+ free_ldt_struct(mm->context.ldt);
+ mm->context.ldt = NULL;
}

static int read_ldt(void __user *ptr, unsigned long bytecount)
{
- int err;
+ int retval;
unsigned long size;
struct mm_struct *mm = current->mm;

- if (!mm->context.size)
- return 0;
+ mutex_lock(&mm->context.lock);
+
+ if (!mm->context.ldt) {
+ retval = 0;
+ goto out_unlock;
+ }
+
if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES)
bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES;

- mutex_lock(&mm->context.lock);
- size = mm->context.size * LDT_ENTRY_SIZE;
+ size = mm->context.ldt->size * LDT_ENTRY_SIZE;
if (size > bytecount)
size = bytecount;

- err = 0;
- if (copy_to_user(ptr, mm->context.ldt, size))
- err = -EFAULT;
- mutex_unlock(&mm->context.lock);
- if (err < 0)
- goto error_return;
+ if (copy_to_user(ptr, mm->context.ldt->entries, size)) {
+ retval = -EFAULT;
+ goto out_unlock;
+ }
+
if (size != bytecount) {
- /* zero-fill the rest */
- if (clear_user(ptr + size, bytecount - size) != 0) {
- err = -EFAULT;
- goto error_return;
+ /* Zero-fill the rest and pretend we read bytecount bytes. */
+ if (clear_user(ptr + size, bytecount - size)) {
+ retval = -EFAULT;
+ goto out_unlock;
}
}
- return bytecount;
-error_return:
- return err;
+ retval = bytecount;
+
+out_unlock:
+ mutex_unlock(&mm->context.lock);
+ return retval;
}

static int read_default_ldt(void __user *ptr, unsigned long bytecount)
@@ -195,6 +215,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
struct desc_struct ldt;
int error;
struct user_desc ldt_info;
+ int oldsize, newsize;
+ struct ldt_struct *new_ldt, *old_ldt;

error = -EINVAL;
if (bytecount != sizeof(ldt_info))
@@ -213,34 +235,39 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
goto out;
}

- mutex_lock(&mm->context.lock);
- if (ldt_info.entry_number >= mm->context.size) {
- error = alloc_ldt(&current->mm->context,
- ldt_info.entry_number + 1, 1);
- if (error < 0)
- goto out_unlock;
- }
-
- /* Allow LDTs to be cleared by the user. */
- if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
- if (oldmode || LDT_empty(&ldt_info)) {
- memset(&ldt, 0, sizeof(ldt));
- goto install;
+ if ((oldmode && !ldt_info.base_addr && !ldt_info.limit) ||
+ LDT_empty(&ldt_info)) {
+ /* The user wants to clear the entry. */
+ memset(&ldt, 0, sizeof(ldt));
+ } else {
+ if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
+ error = -EINVAL;
+ goto out;
}
+
+ fill_ldt(&ldt, &ldt_info);
+ if (oldmode)
+ ldt.avl = 0;
}

- if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
- error = -EINVAL;
+ mutex_lock(&mm->context.lock);
+
+ old_ldt = mm->context.ldt;
+ oldsize = old_ldt ? old_ldt->size : 0;
+ newsize = max((int)(ldt_info.entry_number + 1), oldsize);
+
+ error = -ENOMEM;
+ new_ldt = alloc_ldt_struct(newsize);
+ if (!new_ldt)
goto out_unlock;
- }

- fill_ldt(&ldt, &ldt_info);
- if (oldmode)
- ldt.avl = 0;
+ if (old_ldt)
+ memcpy(new_ldt->entries, old_ldt->entries, oldsize * LDT_ENTRY_SIZE);
+ new_ldt->entries[ldt_info.entry_number] = ldt;
+ finalize_ldt_struct(new_ldt);

- /* Install the new entry ... */
-install:
- write_ldt_entry(mm->context.ldt, ldt_info.entry_number, &ldt);
+ install_ldt(mm, new_ldt);
+ free_ldt_struct(old_ldt);
error = 0;

out_unlock:
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index bb390e1..3ebca08 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -116,11 +116,11 @@ void __show_regs(struct pt_regs *regs, int all)
void release_thread(struct task_struct *dead_task)
{
if (dead_task->mm) {
- if (dead_task->mm->context.size) {
+ if (dead_task->mm->context.ldt) {
printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
dead_task->comm,
dead_task->mm->context.ldt,
- dead_task->mm->context.size);
+ dead_task->mm->context.ldt->size);
BUG();
}
}
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index f89cdc6..5d7eccc 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -5,6 +5,7 @@
#include <linux/mm.h>
#include <linux/ptrace.h>
#include <asm/desc.h>
+#include <asm/mmu_context.h>

unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs)
{
@@ -30,10 +31,11 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re
seg &= ~7UL;

mutex_lock(&child->mm->context.lock);
- if (unlikely((seg >> 3) >= child->mm->context.size))
+ if (unlikely(!child->mm->context.ldt ||
+ (seg >> 3) >= child->mm->context.ldt->size))
addr = -1L; /* bogus selector, access would fault */
else {
- desc = child->mm->context.ldt + seg;
+ desc = &child->mm->context.ldt->entries[seg];
base = get_desc_base(desc);

/* 16-bit code segment? */
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index fcbaac60..dd298e7 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -22,6 +22,7 @@
#include <asm/suspend.h>
#include <asm/debugreg.h>
#include <asm/fpu-internal.h> /* pcntxt_mask */
+#include <asm/mmu_context.h>

#ifdef CONFIG_X86_32
static struct saved_context saved_context;
@@ -148,7 +149,7 @@ static void fix_processor_context(void)
syscall_init(); /* This sets MSR_*STAR and related */
#endif
load_TR_desc(); /* This does ltr */
- load_LDT(&current->active_mm->context); /* This does lldt */
+ load_mm_ldt(current->active_mm); /* This does lldt */
}

/**
--
1.9.1