[RFC PATCH 1/4] mm: Check user stack pointer is mapped with MAP_STACK

From: Will Deacon
Date: Mon Feb 11 2019 - 12:59:47 EST


By marking stack VMAs with VM_USERSTACK (set as part of VM_STACK_FLAGS
and for mappings created with MAP_STACK), we can perform optional checks
on entry to the kernel from system calls and user faults to ensure that
the user stack pointer does indeed point to a stack VMA. If the stack
pointer is found to point elsewhere, a SIGSEGV can be delivered to the
current application.

This acts as a best-effort defense against stack-pivoting attacks.

Cc: Kees Cook <keescook@xxxxxxxxxxxx>
Cc: Jann Horn <jannh@xxxxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Matthew Wilcox <willy@xxxxxxxxxxxxx>
Cc: Michal Hocko <mhocko@xxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Signed-off-by: Will Deacon <will.deacon@xxxxxxx>
---
include/linux/mm.h | 10 +++++++++-
include/linux/mman.h | 3 ++-
include/linux/sched.h | 4 ++++
mm/memory.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 60 insertions(+), 2 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 80bb6408fe73..9fa02d47a270 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -233,6 +233,7 @@ extern unsigned int kobjsize(const void *objp);
#define VM_ARCH_1 0x01000000 /* Architecture-specific flag */
#define VM_WIPEONFORK 0x02000000 /* Wipe VMA contents in child. */
#define VM_DONTDUMP 0x04000000 /* Do not include in the core dump */
+#define VM_USERSTACK 0x08000000 /* User stack VM. NOTE(review): same bit value as VM_SOFTDIRTY (CONFIG_MEM_SOFT_DIRTY) just below -- confirm the two cannot alias */

#ifdef CONFIG_MEM_SOFT_DIRTY
# define VM_SOFTDIRTY 0x08000000 /* Not soft dirty clean area */
@@ -310,7 +311,8 @@ extern unsigned int kobjsize(const void *objp);
#define VM_STACK VM_GROWSDOWN
#endif

-#define VM_STACK_FLAGS (VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
+#define VM_STACK_FLAGS (VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT | \
+ VM_USERSTACK)

/*
* Special vmas that are non-mergable, non-mlock()able.
@@ -1480,6 +1482,12 @@ int truncate_inode_page(struct address_space *mapping, struct page *page);
int generic_error_remove_page(struct address_space *mapping, struct page *page);
int invalidate_inode_page(struct page *page);

+#ifdef CONFIG_USER_STACK_POINTER_CHECKS
+bool usp_check_syscall(void); /* true if the user SP check passes or is not enabled for current */
+#else
+static inline bool usp_check_syscall(void) { return true; } /* checks compiled out: always passes */
+#endif
+
#ifdef CONFIG_MMU
extern vm_fault_t handle_mm_fault(struct vm_area_struct *vma,
unsigned long address, unsigned int flags);
diff --git a/include/linux/mman.h b/include/linux/mman.h
index 4b08e9c9c538..d4f2d39fca70 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -131,7 +131,8 @@ calc_vm_flag_bits(unsigned long flags)
return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) |
_calc_vm_trans(flags, MAP_DENYWRITE, VM_DENYWRITE ) |
_calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) |
- _calc_vm_trans(flags, MAP_SYNC, VM_SYNC );
+ _calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) |
+ _calc_vm_trans(flags, MAP_STACK, VM_USERSTACK ) ;
}

unsigned long vm_commit_limit(void);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index bba3afb4e9bf..2e6766301645 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1208,6 +1208,10 @@ struct task_struct {
unsigned long prev_lowest_stack;
#endif

+#ifdef CONFIG_USER_STACK_POINTER_CHECKS
+ unsigned int usp_checks; /* USP_CHECK_* bits selecting which checks run (tested in mm/memory.c) */
+#endif
+
/*
* New fields for task_struct should be added above here, so that
* they are included in the randomized portion of task_struct.
diff --git a/mm/memory.c b/mm/memory.c
index e11ca9dd823f..e0b449f520da 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -44,6 +44,7 @@
#include <linux/sched/coredump.h>
#include <linux/sched/numa_balancing.h>
#include <linux/sched/task.h>
+#include <linux/sched/task_stack.h>
#include <linux/hugetlb.h>
#include <linux/mman.h>
#include <linux/swap.h>
@@ -63,6 +64,7 @@
#include <linux/elf.h>
#include <linux/gfp.h>
#include <linux/migrate.h>
+#include <linux/ptrace.h>
#include <linux/string.h>
#include <linux/dma-debug.h>
#include <linux/debugfs.h>
@@ -3911,6 +3913,46 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
return handle_pte_fault(&vmf);
}

+#ifdef CONFIG_USER_STACK_POINTER_CHECKS
+#define USP_CHECK_FAULT (1U << 0) /* tested by usp_check_fault() */
+#define USP_CHECK_SYSCALL (1U << 1) /* tested by usp_check_syscall() */
+
+static bool __usp_check(void) /* true iff find_vma() on the user SP yields a VM_USERSTACK VMA; callers here run with mmap_sem held */
+{
+ struct vm_area_struct *vma;
+
+ vma = find_vma(current->mm, current_user_stack_pointer()); /* NOTE(review): find_vma() only guarantees sp < vm_end, vm_start is not compared -- confirm intended */
+ return vma && (vma->vm_flags & VM_USERSTACK);
+}
+
+static bool usp_check_fault(unsigned int flags) /* false makes handle_mm_fault() return VM_FAULT_SIGSEGV */
+{
+ if (!(flags & FAULT_FLAG_USER)) /* only faults carrying FAULT_FLAG_USER are checked */
+ return true;
+
+ if (!(current->usp_checks & USP_CHECK_FAULT)) /* fault-time check not enabled for this task */
+ return true;
+
+ return __usp_check();
+}
+
+bool usp_check_syscall(void) /* true if the SP check passes or USP_CHECK_SYSCALL is not set for current */
+{
+ bool ret;
+ struct mm_struct *mm = current->mm;
+
+ if (!(current->usp_checks & USP_CHECK_SYSCALL))
+ return true;
+
+ down_read(&mm->mmap_sem); /* take mmap_sem for reading around the VMA lookup in __usp_check() */
+ ret = __usp_check();
+ up_read(&mm->mmap_sem);
+ return ret;
+}
+#else
+static bool usp_check_fault(unsigned int flags) { return true; }
+#endif
+
/*
* By the time we get here, we already hold the mm semaphore
*
@@ -3930,6 +3972,9 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
/* do counter updates before entering really critical section. */
check_sync_rss_stat(current);

+ if (!usp_check_fault(flags))
+ return VM_FAULT_SIGSEGV;
+
if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE,
flags & FAULT_FLAG_INSTRUCTION,
flags & FAULT_FLAG_REMOTE))
--
2.11.0