[PATCH v5 15/17] x86/entry: Remove exception_enter from most trap handlers

From: Andy Lutomirski
Date: Fri Jul 03 2015 - 15:46:14 EST


On 64-bit kernels, we don't need it any more: we handle context
tracking directly on entry from user mode and exit to user mode. On
32-bit kernels, we don't support context tracking at all, so these
hooks had no effect.

This doesn't change do_page_fault. Before we do that, we need to
make sure that there is no code that can page fault from kernel mode
with CONTEXT_USER. The 32-bit fast system call stack argument code
is the only offender I'm aware of right now.

Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxx>
---
arch/x86/include/asm/traps.h | 4 +-
arch/x86/kernel/cpu/mcheck/mce.c | 5 +--
arch/x86/kernel/cpu/mcheck/p5.c | 5 +--
arch/x86/kernel/cpu/mcheck/winchip.c | 4 +-
arch/x86/kernel/traps.c | 78 +++++++++---------------------------
5 files changed, 27 insertions(+), 69 deletions(-)

diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index c5380bea2a36..c3496619740a 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -112,8 +112,8 @@ asmlinkage void smp_threshold_interrupt(void);
asmlinkage void smp_deferred_error_interrupt(void);
#endif

-extern enum ctx_state ist_enter(struct pt_regs *regs);
-extern void ist_exit(struct pt_regs *regs, enum ctx_state prev_state);
+extern void ist_enter(struct pt_regs *regs);
+extern void ist_exit(struct pt_regs *regs);
extern void ist_begin_non_atomic(struct pt_regs *regs);
extern void ist_end_non_atomic(void);

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index df919ff103c3..dc87973098dc 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1029,7 +1029,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
{
struct mca_config *cfg = &mca_cfg;
struct mce m, *final;
- enum ctx_state prev_state;
int i;
int worst = 0;
int severity;
@@ -1055,7 +1054,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
int flags = MF_ACTION_REQUIRED;
int lmce = 0;

- prev_state = ist_enter(regs);
+ ist_enter(regs);

this_cpu_inc(mce_exception_count);

@@ -1227,7 +1226,7 @@ out:
local_irq_disable();
ist_end_non_atomic();
done:
- ist_exit(regs, prev_state);
+ ist_exit(regs);
}
EXPORT_SYMBOL_GPL(do_machine_check);

diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
index 737b0ad4e61a..12402e10aeff 100644
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -19,10 +19,9 @@ int mce_p5_enabled __read_mostly;
/* Machine check handler for Pentium class Intel CPUs: */
static void pentium_machine_check(struct pt_regs *regs, long error_code)
{
- enum ctx_state prev_state;
u32 loaddr, hi, lotype;

- prev_state = ist_enter(regs);
+ ist_enter(regs);

rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);
@@ -39,7 +38,7 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code)

add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);

- ist_exit(regs, prev_state);
+ ist_exit(regs);
}

/* Set up machine check reporting for processors with Intel style MCE: */
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c
index 44f138296fbe..01dd8702880b 100644
--- a/arch/x86/kernel/cpu/mcheck/winchip.c
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@@ -15,12 +15,12 @@
/* Machine check handler for WinChip C6: */
static void winchip_machine_check(struct pt_regs *regs, long error_code)
{
- enum ctx_state prev_state = ist_enter(regs);
+ ist_enter(regs);

printk(KERN_EMERG "CPU0: Machine Check Exception.\n");
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);

- ist_exit(regs, prev_state);
+ ist_exit(regs);
}

/* Set up machine check reporting on the Winchip C6 series */
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 2a783c4fe0e9..8e65d8a9b8db 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -108,13 +108,10 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
preempt_count_dec();
}

-enum ctx_state ist_enter(struct pt_regs *regs)
+void ist_enter(struct pt_regs *regs)
{
- enum ctx_state prev_state;
-
if (user_mode(regs)) {
- /* Other than that, we're just an exception. */
- prev_state = exception_enter();
+ CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
} else {
/*
* We might have interrupted pretty much anything. In
@@ -123,32 +120,25 @@ enum ctx_state ist_enter(struct pt_regs *regs)
* but we need to notify RCU.
*/
rcu_nmi_enter();
- prev_state = CONTEXT_KERNEL; /* the value is irrelevant. */
}

/*
- * We are atomic because we're on the IST stack (or we're on x86_32,
- * in which case we still shouldn't schedule).
- *
- * This must be after exception_enter(), because exception_enter()
- * won't do anything if in_interrupt() returns true.
+ * We are atomic because we're on the IST stack; or we're on
+ * x86_32, in which case we still shouldn't schedule; or we're
+ * on x86_64 and entered from user mode, in which case we're
+ * still atomic unless ist_begin_non_atomic is called.
*/
preempt_count_add(HARDIRQ_OFFSET);

/* This code is a bit fragile. Test it. */
rcu_lockdep_assert(rcu_is_watching(), "ist_enter didn't work");
-
- return prev_state;
}

-void ist_exit(struct pt_regs *regs, enum ctx_state prev_state)
+void ist_exit(struct pt_regs *regs)
{
- /* Must be before exception_exit. */
preempt_count_sub(HARDIRQ_OFFSET);

- if (user_mode(regs))
- return exception_exit(prev_state);
- else
+ if (!user_mode(regs))
rcu_nmi_exit();
}

@@ -162,7 +152,7 @@ void ist_exit(struct pt_regs *regs, enum ctx_state prev_state)
* a double fault, it can be safe to schedule. ist_begin_non_atomic()
* begins a non-atomic section within an ist_enter()/ist_exit() region.
* Callers are responsible for enabling interrupts themselves inside
- * the non-atomic section, and callers must call is_end_non_atomic()
+ * the non-atomic section, and callers must call ist_end_non_atomic()
* before ist_exit().
*/
void ist_begin_non_atomic(struct pt_regs *regs)
@@ -289,7 +279,6 @@ NOKPROBE_SYMBOL(do_trap);
static void do_error_trap(struct pt_regs *regs, long error_code, char *str,
unsigned long trapnr, int signr)
{
- enum ctx_state prev_state = exception_enter();
siginfo_t info;

CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
@@ -300,8 +289,6 @@ static void do_error_trap(struct pt_regs *regs, long error_code, char *str,
do_trap(trapnr, signr, str, regs, error_code,
fill_trap_info(regs, signr, trapnr, &info));
}
-
- exception_exit(prev_state);
}

#define DO_ERROR(trapnr, signr, str, name) \
@@ -353,7 +340,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
}
#endif

- ist_enter(regs); /* Discard prev_state because we won't return. */
+ ist_enter(regs);
notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);

tsk->thread.error_code = error_code;
@@ -373,15 +360,13 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)

dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
{
- enum ctx_state prev_state;
const struct bndcsr *bndcsr;
siginfo_t *info;

- prev_state = exception_enter();
CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
if (notify_die(DIE_TRAP, "bounds", regs, error_code,
X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP)
- goto exit;
+ return;
conditional_sti(regs);

if (!user_mode(regs))
@@ -438,9 +423,8 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
die("bounds", regs, error_code);
}

-exit:
- exception_exit(prev_state);
return;
+
exit_trap:
/*
* This path out is for all the cases where we could not
@@ -450,36 +434,33 @@ exit_trap:
* time..
*/
do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, NULL);
- exception_exit(prev_state);
}

dotraplinkage void
do_general_protection(struct pt_regs *regs, long error_code)
{
struct task_struct *tsk;
- enum ctx_state prev_state;

- prev_state = exception_enter();
CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
conditional_sti(regs);

if (v8086_mode(regs)) {
local_irq_enable();
handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
- goto exit;
+ return;
}

tsk = current;
if (!user_mode(regs)) {
if (fixup_exception(regs))
- goto exit;
+ return;

tsk->thread.error_code = error_code;
tsk->thread.trap_nr = X86_TRAP_GP;
if (notify_die(DIE_GPF, "general protection fault", regs, error_code,
X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP)
die("general protection fault", regs, error_code);
- goto exit;
+ return;
}

tsk->thread.error_code = error_code;
@@ -495,16 +476,12 @@ do_general_protection(struct pt_regs *regs, long error_code)
}

force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
-exit:
- exception_exit(prev_state);
}
NOKPROBE_SYMBOL(do_general_protection);

/* May run on IST stack. */
dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
{
- enum ctx_state prev_state;
-
#ifdef CONFIG_DYNAMIC_FTRACE
/*
* ftrace must be first, everything else may cause a recursive crash.
@@ -517,7 +494,7 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
if (poke_int3_handler(regs))
return;

- prev_state = ist_enter(regs);
+ ist_enter(regs);
CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
@@ -544,7 +521,7 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
preempt_conditional_cli(regs);
debug_stack_usage_dec();
exit:
- ist_exit(regs, prev_state);
+ ist_exit(regs);
}
NOKPROBE_SYMBOL(do_int3);

@@ -620,12 +597,11 @@ NOKPROBE_SYMBOL(fixup_bad_iret);
dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
{
struct task_struct *tsk = current;
- enum ctx_state prev_state;
int user_icebp = 0;
unsigned long dr6;
int si_code;

- prev_state = ist_enter(regs);
+ ist_enter(regs);

get_debugreg(dr6, 6);

@@ -700,7 +676,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
debug_stack_usage_dec();

exit:
- ist_exit(regs, prev_state);
+ ist_exit(regs);
}
NOKPROBE_SYMBOL(do_debug);

@@ -752,23 +728,15 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)

dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
{
- enum ctx_state prev_state;
-
- prev_state = exception_enter();
CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
math_error(regs, error_code, X86_TRAP_MF);
- exception_exit(prev_state);
}

dotraplinkage void
do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
{
- enum ctx_state prev_state;
-
- prev_state = exception_enter();
CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
math_error(regs, error_code, X86_TRAP_XF);
- exception_exit(prev_state);
}

dotraplinkage void
@@ -780,9 +748,6 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
dotraplinkage void
do_device_not_available(struct pt_regs *regs, long error_code)
{
- enum ctx_state prev_state;
-
- prev_state = exception_enter();
CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
BUG_ON(use_eager_fpu());

@@ -794,7 +759,6 @@ do_device_not_available(struct pt_regs *regs, long error_code)

info.regs = regs;
math_emulate(&info);
- exception_exit(prev_state);
return;
}
#endif
@@ -802,7 +766,6 @@ do_device_not_available(struct pt_regs *regs, long error_code)
#ifdef CONFIG_X86_32
conditional_sti(regs);
#endif
- exception_exit(prev_state);
}
NOKPROBE_SYMBOL(do_device_not_available);

@@ -810,9 +773,7 @@ NOKPROBE_SYMBOL(do_device_not_available);
dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
{
siginfo_t info;
- enum ctx_state prev_state;

- prev_state = exception_enter();
CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
local_irq_enable();

@@ -825,7 +786,6 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code,
&info);
}
- exception_exit(prev_state);
}
#endif

--
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/