[RFC PATCH] x86, entry: Switch stacks on a paranoid entry from userspace

From: Andy Lutomirski
Date: Tue Nov 11 2014 - 15:57:01 EST


This causes all non-NMI kernel entries from userspace to run on the
normal kernel stack.

This means that machine check recovery can happen in non-atomic
context. It also obviates the need for the paranoid_userspace path.

Borislav has referred to this idea as the tail wagging the dog. I
think that's okay -- the dog was pretty ugly.

Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxxxxxx>
---
arch/x86/kernel/entry_64.S | 61 +++++++++++++++++++++++++---------------------
1 file changed, 33 insertions(+), 28 deletions(-)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index df088bb03fb3..9c0a7311af0d 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1064,6 +1064,9 @@ ENTRY(\sym)
CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15

.if \paranoid
+ CFI_REMEMBER_STATE
+ testl $3, CS(%rsp) /* If coming from userspace, switch */
+ jnz 1f /* stacks. */
call save_paranoid
.else
call error_entry
@@ -1104,6 +1107,36 @@ ENTRY(\sym)
jmp error_exit /* %ebx: no swapgs flag */
.endif

+ .if \paranoid
+ CFI_RESTORE_STATE
+ /*
+ * Paranoid entry from userspace. Switch stacks and treat it
+ * as a normal entry. This means that paranoid handlers
+ * run in real process context if user_mode(regs).
+ */
+1:
+ call error_entry
+
+ DEFAULT_FRAME 0
+
+ movq %rsp,%rdi /* pt_regs pointer */
+ call sync_regs
+ movq %rax,%rsp /* switch stack */
+
+ movq %rsp,%rdi /* pt_regs pointer */
+
+ .if \has_error_code
+ movq ORIG_RAX(%rsp),%rsi /* get error code */
+ movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */
+ .else
+ xorl %esi,%esi /* no error code */
+ .endif
+
+ call \do_sym
+
+ jmp error_exit /* %ebx: no swapgs flag */
+ .endif
+
CFI_ENDPROC
END(\sym)
.endm
@@ -1324,8 +1357,6 @@ ENTRY(paranoid_exit)
TRACE_IRQS_OFF_DEBUG
testl %ebx,%ebx /* swapgs needed? */
jnz paranoid_restore
- testl $3,CS(%rsp)
- jnz paranoid_userspace
paranoid_swapgs:
TRACE_IRQS_IRETQ 0
SWAPGS_UNSAFE_STACK
@@ -1335,32 +1366,6 @@ paranoid_restore:
TRACE_IRQS_IRETQ_DEBUG 0
RESTORE_ALL 8
jmp irq_return
-paranoid_userspace:
- GET_THREAD_INFO(%rcx)
- movl TI_flags(%rcx),%ebx
- andl $_TIF_WORK_MASK,%ebx
- jz paranoid_swapgs
- movq %rsp,%rdi /* &pt_regs */
- call sync_regs
- movq %rax,%rsp /* switch stack for scheduling */
- testl $_TIF_NEED_RESCHED,%ebx
- jnz paranoid_schedule
- movl %ebx,%edx /* arg3: thread flags */
- TRACE_IRQS_ON
- ENABLE_INTERRUPTS(CLBR_NONE)
- xorl %esi,%esi /* arg2: oldset */
- movq %rsp,%rdi /* arg1: &pt_regs */
- call do_notify_resume
- DISABLE_INTERRUPTS(CLBR_NONE)
- TRACE_IRQS_OFF
- jmp paranoid_userspace
-paranoid_schedule:
- TRACE_IRQS_ON
- ENABLE_INTERRUPTS(CLBR_ANY)
- SCHEDULE_USER
- DISABLE_INTERRUPTS(CLBR_ANY)
- TRACE_IRQS_OFF
- jmp paranoid_userspace
CFI_ENDPROC
END(paranoid_exit)

--
1.9.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/