[PATCH 4/4] x86: Pass kernel thread parameters in fork_frame

From: Brian Gerst
Date: Sat May 21 2016 - 12:05:25 EST


Instead of setting up a fake pt_regs context, put the kernel thread
function pointer and arg into the unused callee-restored registers
of struct fork_frame.

Signed-off-by: Brian Gerst <brgerst@xxxxxxxxx>
---
arch/x86/entry/entry_32.S | 28 +++++++++++-----------------
arch/x86/entry/entry_64.S | 30 +++++++++++-------------------
arch/x86/kernel/process_32.c | 15 ++++-----------
arch/x86/kernel/process_64.c | 10 +++-------
4 files changed, 29 insertions(+), 54 deletions(-)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 05e5340..424e8d3 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -241,35 +241,29 @@ END(__switch_to_asm)
* A newly forked process directly context switches into this address.
*
* eax: prev task we switched from
+ * ebx: kernel thread func
+ * edi: kernel thread arg
*/
ENTRY(ret_from_fork)
pushl %eax
call schedule_tail
popl %eax

+ testl %ebx, %ebx
+ jnz 1f
+
+2:
/* When we fork, we trace the syscall return in the child, too. */
movl %esp, %eax
call syscall_return_slowpath
jmp restore_all
-END(ret_from_fork)

-ENTRY(ret_from_kernel_thread)
- pushl %eax
- call schedule_tail
- popl %eax
- movl PT_EBP(%esp), %eax
- call *PT_EBX(%esp)
+ /* kernel thread */
+1: movl %edi, %eax
+ call *%ebx
movl %eax, PT_EAX(%esp)
-
- /*
- * Kernel threads return to userspace as if returning from a syscall.
- * We should check whether anything actually uses this path and, if so,
- * consider switching it over to ret_from_fork.
- */
- movl %esp, %eax
- call syscall_return_slowpath
- jmp restore_all
-ENDPROC(ret_from_kernel_thread)
+ jmp 2b
+END(ret_from_fork)

/*
* Return to user mode is not as complex as all this looks,
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 0542ad1..cc8a0ba 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -405,37 +405,29 @@ END(__switch_to_asm)
* A newly forked process directly context switches into this address.
*
* rax: prev task we switched from
+ * rbx: kernel thread func
+ * r12: kernel thread arg
*/
ENTRY(ret_from_fork)
movq %rax, %rdi
call schedule_tail /* rdi: 'prev' task parameter */

- testb $3, CS(%rsp) /* from kernel_thread? */
+ testq %rbx, %rbx /* from kernel_thread? */
jnz 1f

- /*
- * We came from kernel_thread. This code path is quite twisted, and
- * someone should clean it up.
- *
- * copy_thread_tls stashes the function pointer in RBX and the
- * parameter to be passed in RBP. The called function is permitted
- * to call do_execve and thereby jump to user mode.
- */
- movq RBP(%rsp), %rdi
- call *RBX(%rsp)
- movq %rax, RAX(%rsp)
-
- /*
- * Fall through as though we're exiting a syscall. This makes a
- * twisted sort of sense if we just called do_execve.
- */
-
-1:
+2:
movq %rsp, %rdi
call syscall_return_slowpath /* returns with IRQs disabled */
TRACE_IRQS_ON /* user mode is traced as IRQS on */
SWAPGS
jmp restore_regs_and_iret
+
+1:
+ /* kernel thread */
+ movq %r12, %rdi
+ call *%rbx
+ movq %rax, RAX(%rsp)
+ jmp 2b
END(ret_from_fork)

/*
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 0ba6fdf..bba563f 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -138,6 +138,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
int err;

frame->bp = 0;
+ frame->ret_addr = (unsigned long) ret_from_fork;
p->thread.sp = (unsigned long) frame;
p->thread.sp0 = (unsigned long) (childregs+1);
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
@@ -145,25 +146,17 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
if (unlikely(p->flags & PF_KTHREAD)) {
/* kernel thread */
memset(childregs, 0, sizeof(struct pt_regs));
- frame->ret_addr = (unsigned long) ret_from_kernel_thread;
- task_user_gs(p) = __KERNEL_STACK_CANARY;
- childregs->ds = __USER_DS;
- childregs->es = __USER_DS;
- childregs->fs = __KERNEL_PERCPU;
- childregs->bx = sp; /* function */
- childregs->bp = arg;
- childregs->orig_ax = -1;
- childregs->cs = __KERNEL_CS | get_kernel_rpl();
- childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
+ frame->bx = sp; /* function */
+ frame->di = arg;
p->thread.io_bitmap_ptr = NULL;
return 0;
}
+ frame->bx = 0;
*childregs = *current_pt_regs();
childregs->ax = 0;
if (sp)
childregs->sp = sp;

- frame->ret_addr = (unsigned long) ret_from_fork;
task_user_gs(p) = get_user_gs(current_pt_regs());

p->thread.io_bitmap_ptr = NULL;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 9fab915..421646a 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -163,15 +163,11 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
if (unlikely(p->flags & PF_KTHREAD)) {
/* kernel thread */
memset(childregs, 0, sizeof(struct pt_regs));
- childregs->sp = (unsigned long)childregs;
- childregs->ss = __KERNEL_DS;
- childregs->bx = sp; /* function */
- childregs->bp = arg;
- childregs->orig_ax = -1;
- childregs->cs = __KERNEL_CS | get_kernel_rpl();
- childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
+ frame->bx = sp; /* function */
+ frame->r12 = arg;
return 0;
}
+ frame->bx = 0;
*childregs = *current_pt_regs();

childregs->ax = 0;
--
2.5.5