Re: [PATCH v2 27/36] x86/entry/32: Re-implement SYSENTER using the new C path
From: Denys Vlasenko
Date: Wed Oct 07 2015 - 14:08:34 EST
On 10/06/2015 02:48 AM, Andy Lutomirski wrote:
> # SYSENTER call handler stub
> ENTRY(entry_SYSENTER_32)
> movl TSS_sysenter_sp0(%esp), %esp
> sysenter_past_esp:
> + pushl $__USER_DS /* pt_regs->ss */
> + pushl %ecx /* pt_regs->cx */
> + pushfl /* pt_regs->flags (except IF = 0) */
> + orl $X86_EFLAGS_IF, (%esp) /* Fix IF */
> + pushl $__USER_CS /* pt_regs->cs */
> + pushl $0 /* pt_regs->ip = 0 (placeholder) */
> + pushl %eax /* pt_regs->orig_ax */
> + SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */
> +
> /*
> - * Interrupts are disabled here, but we can't trace it until
> - * enough kernel state to call TRACE_IRQS_OFF can be called - but
> - * we immediately enable interrupts at that point anyway.
> - */
> - pushl $__USER_DS
> - pushl %ebp
> - pushfl
> - orl $X86_EFLAGS_IF, (%esp)
> - pushl $__USER_CS
> - /*
> - * Push current_thread_info()->sysenter_return to the stack.
> - * A tiny bit of offset fixup is necessary: TI_sysenter_return
> - * is relative to thread_info, which is at the bottom of the
> - * kernel stack page. 4*4 means the 4 words pushed above;
> - * TOP_OF_KERNEL_STACK_PADDING takes us to the top of the stack;
> - * and THREAD_SIZE takes us to the bottom.
> + * User mode is traced as though IRQs are on, and SYSENTER
> + * turned them off.
> */
> - pushl ((TI_sysenter_return) - THREAD_SIZE + TOP_OF_KERNEL_STACK_PADDING + 4*4)(%esp)
> -
> - pushl %eax
> - SAVE_ALL
> - ENABLE_INTERRUPTS(CLBR_NONE)
> -
> -/*
> - * Load the potential sixth argument from user stack.
> - * Careful about security.
> - */
> - cmpl $__PAGE_OFFSET-3, %ebp
> - jae syscall_fault
> - ASM_STAC
> -1: movl (%ebp), %ebp
> - ASM_CLAC
> - movl %ebp, PT_EBP(%esp)
> - _ASM_EXTABLE(1b, syscall_fault)
> -
> - GET_THREAD_INFO(%ebp)
> -
> - testl $_TIF_WORK_SYSCALL_ENTRY, TI_flags(%ebp)
> - jnz syscall_trace_entry
> -sysenter_do_call:
> - cmpl $(NR_syscalls), %eax
> - jae sysenter_badsys
> - call *sys_call_table(, %eax, 4)
> -sysenter_after_call:
> - movl %eax, PT_EAX(%esp)
> - LOCKDEP_SYS_EXIT
> - DISABLE_INTERRUPTS(CLBR_ANY)
> TRACE_IRQS_OFF
> - movl TI_flags(%ebp), %ecx
> - testl $_TIF_ALLWORK_MASK, %ecx
> - jnz syscall_exit_work_irqs_off
> -sysenter_exit:
> -/* if something modifies registers it must also disable sysexit */
> - movl PT_EIP(%esp), %edx
> - movl PT_OLDESP(%esp), %ecx
> - xorl %ebp, %ebp
> - TRACE_IRQS_ON
> +
> + movl %esp, %eax
> + call do_fast_syscall_32
> + testl %eax, %eax
> + jz .Lsyscall_32_done
> +
> +/* Opportunistic SYSEXIT */
> + TRACE_IRQS_ON /* User mode traces as IRQs on. */
> + movl PT_EIP(%esp), %edx /* pt_regs->ip */
> + movl PT_OLDESP(%esp), %ecx /* pt_regs->sp */
> + popl %ebx /* pt_regs->bx */
> + addl $2*4, %esp /* skip pt_regs->cx and pt_regs->dx */
Here the stack engine and the ALUs both operate on ESP and conflict,
potentially adding a stall both before and after the ADD.
It might be faster to just pop twice into an unused register
(%eax is free here, since it is reloaded from pt_regs->ax below), say,
popl %eax
popl %eax
> + popl %esi /* pt_regs->si */
> + popl %edi /* pt_regs->di */
> + popl %ebp /* pt_regs->bp */
> + popl %eax /* pt_regs->ax */
> 1: mov PT_FS(%esp), %fs
> PTGS_TO_GS
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/