Re: [tip:x86/urgent] x86, trace: Fix CR2 corruption when tracing page faults

From: Steven Rostedt
Date: Wed Mar 05 2014 - 08:00:34 EST


On Wed, 5 Mar 2014 13:36:35 +0100
Peter Zijlstra <peterz@xxxxxxxxxxxxx> wrote:

> On Wed, Mar 05, 2014 at 01:25:35PM +0100, Peter Zijlstra wrote:
> > On Wed, Mar 05, 2014 at 07:20:22AM -0500, Steven Rostedt wrote:
> > > On Wed, 5 Mar 2014 12:14:15 +0100
> > > Peter Zijlstra <peterz@xxxxxxxxxxxxx> wrote:
> > >
> > > Please no! I used tracing of the do_page_fault function all the time.
> > > It is very useful.
> >
> > Why do you trace do_page_fault and not the __do_page_fault() function?
> > do_page_fault() is a minimal wrapper which doesn't actually do all that
> > much?
>
> Here, something like so; then you can still trace your do_page_fault().
>
> It also fixes up trace_page_fault_entries() to not re-read the cr2.
>
> ---
> arch/x86/include/asm/traps.h | 2 +-
> arch/x86/kernel/entry_32.S | 2 +-
> arch/x86/kernel/entry_64.S | 2 +-
> arch/x86/kernel/kvm.c | 2 +-
> arch/x86/mm/fault.c | 21 ++++++++++-----------
> 5 files changed, 14 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
> index 58d66fe06b61..1280f72deea8 100644
> --- a/arch/x86/include/asm/traps.h
> +++ b/arch/x86/include/asm/traps.h
> @@ -71,7 +71,7 @@ dotraplinkage void do_double_fault(struct pt_regs *, long);
> asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *);
> #endif
> dotraplinkage void do_general_protection(struct pt_regs *, long);
> -dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
> +dotraplinkage void normal_do_page_fault(struct pt_regs *, unsigned long);

Why not call it "do_do_page_fault()" ;-)

> #ifdef CONFIG_TRACING
> dotraplinkage void trace_do_page_fault(struct pt_regs *, unsigned long);
> #endif
> diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
> index a2a4f4697889..9a9f64755da8 100644
> --- a/arch/x86/kernel/entry_32.S
> +++ b/arch/x86/kernel/entry_32.S
> @@ -1257,7 +1257,7 @@ END(trace_page_fault)
> ENTRY(page_fault)
> RING0_EC_FRAME
> ASM_CLAC
> - pushl_cfi $do_page_fault
> + pushl_cfi $normal_do_page_fault
> ALIGN
> error_code:
> /* the function address is in %gs's slot on the stack */
> diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
> index 1e96c3628bf2..7d49812741ac 100644
> --- a/arch/x86/kernel/entry_64.S
> +++ b/arch/x86/kernel/entry_64.S
> @@ -1491,7 +1491,7 @@ zeroentry xen_int3 do_int3
> errorentry xen_stack_segment do_stack_segment
> #endif
> errorentry general_protection do_general_protection
> -trace_errorentry page_fault do_page_fault
> +trace_errorentry page_fault normal_do_page_fault
> #ifdef CONFIG_KVM_GUEST
> errorentry async_page_fault do_async_page_fault
> #endif
> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> index 713f1b3bad52..9e7db22ec437 100644
> --- a/arch/x86/kernel/kvm.c
> +++ b/arch/x86/kernel/kvm.c
> @@ -259,7 +259,7 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
>
> switch (kvm_read_and_reset_pf_reason()) {
> default:
> - do_page_fault(regs, error_code);
> + normal_do_page_fault(regs, error_code);
> break;
> case KVM_PV_REASON_PAGE_NOT_PRESENT:
> /* page is swapped out by the host. */
> diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
> index e7fa28bf3262..576cfc3d0086 100644
> --- a/arch/x86/mm/fault.c
> +++ b/arch/x86/mm/fault.c
> @@ -1022,8 +1022,7 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs)
> * routines.
> */
> static void __kprobes

Still requires the "noinline" above

-- Steve

> -__do_page_fault(struct pt_regs *regs, unsigned long error_code,
> - unsigned long address)
> +do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address)
> {
> struct vm_area_struct *vma;
> struct task_struct *tsk;
> @@ -1245,28 +1244,28 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
> up_read(&mm->mmap_sem);
> }
>
> -dotraplinkage void __kprobes
> -do_page_fault(struct pt_regs *regs, unsigned long error_code)
> +dotraplinkage void __kprobes notrace
> +normal_do_page_fault(struct pt_regs *regs, unsigned long error_code)
> {
> enum ctx_state prev_state;
> /* Get the faulting address: */
> unsigned long address = read_cr2();
>
> prev_state = exception_enter();
> - __do_page_fault(regs, error_code, address);
> + do_page_fault(regs, error_code, address);
> exception_exit(prev_state);
> }
>
> -static void trace_page_fault_entries(struct pt_regs *regs,
> +static void trace_page_fault_entries(unsigned long address, struct pt_regs *regs,
> unsigned long error_code)
> {
> if (user_mode(regs))
> - trace_page_fault_user(read_cr2(), regs, error_code);
> + trace_page_fault_user(address, regs, error_code);
> else
> - trace_page_fault_kernel(read_cr2(), regs, error_code);
> + trace_page_fault_kernel(address, regs, error_code);
> }
>
> -dotraplinkage void __kprobes
> +dotraplinkage void __kprobes notrace
> trace_do_page_fault(struct pt_regs *regs, unsigned long error_code)
> {
> enum ctx_state prev_state;
> @@ -1279,7 +1278,7 @@ trace_do_page_fault(struct pt_regs *regs, unsigned long error_code)
> unsigned long address = read_cr2();
>
> prev_state = exception_enter();
> - trace_page_fault_entries(regs, error_code);
> - __do_page_fault(regs, error_code, address);
> + trace_page_fault_entries(address, regs, error_code);
> + do_page_fault(regs, error_code, address);
> exception_exit(prev_state);
> }

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/