Re: [PATCH] MIPS: scall: Save thread_info.syscall unconditionally on entry

From: Oleg Nesterov
Date: Thu Apr 04 2024 - 11:32:56 EST


On 04/04, Jiaxun Yang wrote:
>
>
> 在2024年3月28日三月 下午2:27,Jiaxun Yang写道:
> > thread_info.syscall is used by syscall_get_nr to supply syscall nr
> > over a thread stack frame.
> >
> > Previously, thread_info.syscall is only saved at syscall_trace_enter
> > when syscall tracing is enabled. However rest of the kernel code do
> > expect syscall_get_nr to be available without syscall tracing. The
> > previous design breaks collect_syscall.
> >
> > Move saving process to syscall entry to fix it.
> >
> > Reported-by: Xi Ruoyao <xry111@xxxxxxxxxxx>
> > Link: https://github.com/util-linux/util-linux/issues/2867
> > Signed-off-by: Jiaxun Yang <jiaxun.yang@xxxxxxxxxxx>
>
> Ping, this needs to be in fixes tree.

Just in case... I see nothing wrong, but I can't review this patch.
I know nothing about arch/mips/ so I can't really understand these
low-level changes.

Oleg.

>
> Thanks
>
> > ---
> > arch/mips/include/asm/ptrace.h | 2 +-
> > arch/mips/kernel/asm-offsets.c | 1 +
> > arch/mips/kernel/ptrace.c | 15 ++++++---------
> > arch/mips/kernel/scall32-o32.S | 23 +++++++++++++----------
> > arch/mips/kernel/scall64-n32.S | 3 ++-
> > arch/mips/kernel/scall64-n64.S | 3 ++-
> > arch/mips/kernel/scall64-o32.S | 33 +++++++++++++++++----------------
> > 7 files changed, 42 insertions(+), 38 deletions(-)
> >
> > diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h
> > index d14d0e37ad02..4a2b40ce39e0 100644
> > --- a/arch/mips/include/asm/ptrace.h
> > +++ b/arch/mips/include/asm/ptrace.h
> > @@ -159,7 +159,7 @@ extern unsigned long exception_ip(struct pt_regs *regs);
> > #define exception_ip(regs) exception_ip(regs)
> > #define profile_pc(regs) instruction_pointer(regs)
> >
> > -extern asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall);
> > +extern asmlinkage long syscall_trace_enter(struct pt_regs *regs);
> > extern asmlinkage void syscall_trace_leave(struct pt_regs *regs);
> >
> > extern void die(const char *, struct pt_regs *) __noreturn;
> > diff --git a/arch/mips/kernel/asm-offsets.c
> > b/arch/mips/kernel/asm-offsets.c
> > index d1b11f66f748..cb1045ebab06 100644
> > --- a/arch/mips/kernel/asm-offsets.c
> > +++ b/arch/mips/kernel/asm-offsets.c
> > @@ -101,6 +101,7 @@ void output_thread_info_defines(void)
> > OFFSET(TI_CPU, thread_info, cpu);
> > OFFSET(TI_PRE_COUNT, thread_info, preempt_count);
> > OFFSET(TI_REGS, thread_info, regs);
> > + OFFSET(TI_SYSCALL, thread_info, syscall);
> > DEFINE(_THREAD_SIZE, THREAD_SIZE);
> > DEFINE(_THREAD_MASK, THREAD_MASK);
> > DEFINE(_IRQ_STACK_SIZE, IRQ_STACK_SIZE);
> > diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
> > index 59288c13b581..61503a36067e 100644
> > --- a/arch/mips/kernel/ptrace.c
> > +++ b/arch/mips/kernel/ptrace.c
> > @@ -1317,16 +1317,13 @@ long arch_ptrace(struct task_struct *child,
> > long request,
> > * Notification of system call entry/exit
> > * - triggered by current->work.syscall_trace
> > */
> > -asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall)
> > +asmlinkage long syscall_trace_enter(struct pt_regs *regs)
> > {
> > user_exit();
> >
> > - current_thread_info()->syscall = syscall;
> > -
> > if (test_thread_flag(TIF_SYSCALL_TRACE)) {
> > if (ptrace_report_syscall_entry(regs))
> > return -1;
> > - syscall = current_thread_info()->syscall;
> > }
> >
> > #ifdef CONFIG_SECCOMP
> > @@ -1335,7 +1332,7 @@ asmlinkage long syscall_trace_enter(struct
> > pt_regs *regs, long syscall)
> > struct seccomp_data sd;
> > unsigned long args[6];
> >
> > - sd.nr = syscall;
> > + sd.nr = current_thread_info()->syscall;
> > sd.arch = syscall_get_arch(current);
> > syscall_get_arguments(current, regs, args);
> > for (i = 0; i < 6; i++)
> > @@ -1345,23 +1342,23 @@ asmlinkage long syscall_trace_enter(struct
> > pt_regs *regs, long syscall)
> > ret = __secure_computing(&sd);
> > if (ret == -1)
> > return ret;
> > - syscall = current_thread_info()->syscall;
> > }
> > #endif
> >
> > if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
> > trace_sys_enter(regs, regs->regs[2]);
> >
> > - audit_syscall_entry(syscall, regs->regs[4], regs->regs[5],
> > + audit_syscall_entry(current_thread_info()->syscall,
> > + regs->regs[4], regs->regs[5],
> > regs->regs[6], regs->regs[7]);
> >
> > /*
> > * Negative syscall numbers are mistaken for rejected syscalls, but
> > * won't have had the return value set appropriately, so we do so now.
> > */
> > - if (syscall < 0)
> > + if (current_thread_info()->syscall < 0)
> > syscall_set_return_value(current, regs, -ENOSYS, 0);
> > - return syscall;
> > + return current_thread_info()->syscall;
> > }
> >
> > /*
> > diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
> > index 18dc9b345056..2c604717e630 100644
> > --- a/arch/mips/kernel/scall32-o32.S
> > +++ b/arch/mips/kernel/scall32-o32.S
> > @@ -77,6 +77,18 @@ loads_done:
> > PTR_WD load_a7, bad_stack_a7
> > .previous
> >
> > + /*
> > + * syscall number is in v0 unless we called syscall(__NR_###)
> > + * where the real syscall number is in a0
> > + */
> > + subu t2, v0, __NR_O32_Linux
> > + bnez t2, 1f /* __NR_syscall at offset 0 */
> > + LONG_S a0, TI_SYSCALL($28) # Save a0 as syscall number
> > + b 2f
> > +1:
> > + LONG_S v0, TI_SYSCALL($28) # Save v0 as syscall number
> > +2:
> > +
> > lw t0, TI_FLAGS($28) # syscall tracing enabled?
> > li t1, _TIF_WORK_SYSCALL_ENTRY
> > and t0, t1
> > @@ -114,16 +126,7 @@ syscall_trace_entry:
> > SAVE_STATIC
> > move a0, sp
> >
> > - /*
> > - * syscall number is in v0 unless we called syscall(__NR_###)
> > - * where the real syscall number is in a0
> > - */
> > - move a1, v0
> > - subu t2, v0, __NR_O32_Linux
> > - bnez t2, 1f /* __NR_syscall at offset 0 */
> > - lw a1, PT_R4(sp)
> > -
> > -1: jal syscall_trace_enter
> > + jal syscall_trace_enter
> >
> > bltz v0, 1f # seccomp failed? Skip syscall
> >
> > diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
> > index 97456b2ca7dc..97788859238c 100644
> > --- a/arch/mips/kernel/scall64-n32.S
> > +++ b/arch/mips/kernel/scall64-n32.S
> > @@ -44,6 +44,8 @@ NESTED(handle_sysn32, PT_SIZE, sp)
> >
> > sd a3, PT_R26(sp) # save a3 for syscall restarting
> >
> > + LONG_S v0, TI_SYSCALL($28) # Store syscall number
> > +
> > li t1, _TIF_WORK_SYSCALL_ENTRY
> > LONG_L t0, TI_FLAGS($28) # syscall tracing enabled?
> > and t0, t1, t0
> > @@ -72,7 +74,6 @@ syscall_common:
> > n32_syscall_trace_entry:
> > SAVE_STATIC
> > move a0, sp
> > - move a1, v0
> > jal syscall_trace_enter
> >
> > bltz v0, 1f # seccomp failed? Skip syscall
> > diff --git a/arch/mips/kernel/scall64-n64.S b/arch/mips/kernel/scall64-n64.S
> > index e6264aa62e45..be11ea5cc67e 100644
> > --- a/arch/mips/kernel/scall64-n64.S
> > +++ b/arch/mips/kernel/scall64-n64.S
> > @@ -46,6 +46,8 @@ NESTED(handle_sys64, PT_SIZE, sp)
> >
> > sd a3, PT_R26(sp) # save a3 for syscall restarting
> >
> > + LONG_S v0, TI_SYSCALL($28) # Store syscall number
> > +
> > li t1, _TIF_WORK_SYSCALL_ENTRY
> > LONG_L t0, TI_FLAGS($28) # syscall tracing enabled?
> > and t0, t1, t0
> > @@ -82,7 +84,6 @@ n64_syscall_exit:
> > syscall_trace_entry:
> > SAVE_STATIC
> > move a0, sp
> > - move a1, v0
> > jal syscall_trace_enter
> >
> > bltz v0, 1f # seccomp failed? Skip syscall
> > diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
> > index d3c2616cba22..7a5abb73e531 100644
> > --- a/arch/mips/kernel/scall64-o32.S
> > +++ b/arch/mips/kernel/scall64-o32.S
> > @@ -79,6 +79,22 @@ loads_done:
> > PTR_WD load_a7, bad_stack_a7
> > .previous
> >
> > + /*
> > + * absolute syscall number is in v0 unless we called syscall(__NR_###)
> > + * where the real syscall number is in a0
> > + * note: NR_syscall is the first O32 syscall but the macro is
> > + * only defined when compiling with -mabi=32 (CONFIG_32BIT)
> > + * therefore __NR_O32_Linux is used (4000)
> > + */
> > +
> > + subu t2, v0, __NR_O32_Linux
> > + bnez t2, 1f /* __NR_syscall at offset 0 */
> > + LONG_S a0, TI_SYSCALL($28) # Save a0 as syscall number
> > + b 2f
> > +1:
> > + LONG_S v0, TI_SYSCALL($28) # Save v0 as syscall number
> > +2:
> > +
> > li t1, _TIF_WORK_SYSCALL_ENTRY
> > LONG_L t0, TI_FLAGS($28) # syscall tracing enabled?
> > and t0, t1, t0
> > @@ -113,22 +129,7 @@ trace_a_syscall:
> > sd a7, PT_R11(sp) # For indirect syscalls
> >
> > move a0, sp
> > - /*
> > - * absolute syscall number is in v0 unless we called syscall(__NR_###)
> > - * where the real syscall number is in a0
> > - * note: NR_syscall is the first O32 syscall but the macro is
> > - * only defined when compiling with -mabi=32 (CONFIG_32BIT)
> > - * therefore __NR_O32_Linux is used (4000)
> > - */
> > - .set push
> > - .set reorder
> > - subu t1, v0, __NR_O32_Linux
> > - move a1, v0
> > - bnez t1, 1f /* __NR_syscall at offset 0 */
> > - ld a1, PT_R4(sp) /* Arg1 for __NR_syscall case */
> > - .set pop
> > -
> > -1: jal syscall_trace_enter
> > + jal syscall_trace_enter
> >
> > bltz v0, 1f # seccomp failed? Skip syscall
> >
> >
> > ---
> > base-commit: a6bd6c9333397f5a0e2667d4d82fef8c970108f2
> > change-id: 20240328-mips_save_syscall-be471311cc9b
> >
> > Best regards,
> > --
> > Jiaxun Yang <jiaxun.yang@xxxxxxxxxxx>
>
> --
> - Jiaxun
>