Re: [PATCH 1/3] ptrace,syscall_user_dispatch: Implement Syscall User Dispatch Suspension

From: Damien Le Moal
Date: Fri Jan 20 2023 - 05:50:04 EST


On 1/20/23 19:23, Peter Zijlstra wrote:
> On Wed, Jan 18, 2023 at 03:10:53PM -0500, Gregory Price wrote:
>> Adds PTRACE_O_SUSPEND_SYSCALL_USER_DISPATCH to ptrace options, and
>> modifies Syscall User Dispatch to suspend interception when enabled.
>>
>> This is modeled after the SUSPEND_SECCOMP feature, which suspends
>> SECCOMP interposition. Without doing this, system calls that software
>> like CRIU injects into a process will be intercepted by Syscall
>> User Dispatch, either causing a crash (due to blocked signals) or
>> the delivery of those signals to a ptracer (not the intended behavior).
>>
>> Since Syscall User Dispatch is not a privileged feature, a check
>> for permissions is not required; however, attempting to set this
>> option when CONFIG_CHECKPOINT_RESTORE is not supported should be
>> disallowed, as its intended use is checkpoint/resume.
>>
>> Signed-off-by: Gregory Price <gregory.price@xxxxxxxxxxxx>
>
> One small nit -- see below, otherwise:
>
> Acked-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
>
>> ---
>> include/linux/ptrace.h | 2 ++
>> include/uapi/linux/ptrace.h | 6 +++++-
>> kernel/entry/syscall_user_dispatch.c | 5 +++++
>> kernel/ptrace.c | 5 +++++
>> 4 files changed, 17 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
>> index eaaef3ffec22..461ae5c99d57 100644
>> --- a/include/linux/ptrace.h
>> +++ b/include/linux/ptrace.h
>> @@ -45,6 +45,8 @@ extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr,
>>
>> #define PT_EXITKILL (PTRACE_O_EXITKILL << PT_OPT_FLAG_SHIFT)
>> #define PT_SUSPEND_SECCOMP (PTRACE_O_SUSPEND_SECCOMP << PT_OPT_FLAG_SHIFT)
>> +#define PT_SUSPEND_SYSCALL_USER_DISPATCH \
>> + (PTRACE_O_SUSPEND_SYSCALL_USER_DISPATCH << PT_OPT_FLAG_SHIFT)
>>
>> extern long arch_ptrace(struct task_struct *child, long request,
>> unsigned long addr, unsigned long data);
>> diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h
>> index 195ae64a8c87..ba9e3f19a22c 100644
>> --- a/include/uapi/linux/ptrace.h
>> +++ b/include/uapi/linux/ptrace.h
>> @@ -146,9 +146,13 @@ struct ptrace_rseq_configuration {
>> /* eventless options */
>> #define PTRACE_O_EXITKILL (1 << 20)
>> #define PTRACE_O_SUSPEND_SECCOMP (1 << 21)
>> +#define PTRACE_O_SUSPEND_SYSCALL_USER_DISPATCH (1 << 22)
>>
>> #define PTRACE_O_MASK (\
>> - 0x000000ff | PTRACE_O_EXITKILL | PTRACE_O_SUSPEND_SECCOMP)
>> + 0x000000ff | \
>> + PTRACE_O_EXITKILL | \
>> + PTRACE_O_SUSPEND_SECCOMP | \
>> + PTRACE_O_SUSPEND_SYSCALL_USER_DISPATCH)
>>
>> #include <asm/ptrace.h>
>>
>> diff --git a/kernel/entry/syscall_user_dispatch.c b/kernel/entry/syscall_user_dispatch.c
>> index 0b6379adff6b..7607f4598dd8 100644
>> --- a/kernel/entry/syscall_user_dispatch.c
>> +++ b/kernel/entry/syscall_user_dispatch.c
>> @@ -8,6 +8,7 @@
>> #include <linux/uaccess.h>
>> #include <linux/signal.h>
>> #include <linux/elf.h>
>> +#include <linux/ptrace.h>
>>
>> #include <linux/sched/signal.h>
>> #include <linux/sched/task_stack.h>
>> @@ -36,6 +37,10 @@ bool syscall_user_dispatch(struct pt_regs *regs)
>> struct syscall_user_dispatch *sd = &current->syscall_dispatch;
>> char state;
>>
>> + if (IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) &&
>> + unlikely(current->ptrace & PT_SUSPEND_SYSCALL_USER_DISPATCH))
>
> Align with the '(' please.
>
>> + return false;
>> +
>> if (likely(instruction_pointer(regs) - sd->offset < sd->len))
>> return false;
>>
>> diff --git a/kernel/ptrace.c b/kernel/ptrace.c
>> index 54482193e1ed..a6ad815bd4be 100644
>> --- a/kernel/ptrace.c
>> +++ b/kernel/ptrace.c
>> @@ -370,6 +370,11 @@ static int check_ptrace_options(unsigned long data)
>> if (data & ~(unsigned long)PTRACE_O_MASK)
>> return -EINVAL;
>>
>> + if (unlikely(data & PTRACE_O_SUSPEND_SYSCALL_USER_DISPATCH)) {
>> + if (!IS_ENABLED(CONFIG_CHECKPOINT_RESTART))

Why not a single if with an && ?

>> + return -EINVAL;
>> + }
>> +
>> if (unlikely(data & PTRACE_O_SUSPEND_SECCOMP)) {
>> if (!IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) ||
>> !IS_ENABLED(CONFIG_SECCOMP))
>> --
>> 2.39.0
>>

--
Damien Le Moal
Western Digital Research