Re: [PATCH v2 6/7] ptrace: introduce PTRACE_SET_SYSCALL_INFO request
From: Charlie Jenkins
Date: Wed Jan 15 2025 - 20:55:46 EST
On Mon, Jan 13, 2025 at 07:12:08PM +0200, Dmitry V. Levin wrote:
> PTRACE_SET_SYSCALL_INFO is a generic ptrace API that complements
> PTRACE_GET_SYSCALL_INFO by letting the ptracer modify details of
> system calls the tracee is blocked in.
>
> This API allows ptracers to obtain and modify system call details
> in a straightforward and architecture-agnostic way.
>
> Current implementation supports changing only those bits of system call
> information that are used by strace, namely, syscall number, syscall
> arguments, and syscall return value.
>
> Support of changing additional details returned by PTRACE_GET_SYSCALL_INFO,
> such as instruction pointer and stack pointer, could be added later
> if needed, by using struct ptrace_syscall_info.flags to specify
> the additional details that should be set. Currently, flags and reserved
> fields of struct ptrace_syscall_info must be initialized with zeroes;
> arch, instruction_pointer, and stack_pointer fields are ignored.
>
> PTRACE_SET_SYSCALL_INFO currently supports only PTRACE_SYSCALL_INFO_ENTRY,
> PTRACE_SYSCALL_INFO_EXIT, and PTRACE_SYSCALL_INFO_SECCOMP operations.
> Other operations could be added later if needed.
>
> Ideally, PTRACE_SET_SYSCALL_INFO should have been introduced along with
> PTRACE_GET_SYSCALL_INFO, but it didn't happen. The last straw that
> convinced me to implement PTRACE_SET_SYSCALL_INFO was apparent failure
> to provide an API of changing the first system call argument on riscv
> architecture.
>
> ptrace(2) man page:
>
> long ptrace(enum __ptrace_request request, pid_t pid, void *addr, void *data);
> ...
> PTRACE_SET_SYSCALL_INFO
> Modify information about the system call that caused the stop.
> The "data" argument is a pointer to struct ptrace_syscall_info
> that specifies the system call information to be set.
> The "addr" argument should be set to sizeof(struct ptrace_syscall_info)).
>
> Link: https://lore.kernel.org/all/59505464-c84a-403d-972f-d4b2055eeaac@xxxxxxxxx/
> Signed-off-by: Dmitry V. Levin <ldv@xxxxxxxxx>
> ---
> include/linux/ptrace.h | 3 ++
> include/uapi/linux/ptrace.h | 4 +-
> kernel/ptrace.c | 95 +++++++++++++++++++++++++++++++++++++
> 3 files changed, 101 insertions(+), 1 deletion(-)
>
> diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
> index 90507d4afcd6..c8dbf1e498bf 100644
> --- a/include/linux/ptrace.h
> +++ b/include/linux/ptrace.h
> @@ -17,6 +17,9 @@ struct syscall_info {
> struct seccomp_data data;
> };
>
> +/* sizeof() the first published struct ptrace_syscall_info */
> +#define PTRACE_SYSCALL_INFO_SIZE_VER0 84
> +
> extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr,
> void *buf, int len, unsigned int gup_flags);
>
> diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h
> index 72c038fc71d0..ca75b3ab5d22 100644
> --- a/include/uapi/linux/ptrace.h
> +++ b/include/uapi/linux/ptrace.h
> @@ -74,6 +74,7 @@ struct seccomp_metadata {
> };
>
> #define PTRACE_GET_SYSCALL_INFO 0x420e
> +#define PTRACE_SET_SYSCALL_INFO 0x4212
> #define PTRACE_SYSCALL_INFO_NONE 0
> #define PTRACE_SYSCALL_INFO_ENTRY 1
> #define PTRACE_SYSCALL_INFO_EXIT 2
> @@ -81,7 +82,8 @@ struct seccomp_metadata {
>
> struct ptrace_syscall_info {
> __u8 op; /* PTRACE_SYSCALL_INFO_* */
> - __u8 pad[3];
> + __u8 reserved;
> + __u16 flags;
> __u32 arch;
> __u64 instruction_pointer;
> __u64 stack_pointer;
> diff --git a/kernel/ptrace.c b/kernel/ptrace.c
> index 22e7d74cf4cd..41d37cb8f74a 100644
> --- a/kernel/ptrace.c
> +++ b/kernel/ptrace.c
> @@ -1016,6 +1016,97 @@ ptrace_get_syscall_info(struct task_struct *child, unsigned long user_size,
> write_size = min(actual_size, user_size);
> return copy_to_user(datavp, &info, write_size) ? -EFAULT : actual_size;
> }
> +
> +static unsigned long
> +ptrace_set_syscall_info_entry(struct task_struct *child, struct pt_regs *regs,
> + struct ptrace_syscall_info *info)
> +{
> + unsigned long args[ARRAY_SIZE(info->entry.args)];
> + int nr = info->entry.nr;
> + int i;
> +
> + if (nr != info->entry.nr)
> + return -ERANGE;
> +
> + for (i = 0; i < ARRAY_SIZE(args); i++) {
> + args[i] = info->entry.args[i];
> + if (args[i] != info->entry.args[i])
> + return -ERANGE;
> + }
> +
> + syscall_set_nr(child, regs, nr);
> + /*
> + * If the syscall number is set to -1, setting syscall arguments is not
> + * just pointless, it would also clobber the syscall return value on
> + * those architectures that share the same register both for the first
> + * argument of syscall and its return value.
> + */
> + if (nr != -1)
> + syscall_set_arguments(child, regs, args);
> +
> + return 0;
> +}
> +
> +static unsigned long
> +ptrace_set_syscall_info_seccomp(struct task_struct *child, struct pt_regs *regs,
> + struct ptrace_syscall_info *info)
> +{
> + /*
> + * info->entry is currently a subset of info->seccomp,
> + * info->seccomp.ret_data is currently ignored.
> + */
> + return ptrace_set_syscall_info_entry(child, regs, info);
> +}
> +
> +static unsigned long
> +ptrace_set_syscall_info_exit(struct task_struct *child, struct pt_regs *regs,
> + struct ptrace_syscall_info *info)
> +{
> + if (info->exit.is_error)
> + syscall_set_return_value(child, regs, info->exit.rval, 0);
> + else
> + syscall_set_return_value(child, regs, 0, info->exit.rval);
> +
> + return 0;
> +}
> +
> +static int
> +ptrace_set_syscall_info(struct task_struct *child, unsigned long user_size,
> + void __user *datavp)
> +{
> + struct pt_regs *regs = task_pt_regs(child);
> + struct ptrace_syscall_info info;
> + int error;
> +
> + BUILD_BUG_ON(sizeof(struct ptrace_syscall_info) < PTRACE_SYSCALL_INFO_SIZE_VER0);
> +
> + if (user_size < PTRACE_SYSCALL_INFO_SIZE_VER0 || user_size > PAGE_SIZE)
> + return -EINVAL;
> +
> + error = copy_struct_from_user(&info, sizeof(info), datavp, user_size);
> + if (error)
> + return error;
> +
> + /* Reserved for future use. */
> + if (info.flags || info.reserved)
> + return -EINVAL;
> +
> + /* Changing the type of the system call stop is not supported. */
> + if (ptrace_get_syscall_info_op(child) != info.op)
Since this isn't supported anyway, would it make sense to set the
info.op to ptrace_get_syscall_info_op(child) like is done for
get_syscall_info? The usecase I see for this is simplifying when the
user doesn't call PTRACE_GET_SYSCALL_INFO before calling
PTRACE_SET_SYSCALL_INFO.
- Charlie
> + return -EINVAL;
> +
> + switch (info.op) {
> + case PTRACE_SYSCALL_INFO_ENTRY:
> + return ptrace_set_syscall_info_entry(child, regs, &info);
> + case PTRACE_SYSCALL_INFO_EXIT:
> + return ptrace_set_syscall_info_exit(child, regs, &info);
> + case PTRACE_SYSCALL_INFO_SECCOMP:
> + return ptrace_set_syscall_info_seccomp(child, regs, &info);
> + default:
> + /* Other types of system call stops are not supported. */
> + return -EINVAL;
> + }
> +}
> #endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
>
> int ptrace_request(struct task_struct *child, long request,
> @@ -1234,6 +1325,10 @@ int ptrace_request(struct task_struct *child, long request,
> case PTRACE_GET_SYSCALL_INFO:
> ret = ptrace_get_syscall_info(child, addr, datavp);
> break;
> +
> + case PTRACE_SET_SYSCALL_INFO:
> + ret = ptrace_set_syscall_info(child, addr, datavp);
> + break;
> #endif
>
> case PTRACE_SECCOMP_GET_FILTER:
> --
> ldv