Re: [PATCH v3 1/2] seccomp: passthrough uretprobe systemcall without filtering
From: Kees Cook
Date: Thu Feb 06 2025 - 16:21:04 EST
On Sun, Feb 02, 2025 at 08:29:20AM -0800, Eyal Birger wrote:
> When attaching uretprobes to processes running inside docker, the attached
> process is segfaulted when encountering the retprobe.
>
> The reason is that now that uretprobe is a system call the default seccomp
> filters in docker block it as they only allow a specific set of known
> syscalls. This is true for other userspace applications which use seccomp
> to control their syscall surface.
>
> Since uretprobe is a "kernel implementation detail" system call which is
> not used by userspace application code directly, it is impractical and
> there's very little point in forcing all userspace applications to
> explicitly allow it in order to avoid crashing tracked processes.
>
> Pass this systemcall through seccomp without depending on configuration.
>
> Note: uretprobe isn't supported in i386 and __NR_ia32_rt_tgsigqueueinfo
> uses the same number as __NR_uretprobe so the syscall isn't forced in the
> compat bitmap.
>
> Fixes: ff474a78cef5 ("uprobe: Add uretprobe syscall to speed up return probe")
> Reported-by: Rafael Buchbinder <rafi@xxxxxx>
> Link: https://lore.kernel.org/lkml/CAHsH6Gs3Eh8DFU0wq58c_LF8A4_+o6z456J7BidmcVY2AqOnHQ@xxxxxxxxxxxxxx/
> Link: https://lore.kernel.org/lkml/20250121182939.33d05470@xxxxxxxxxxxxxxxxxx/T/#me2676c378eff2d6a33f3054fed4a5f3afa64e65b
> Link: https://lore.kernel.org/lkml/20250128145806.1849977-1-eyal.birger@xxxxxxxxx/
> Cc: stable@xxxxxxxxxxxxxxx
> Signed-off-by: Eyal Birger <eyal.birger@xxxxxxxxx>
> ---
> v3: no change - deferring 32bit compat handling as there aren't plans to
> support this syscall in compat mode.
> v2: use action_cache bitmap and mode1 array to check the syscall
> ---
> kernel/seccomp.c | 24 +++++++++++++++++++++---
> 1 file changed, 21 insertions(+), 3 deletions(-)
>
> diff --git a/kernel/seccomp.c b/kernel/seccomp.c
> index f59381c4a2ff..09b6f8e6db51 100644
> --- a/kernel/seccomp.c
> +++ b/kernel/seccomp.c
> @@ -734,13 +734,13 @@ seccomp_prepare_user_filter(const char __user *user_filter)
>
> #ifdef SECCOMP_ARCH_NATIVE
> /**
> - * seccomp_is_const_allow - check if filter is constant allow with given data
> + * seccomp_is_filter_const_allow - check if filter is constant allow with given data
> * @fprog: The BPF programs
> * @sd: The seccomp data to check against, only syscall number and arch
> * number are considered constant.
> */
> -static bool seccomp_is_const_allow(struct sock_fprog_kern *fprog,
> - struct seccomp_data *sd)
> +static bool seccomp_is_filter_const_allow(struct sock_fprog_kern *fprog,
> + struct seccomp_data *sd)
> {
> unsigned int reg_value = 0;
> unsigned int pc;
> @@ -812,6 +812,21 @@ static bool seccomp_is_const_allow(struct sock_fprog_kern *fprog,
> return false;
> }
>
> +static bool seccomp_is_const_allow(struct sock_fprog_kern *fprog,
> + struct seccomp_data *sd)
> +{
> +#ifdef __NR_uretprobe
> + if (sd->nr == __NR_uretprobe
> +#ifdef SECCOMP_ARCH_COMPAT
> + && sd->arch != SECCOMP_ARCH_COMPAT
> +#endif
> + )
> + return true;
> +#endif
> +
> + return seccomp_is_filter_const_allow(fprog, sd);
> +}
> +
> static void seccomp_cache_prepare_bitmap(struct seccomp_filter *sfilter,
> void *bitmap, const void *bitmap_prev,
> size_t bitmap_size, int arch)
I minimized the above to:
@@ -749,6 +749,15 @@ static bool seccomp_is_const_allow(struct sock_fprog_kern *fprog,
if (WARN_ON_ONCE(!fprog))
return false;
+ /* Our single exception to filtering. */
+#ifdef __NR_uretprobe
+#ifdef SECCOMP_ARCH_COMPAT
+ if (sd->arch == SECCOMP_ARCH_NATIVE)
+#endif
+ if (sd->nr == __NR_uretprobe)
+ return true;
+#endif
+
for (pc = 0; pc < fprog->len; pc++) {
struct sock_filter *insn = &fprog->filter[pc];
u16 code = insn->code;
> @@ -1023,6 +1038,9 @@ static inline void seccomp_log(unsigned long syscall, long signr, u32 action,
> */
> static const int mode1_syscalls[] = {
> __NR_seccomp_read, __NR_seccomp_write, __NR_seccomp_exit, __NR_seccomp_sigreturn,
> +#ifdef __NR_uretprobe
> + __NR_uretprobe,
> +#endif
> -1, /* negative terminated */
> };
>
> --
> 2.43.0
>
-Kees
--
Kees Cook