Re: [PATCH] signal: Extend exec_id to 64bits

From: Adam Zabrocki
Date: Thu Apr 02 2020 - 22:11:10 EST


On Thu, Apr 02, 2020 at 06:46:49AM +0200, Jann Horn wrote:
> On Wed, Apr 1, 2020 at 10:50 PM Eric W. Biederman <ebiederm@xxxxxxxxxxxx> wrote:
> > Replace the 32bit exec_id with a 64bit exec_id to make it impossible
> > to wrap the exec_id counter. With care an attacker can cause exec_id
> > wrap and send arbitrary signals to a newly exec'd parent. This
> > bypasses the signal sending checks if the parent changes their
> > credentials during exec.
> >
> > The severity of this problem can been seen that in my limited testing
> > of a 32bit exec_id it can take as little as 19s to exec 65536 times.
> > Which means that it can take as little as 14 days to wrap a 32bit
> > exec_id. Adam Zabrocki has succeeded wrapping the self_exe_id in 7
> > days. Even my slower timing is in the uptime of a typical server.
>
> FYI, if you actually optimize this, it's more like 12s to exec 1048576
> times according to my test, which means ~14 hours for 2^32 executions
> (on a single core). That's on an i7-4790 (a Haswell desktop processor
> that was launched about six years ago, in 2014).
>

Yep, there are a few ways of optimizing it and I believe I've pointed it out
here:
https://www.openwall.com/lists/kernel-hardening/2020/03/31/11

Thanks for doing such tests :)

I've also modified your PoC to use 'sysenter' and 'syscall' instruction. Both
cases gave me an extra 4% speed bump (including a test for 64-bits
"fast_execve"). I've run it under Intel(R) Xeon(R) E-2176G CPU @ 3.70GHz

As you've proven, it is possible to be done in a matter of hours.

Thanks,
Adam

> Here's my test code:
>
> =============
> $ grep 'model name' /proc/cpuinfo | head -n1
> model name : Intel(R) Core(TM) i7-4790 CPU @ 3.60GHz
> $ cat build.sh
> #!/bin/sh
> set -e
> nasm -felf32 -o fast_execve.o fast_execve.asm
> ld -m elf_i386 -o fast_execve fast_execve.o
> gcc -o launch launch.c -Wall
> gcc -o finish finish.c -Wall
> $ cat fast_execve.asm
> bits 32
>
> section .text
> global _start
> _start:
> ; eax = argv[0]
> ; expected to be 8 hex digits, with 'a' meaning 0x0 and 'p' meaning 0xf
> mov eax, [esp+4]
>
> mov ebx, 0 ; loop counter
> hex_digit_loop:
> inc byte [eax+ebx]
> cmp byte [eax+ebx], 'a'+16
> jne next_exec
> mov byte [eax+ebx], 'a'
> inc ebx
> cmp ebx, 5 ;;;;;;;;;;;;;;;;;; this is N, where iteration_count=pow(16,N)
> jne hex_digit_loop
>
>
> ; reached pow(256,N) execs, get out
>
> ; first make the stack big again
> mov eax, 75 ; setrlimit (32-bit ABI)
> mov ebx, 3 ; RLIMIT_STACK
> mov ecx, stacklim
> int 0x80
>
> ; execute end helper
> mov ebx, 4 ; dirfd = 4
> jmp common_exec
>
> next_exec:
> mov ebx, 3 ; dirfd = 3
>
> common_exec: ; execveat() with file descriptor passed in as ebx
> mov ecx, nullval ; pathname = empty string
> lea edx, [esp+4] ; argv
> mov esi, 0 ; envp
> mov edi, 0x1000 ; flags = AT_EMPTY_PATH
> mov eax, 358 ; execveat (32-bit ABI)
> int 0x80
> int3
>
> nullval:
> dd 0
> stacklim:
> dd 0x02000000
> dd 0xffffffff
> $ cat launch.c
> #define _GNU_SOURCE
> #include <fcntl.h>
> #include <err.h>
> #include <unistd.h>
> #include <sys/syscall.h>
> #include <sys/resource.h>
> int main(void) {
> close(3);
> close(4);
> if (open("fast_execve", O_PATH) != 3)
> err(1, "open fast_execve");
> if (open("finish", O_PATH) != 4)
> err(1, "open finish");
> char *argv[] = { "aaaaaaaa", NULL };
>
> struct rlimit lim;
> if (getrlimit(RLIMIT_STACK, &lim))
> err(1, "getrlimit");
> lim.rlim_cur = 0x4000;
> if (setrlimit(RLIMIT_STACK, &lim))
> err(1, "setrlimit");
>
> syscall(__NR_execveat, 3, "", argv, NULL, AT_EMPTY_PATH);
> }
> $ cat finish.c
> #include <stdlib.h>
> int main(void) {
> exit(0);
> }
> $ ./build.sh
> $ time ./launch
>
> real 0m12,075s
> user 0m0,905s
> sys 0m11,026s
> $
> =============

--
pi3 (pi3ki31ny) - pi3 (at) itsec pl
http://pi3.com.pl