Re: BUG: unable to handle kernel paging request in fuse_copy_do

From: Miklos Szeredi
Date: Fri Mar 22 2024 - 09:50:28 EST


[MM list + secretmem author CC-d]

On Thu, 21 Mar 2024 at 08:52, xingwei lee <xrivendell7@xxxxxxxxx> wrote:
>
> Hello, I found a bug titled "BUG: unable to handle kernel paging
> request in fuse_copy_do" with a modified syzkaller, and it may be
> related to fs/fuse.
> I also confirmed it on the latest upstream.
>
> If you fix this issue, please add the following tag to the commit:
> Reported-by: xingwei lee <xrivendell7@xxxxxxxxx>
> Reported-by: yue sun <samsun1006219@xxxxxxxxx>

Thanks for the report. This looks like a secretmem vs get_user_pages issue.

I reduced the syz reproducer to a minimal one that isn't dependent on fuse:

=== repro.c ===
#define _GNU_SOURCE

#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/socket.h>

/*
 * Minimal reproducer: map a memfd_secret() file and then hand that mapping
 * to the kernel through getsockopt(), vmsplice() and splice().
 *
 * Returns 0 once the whole sequence has been issued, or 1 if a setup step
 * (creating a descriptor or the mapping) failed, in which case the
 * sequence was not attempted.
 */
int main(void)
{
	int fd1, fd2, fd3;
	int pip[2];
	struct iovec iov;
	void *addr;

	/* Setup failures mean the reproducer never ran; report them. */
	fd1 = syscall(__NR_memfd_secret, 0);
	if (fd1 < 0)
		return 1;

	addr = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd1, 0);
	if (addr == MAP_FAILED)
		return 1;

	/* Result deliberately not checked: part of the original sequence. */
	ftruncate(fd1, 7);

	fd2 = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd2 < 0)
		return 1;

	/*
	 * The secretmem mapping is passed as the optlen pointer; the return
	 * value is irrelevant (presumably the call only serves to make the
	 * kernel access the mapping -- confirm against the original report).
	 */
	getsockopt(fd2, 0, 0, NULL, addr);

	if (pipe(pip) < 0)
		return 1;

	/* Reference 0x50 bytes of the secretmem mapping from the pipe. */
	iov.iov_base = addr;
	iov.iov_len = 0x50;
	vmsplice(pip[1], &iov, 1, 0);

	/*
	 * Octal 0600 (owner read/write). The hex value 0x600 would instead
	 * set the setgid and sticky bits and grant no access at all, which
	 * breaks re-opening the file on a second run as a non-root user.
	 */
	fd3 = open("/tmp/repro-secretmem.test", O_RDWR | O_CREAT, 0600);
	if (fd3 < 0)
		return 1;

	/* Move the pipe contents into the regular file. */
	splice(pip[0], NULL, fd3, NULL, 0x50, 0);

	return 0;
}
=======

Thanks,
Miklos

>
> kernel: upstream 23956900041d968f9ad0f30db6dede4daccd7aa9
> kernel config: https://syzkaller.appspot.com/text?tag=KernelConfig&x=9f47e8dfa53b0b11
> with KASAN enabled
> compiler: gcc (Debian 12.2.0-14) 12.2.0
>
> BUG: unable to handle kernel paging request in fuse_copy_do
> UDPLite: UDP-Lite is deprecated and scheduled to be removed in 2025,
> please contact the netdev mailing list
> BUG: unable to handle page fault for address: ffff88802c29c000
> #PF: supervisor read access in kernel mode
> #PF: error_code(0x0000) - not-present page
> PGD 13001067 P4D 13001067 PUD 13002067 PMD 24c8d063 PTE 800fffffd3d63060
> Oops: 0000 [#1] PREEMPT SMP KASAN NOPTI
> CPU: 1 PID: 8221 Comm: 1e9 Not tainted 6.8.0-05202-g9187210eee7d-dirty #21
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
> 1.16.2-1.fc38 04/01/2014
> RIP: 0010:memcpy+0xc/0x20 arch/x86/lib/memcpy_64.S:38
> Code: 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 90 90 90 90 90 90 90
> 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 66 90 48 89 f80
> RSP: 0018:ffffc9001065f9c8 EFLAGS: 00010246
> RAX: ffffc9001065fb10 RBX: ffffc9001065fc78 RCX: 0000000000000010
> RDX: 0000000000000010 RSI: ffff88802c29c000 RDI: ffffc9001065fb10
> RBP: 0000000000000010 R08: ffff88802c29c000 R09: 0000000000000001
> R10: ffffffff8ea82ed7 R11: ffffc9001065fd98 R12: ffffc9001065fac0
> R13: 0000000000000010 R14: ffffc9001065faf0 R15: ffffc9001065fcbc
> FS: 000000000f82d480(0000) GS:ffff88823bc00000(0000) knlGS:0000000000000000
> CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: ffff88802c29c000 CR3: 000000002dd7c000 CR4: 0000000000750ef0
> PKRU: 55555554
> Call Trace:
> <TASK>
> fuse_copy_do+0x152/0x340 fs/fuse/dev.c:758
> fuse_copy_one fs/fuse/dev.c:1007 [inline]
> fuse_dev_do_write+0x1df/0x26a0 fs/fuse/dev.c:1863
> fuse_dev_write+0x129/0x1b0 fs/fuse/dev.c:1960
> call_write_iter include/linux/fs.h:2108 [inline]
> new_sync_write fs/read_write.c:497 [inline]
> vfs_write+0x62e/0x10a0 fs/read_write.c:590
> ksys_write+0xf6/0x1d0 fs/read_write.c:643
> do_syscall_x64 arch/x86/entry/common.c:52 [inline]
> do_syscall_64+0x7c/0x1d0 arch/x86/entry/common.c:83
> entry_SYSCALL_64_after_hwframe+0x6c/0x74
>
> =* repro.c =*
> #define _GNU_SOURCE
>
> #include <dirent.h>
> #include <endian.h>
> #include <errno.h>
> #include <fcntl.h>
> #include <setjmp.h>
> #include <signal.h>
> #include <stdarg.h>
> #include <stdbool.h>
> #include <stdint.h>
> #include <stdio.h>
> #include <stdlib.h>
> #include <string.h>
> #include <sys/prctl.h>
> #include <sys/stat.h>
> #include <sys/syscall.h>
> #include <sys/types.h>
> #include <sys/wait.h>
> #include <time.h>
> #include <unistd.h>
>
> #ifndef __NR_memfd_secret
> #define __NR_memfd_secret 447
> #endif
>
> static __thread int clone_ongoing;
> static __thread int skip_segv;
> static __thread jmp_buf segv_env;
>
> static void segv_handler(int sig, siginfo_t* info, void* ctx) {
> if (__atomic_load_n(&clone_ongoing, __ATOMIC_RELAXED) != 0) {
> exit(sig);
> }
> uintptr_t addr = (uintptr_t)info->si_addr;
> const uintptr_t prog_start = 1 << 20;
> const uintptr_t prog_end = 100 << 20;
> int skip = __atomic_load_n(&skip_segv, __ATOMIC_RELAXED) != 0;
> int valid = addr < prog_start || addr > prog_end;
> if (skip && valid) {
> _longjmp(segv_env, 1);
> }
> exit(sig);
> }
>
> static void install_segv_handler(void) {
> struct sigaction sa;
> memset(&sa, 0, sizeof(sa));
> sa.sa_handler = SIG_IGN;
> syscall(SYS_rt_sigaction, 0x20, &sa, NULL, 8);
> syscall(SYS_rt_sigaction, 0x21, &sa, NULL, 8);
> memset(&sa, 0, sizeof(sa));
> sa.sa_sigaction = segv_handler;
> sa.sa_flags = SA_NODEFER | SA_SIGINFO;
> sigaction(SIGSEGV, &sa, NULL);
> sigaction(SIGBUS, &sa, NULL);
> }
>
> #define NONFAILING(...) \
> ({ \
> int ok = 1; \
> __atomic_fetch_add(&skip_segv, 1, __ATOMIC_SEQ_CST); \
> if (_setjmp(segv_env) == 0) { \
> __VA_ARGS__; \
> } else \
> ok = 0; \
> __atomic_fetch_sub(&skip_segv, 1, __ATOMIC_SEQ_CST); \
> ok; \
> })
>
> static void sleep_ms(uint64_t ms) {
> usleep(ms * 1000);
> }
>
> static uint64_t current_time_ms(void) {
> struct timespec ts;
> if (clock_gettime(CLOCK_MONOTONIC, &ts))
> exit(1);
> return (uint64_t)ts.tv_sec * 1000 + (uint64_t)ts.tv_nsec / 1000000;
> }
>
> static bool write_file(const char* file, const char* what, ...) {
> char buf[1024];
> va_list args;
> va_start(args, what);
> vsnprintf(buf, sizeof(buf), what, args);
> va_end(args);
> buf[sizeof(buf) - 1] = 0;
> int len = strlen(buf);
> int fd = open(file, O_WRONLY | O_CLOEXEC);
> if (fd == -1)
> return false;
> if (write(fd, buf, len) != len) {
> int err = errno;
> close(fd);
> errno = err;
> return false;
> }
> close(fd);
> return true;
> }
>
> static void kill_and_wait(int pid, int* status) {
> kill(-pid, SIGKILL);
> kill(pid, SIGKILL);
> for (int i = 0; i < 100; i++) {
> if (waitpid(-1, status, WNOHANG | __WALL) == pid)
> return;
> usleep(1000);
> }
> DIR* dir = opendir("/sys/fs/fuse/connections");
> if (dir) {
> for (;;) {
> struct dirent* ent = readdir(dir);
> if (!ent)
> break;
> if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0)
> continue;
> char abort[300];
> snprintf(abort, sizeof(abort), "/sys/fs/fuse/connections/%s/abort",
> ent->d_name);
> int fd = open(abort, O_WRONLY);
> if (fd == -1) {
> continue;
> }
> if (write(fd, abort, 1) < 0) {
> }
> close(fd);
> }
> closedir(dir);
> } else {
> }
> while (waitpid(-1, status, __WALL) != pid) {
> }
> }
>
> static void setup_test() {
> prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
> setpgrp();
> write_file("/proc/self/oom_score_adj", "1000");
> }
>
> static void execute_one(void);
>
> #define WAIT_FLAGS __WALL
>
> static void loop(void) {
> int iter = 0;
> for (;; iter++) {
> int pid = fork();
> if (pid < 0)
> exit(1);
> if (pid == 0) {
> setup_test();
> execute_one();
> exit(0);
> }
> int status = 0;
> uint64_t start = current_time_ms();
> for (;;) {
> if (waitpid(-1, &status, WNOHANG | WAIT_FLAGS) == pid)
> break;
> sleep_ms(1);
> if (current_time_ms() - start < 5000)
> continue;
> kill_and_wait(pid, &status);
> break;
> }
> }
> }
>
> uint64_t r[3] = {0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff};
>
> void execute_one(void) {
> intptr_t res = 0;
> NONFAILING(memcpy((void*)0x20002040, "./file0\000", 8));
> syscall(__NR_mkdirat, /*fd=*/0xffffff9c, /*path=*/0x20002040ul, /*mode=*/0ul);
> NONFAILING(memcpy((void*)0x20002080, "/dev/fuse\000", 10));
> res = syscall(__NR_openat, /*fd=*/0xffffffffffffff9cul, /*file=*/0x20002080ul,
> /*flags=*/2ul, /*mode=*/0ul);
> if (res != -1)
> r[0] = res;
> NONFAILING(memcpy((void*)0x200020c0, "./file0\000", 8));
> NONFAILING(memcpy((void*)0x20002100, "fuse\000", 5));
> NONFAILING(memcpy((void*)0x20002140, "fd", 2));
> NONFAILING(*(uint8_t*)0x20002142 = 0x3d);
> NONFAILING(sprintf((char*)0x20002143, "0x%016llx", (long long)r[0]));
> NONFAILING(*(uint8_t*)0x20002155 = 0x2c);
> NONFAILING(memcpy((void*)0x20002156, "rootmode", 8));
> NONFAILING(*(uint8_t*)0x2000215e = 0x3d);
> NONFAILING(sprintf((char*)0x2000215f, "%023llo", (long long)0x4000));
> NONFAILING(*(uint8_t*)0x20002176 = 0x2c);
> NONFAILING(memcpy((void*)0x20002177, "user_id", 7));
> NONFAILING(*(uint8_t*)0x2000217e = 0x3d);
> NONFAILING(sprintf((char*)0x2000217f, "%020llu", (long long)0));
> NONFAILING(*(uint8_t*)0x20002193 = 0x2c);
> NONFAILING(memcpy((void*)0x20002194, "group_id", 8));
> NONFAILING(*(uint8_t*)0x2000219c = 0x3d);
> NONFAILING(sprintf((char*)0x2000219d, "%020llu", (long long)0));
> NONFAILING(*(uint8_t*)0x200021b1 = 0x2c);
> NONFAILING(*(uint8_t*)0x200021b2 = 0);
> syscall(__NR_mount, /*src=*/0ul, /*dst=*/0x200020c0ul, /*type=*/0x20002100ul,
> /*flags=*/0ul, /*opts=*/0x20002140ul);
> res = syscall(__NR_memfd_secret, /*flags=*/0ul);
> if (res != -1)
> r[1] = res;
> syscall(__NR_mmap, /*addr=*/0x20000000ul, /*len=*/0xb36000ul,
> /*prot=PROT_GROWSUP|PROT_READ*/ 0x2000001ul,
> /*flags=MAP_STACK|MAP_POPULATE|MAP_FIXED|MAP_SHARED*/ 0x28011ul,
> /*fd=*/r[1], /*offset=*/0ul);
> syscall(__NR_ftruncate, /*fd=*/r[1], /*len=*/7ul);
> res = syscall(__NR_socket, /*domain=*/2ul, /*type=*/2ul, /*proto=*/0x88);
> if (res != -1)
> r[2] = res;
> NONFAILING(*(uint32_t*)0x20000280 = 0);
> syscall(__NR_getsockopt, /*fd=*/r[2], /*level=*/1, /*optname=*/0x11,
> /*optval=*/0ul, /*optlen=*/0x20000280ul);
> NONFAILING(*(uint32_t*)0x20000000 = 0x50);
> NONFAILING(*(uint32_t*)0x20000004 = 0);
> NONFAILING(*(uint64_t*)0x20000008 = 0);
> NONFAILING(*(uint32_t*)0x20000010 = 7);
> NONFAILING(*(uint32_t*)0x20000014 = 0x27);
> NONFAILING(*(uint32_t*)0x20000018 = 0);
> NONFAILING(*(uint32_t*)0x2000001c = 0);
> NONFAILING(*(uint16_t*)0x20000020 = 0);
> NONFAILING(*(uint16_t*)0x20000022 = 0);
> NONFAILING(*(uint32_t*)0x20000024 = 0);
> NONFAILING(*(uint32_t*)0x20000028 = 0);
> NONFAILING(*(uint16_t*)0x2000002c = 0);
> NONFAILING(*(uint16_t*)0x2000002e = 0);
> NONFAILING(memset((void*)0x20000030, 0, 32));
> syscall(__NR_write, /*fd=*/r[0], /*arg=*/0x20000000ul, /*len=*/0x50ul);
> }
> int main(void) {
> syscall(__NR_mmap, /*addr=*/0x1ffff000ul, /*len=*/0x1000ul, /*prot=*/0ul,
> /*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul, /*fd=*/-1,
> /*offset=*/0ul);
> syscall(__NR_mmap, /*addr=*/0x20000000ul, /*len=*/0x1000000ul,
> /*prot=PROT_WRITE|PROT_READ|PROT_EXEC*/ 7ul,
> /*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul, /*fd=*/-1,
> /*offset=*/0ul);
> syscall(__NR_mmap, /*addr=*/0x21000000ul, /*len=*/0x1000ul, /*prot=*/0ul,
> /*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul, /*fd=*/-1,
> /*offset=*/0ul);
> install_segv_handler();
> loop();
> return 0;
> }
>
> =* repro.txt =*
> mkdirat(0xffffffffffffff9c, &(0x7f0000002040)='./file0\x00', 0x0)
> r0 = openat$fuse(0xffffffffffffff9c, &(0x7f0000002080), 0x2, 0x0)
> mount$fuse(0x0, &(0x7f00000020c0)='./file0\x00', &(0x7f0000002100),
> 0x0, &(0x7f0000002140)={{'fd', 0x3d, r0}, 0x2c, {'rootmode', 0x3d,
> 0x4000}})
> r1 = memfd_secret(0x0)
> mmap(&(0x7f0000000000/0xb36000)=nil, 0xb36000, 0x2000001, 0x28011, r1, 0x0)
> ftruncate(r1, 0x7)
> r2 = socket$inet_udplite(0x2, 0x2, 0x88)
> getsockopt$sock_cred(r2, 0x1, 0x11, 0x0, &(0x7f0000000280))
> write$FUSE_INIT(r0, &(0x7f0000000000)={0x50}, 0x50)
>
>
> See also https://gist.github.com/xrivendell7/961be96ae091c9671bb56efea902cec4.
>
> I hope it helps.
> best regards.
> xingwei Lee