// Minimal single-file rseq repro for same-CPU preemption handling.
//
// Build:
//   g++ -O2 -std=c++20 -pthread rseq_same_cpu_preempt_test.cc -o rseq_same_cpu_preempt_test
//
// The main thread pins itself and a writer thread to one CPU. It then enters an
// rseq critical section that stores 0 to a shared flag and spins until it sees
// the flag become 1. If the critical section resumes after a preemption without
// being aborted, it will eventually observe the writer's 1, and the program
// exits with status 1.
//
// The writer thread wakes every 10 usec and stores 1 to the shared flag.
//
// Expected behavior if rseq preemption aborts work correctly:
//   the program runs for 10 seconds and exits 0.
//
// Expected behavior if same-CPU preemption can resume inside the CS:
//   the main thread eventually reads 1 inside the CS and the program exits
//   with status 1.
//
// Note to readers: the top of this file is boring setup code. The interesting
// code starts at run_one_rseq_attempt() so you should skip down there first.

#include <sched.h>
#include <sys/rseq.h>

#include <cerrno>
#include <chrono>
#include <cstdarg>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <thread>

#if !defined(__aarch64__) && !defined(__x86_64__)
#error "This repro is currently implemented for aarch64 and x86_64 only."
#endif

namespace {

// Total wall-clock time main() keeps retrying the critical section.
constexpr std::chrono::seconds kRuntime{10};
// Interval between the writer thread's stores of 1 to the shared flag.
constexpr long kWriterSleepNs = 10'000;  // 10 usec

// Flag written with 0 inside the critical section and with 1 by the writer.
alignas(4) uint32_t g_shared_flag = 0;

// Returns the calling thread's rseq area, located at __rseq_offset bytes from
// the thread pointer.
struct rseq* current_rseq_abi() {
  auto* thread_ptr = static_cast<char*>(__builtin_thread_pointer());
  return reinterpret_cast<struct rseq*>(thread_ptr + __rseq_offset);
}

// Prints a printf-style message plus a trailing newline to stderr and
// terminates the process with status 1. Never returns.
[[noreturn, gnu::format(printf, 1, 2)]] void die(const char* fmt, ...) {
  va_list args;
  va_start(args, fmt);
  std::vfprintf(stderr, fmt, args);
  va_end(args);
  std::fprintf(stderr, "\n");
  _Exit(1);
}

// Reports "<what> failed: <strerror(errno)>" through die(). Never returns.
[[noreturn]] void die_errno(const char* what) {
  die("%s failed: %s", what, std::strerror(errno));
}

// Restricts the calling thread's affinity to the lowest-numbered CPU in its
// current affinity mask and returns that CPU number. Dies if the mask cannot
// be read or written, or if it contains no CPU.
int pick_and_pin_first_allowed_cpu() {
  cpu_set_t set;
  CPU_ZERO(&set);
  if (sched_getaffinity(0, sizeof(set), &set) != 0) {
    die_errno("sched_getaffinity");
  }
  for (int cpu = 0; cpu < CPU_SETSIZE; ++cpu) {
    if (CPU_ISSET(cpu, &set)) {
      // Reuse `set` as the single-CPU mask; the scan stops here, so the
      // original mask is no longer needed.
      CPU_ZERO(&set);
      CPU_SET(cpu, &set);
      if (sched_setaffinity(0, sizeof(set), &set) != 0) {
        die_errno("sched_setaffinity");
      }
      return cpu;
    }
  }
  die("No allowed CPU found");
}

#define RSEQ_STR_1(x) #x
#define RSEQ_STR(x) RSEQ_STR_1(x)

// Emits one critical-section descriptor into the __rseq_cs section under the
// local label `label`: version, flags, start_ip, post_commit_offset
// (post_commit_ip - start_ip) and abort_ip.
#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip)              \
  ".pushsection __rseq_cs, \"aw\" \n\t"                                                \
  ".balign 32 \n\t"                                                                    \
  RSEQ_STR(label) ": \n\t"                                                             \
  ".long 0 \n\t" /* version */                                                         \
  ".long 0 \n\t" /* flags */                                                           \
  ".quad " RSEQ_STR(start_ip) " \n\t" /* start_ip */                                   \
  ".quad " RSEQ_STR((post_commit_ip) - (start_ip)) " \n\t" /* post_commit_offset */    \
  ".quad " RSEQ_STR(abort_ip) " \n\t" /* abort_ip */                                   \
  ".popsection \n\t"

// Runs the rseq critical section once: publishes the descriptor in
// abi->rseq_cs, stores 0 to *shared_flag and spins until a non-zero value is
// read.
//
// Returns:
//    1  a non-zero flag was read inside the critical section (the failure this
//       repro is looking for);
//   -1  control reached the abort handler;
//    0  never produced by the asm paths; main() treats it as unexpected.
//
// The C_EQUIVALENT branch is illustrative pseudo-code only: it never touches
// `abi`, and because it falls through into the `abort:` label it always
// returns -1 if it is ever compiled in.
int run_one_rseq_attempt(struct rseq* abi, uint32_t* shared_flag) {
  int result = 0;
#ifdef C_EQUIVALENT
  // C equivalent:
  // Critical section: store 0, then spin until flag becomes 1
  *shared_flag = 0;
  while (*shared_flag == 0) {
    // spin
  }
  result = 1;  // Observed flag == 1
abort:
  // Abort handler (kernel jumps here if preempted inside CS)
  result = -1;  // We correctly observed a preemption inside the CS
#elif defined(__aarch64__)
  __asm__ __volatile__(
      RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
      // Store address of rseq_cs descriptor into abi->rseq_cs
      " adrp x15, 1b \n"
      " add x15, x15, :lo12:1b \n"
      " str x15, %[rseq_cs] \n"
      "2: \n"                         // Critical section start (label 2)
      " str wzr, %[shared_flag] \n"   // *shared_flag = 0
      "5: \n"                         // Spin loop: while (*shared_flag == 0) {}
      " ldr w15, %[shared_flag] \n"   // w15 = *shared_flag
      " cbz w15, 5b \n"               // if (w15 == 0) goto 5 (spin)
      " mov %w[result], #1 \n"        // result = 1 (observed flag == 1)
      "3: \n"                         // Critical section end - fall through
      " b 99f \n"                     // Jump past abort handler
      " .long %c[sig] \n"             // RSEQ signature (magic bytes required by kernel)
      "4: \n"                         // Abort handler entry (label 4)
      " mov %w[result], #-1 \n"       // result = -1
      "99: \n"                        // End of abort handler
      : [result] "+r"(result), [rseq_cs] "=m"(abi->rseq_cs), [shared_flag] "+Q"(*shared_flag)
      : [sig] "i"(RSEQ_SIG)
      : "memory", "x15");
#elif defined(__x86_64__)
  __asm__ __volatile__(
      RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
      // Store address of rseq_cs descriptor into abi->rseq_cs
      " leaq 1b(%%rip), %%rax \n\t"
      " movq %%rax, %[rseq_cs] \n\t"
      "2: \n\t"                            // Critical section start (label 2)
      " movl $0, %[shared_flag] \n\t"      // *shared_flag = 0
      "5: \n\t"                            // Spin loop: while (*shared_flag == 0) {}
      " movl %[shared_flag], %%eax \n\t"   // eax = *shared_flag
      " testl %%eax, %%eax \n\t"           // test eax == 0
      " jz 5b \n\t"                        // if (eax == 0) goto 5 (spin)
      " movl $1, %[result] \n\t"           // result = 1 (observed flag == 1)
      "3: \n\t"                            // Critical section end - fall through
      " jmp 99f \n\t"                      // Jump past abort handler
      " .long %c[sig] \n\t"                // RSEQ signature (magic bytes required by kernel)
      "4: \n\t"                            // Abort handler entry (label 4)
      " movl $-1, %[result] \n\t"          // result = -1
      "99: \n\t"                           // End of abort handler
      : [result] "+r"(result), [rseq_cs] "=m"(abi->rseq_cs), [shared_flag] "+m"(*shared_flag)
      : [sig] "i"(RSEQ_SIG)
      : "memory", "cc", "rax");
#endif
  return result;
}

// Body of the writer thread: forever sleeps kWriterSleepNs and then stores 1
// to g_shared_flag. Never returns.
// NOTE(review): no affinity call is made here; the thread is started after
// main() has pinned itself and presumably inherits that single-CPU mask --
// confirm against the platform's thread-creation semantics.
void writer_thread_main() {
  while (true) {
    std::this_thread::sleep_for(std::chrono::nanoseconds(kWriterSleepNs));
    __atomic_store_n(&g_shared_flag, 1u, __ATOMIC_RELAXED);
  }
}

}  // namespace

// Pins to one CPU, starts the writer, then re-enters the critical section
// until kRuntime has elapsed. Exits 0 if every attempt ended in the abort
// handler; exits 1 (via die) on any setup failure or unexpected result.
int main() {
  if (__rseq_size == 0) {
    die("rseq is not registered for this thread (glibc __rseq_size == 0); "
        "need glibc >= 2.35 with rseq support and a kernel that supports rseq");
  }

  const int cpu = pick_and_pin_first_allowed_cpu();
  // cpu_id is an unsigned field; read it once and convert explicitly so the
  // value compared is exactly the value reported under %d.
  const int rseq_cpu = static_cast<int>(current_rseq_abi()->cpu_id);
  if (rseq_cpu != cpu) {
    die("rseq abi cpu_id is %d after pinning rather than %d", rseq_cpu, cpu);
  }

  // Detached on purpose: the writer loops forever and the process leaves via
  // _Exit(), so there is nothing to join.
  std::thread(writer_thread_main).detach();

  const auto deadline = std::chrono::steady_clock::now() + kRuntime;
  uint64_t attempts = 0;
  uint64_t abort_retries = 0;
  while (std::chrono::steady_clock::now() < deadline) {
    ++attempts;
    const int rc = run_one_rseq_attempt(current_rseq_abi(), &g_shared_flag);
    if (rc == 1) {
      die("Observed shared_flag == 1 inside the rseq critical section "
          "after %llu attempts on cpu %d",
          static_cast<unsigned long long>(attempts), cpu);
    } else if (rc != -1) {
      die("Unexpected return value from rseq: %d after %llu attempts", rc,
          static_cast<unsigned long long>(attempts));
    }
    ++abort_retries;
  }

  std::fprintf(stderr,
               "PASS: ran for %lld seconds on cpu %d, attempts=%llu abort_retries=%llu\n",
               static_cast<long long>(kRuntime.count()), cpu,
               static_cast<unsigned long long>(attempts),
               static_cast<unsigned long long>(abort_retries));
  _Exit(0);
}