Re: [REGRESSION] rseq: refactoring in v6.19 broke everyone on arm64 and tcmalloc everywhere
From: Mathias Stearn
Date: Tue Apr 28 2026 - 09:45:01 EST
On Tue, Apr 28, 2026 at 11:54 AM Dmitry Vyukov <dvyukov@xxxxxxxxxx> wrote:
>
> On Tue, 28 Apr 2026 at 10:18, Thomas Gleixner <tglx@xxxxxxxxxx> wrote:
> >
> > Is there a pre-compiled version of those tcmalloc tests somewhere?
>
> I've attached an archive with 2 tests that I used.
Here is an additional test. It is the stress test I used to show that
the regression can result in two live allocations getting the same address.
It will run for up to a minute or until the first double allocation is
detected (usually within 30ms on 6.19).
The Arm binary is linked against glibc 2.35 and the x86 binary against
glibc 2.39. Neither should have any other runtime dependencies. Run it as
follows, replacing ARCH with x86 or arm:
GLIBC_TUNABLES=glibc.pthread.rseq=0 ./double_alloc_test.ARCH
Attachment:
double_alloc_test.x86.gz
Description: application/gzip
Attachment:
double_alloc_test.arm.gz
Description: application/gzip
//
// On Linux 6.19, membarrier RSEQ IPI no longer writes cpu_id_start.
// This breaks tcmalloc's StopCpu protocol: ShrinkOtherCache/DrainCpu can
// read slab objects concurrently with a Pop on the same CPU, giving two
// callers the same pointer (silent heap corruption).
//
// Detection: each allocation is stamped with a per-thread canary. If another
// thread receives the same pointer, it overwrites the canary. The original
// owner detects this on its next verification pass.
#include <sched.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <atomic>
#include <cstdio>
#include <thread>
#include <vector>
#include "absl/time/clock.h"
#include "absl/time/time.h"
#include "tcmalloc/malloc_extension.h"
namespace tcmalloc {
namespace {
// Number of worker threads spawned by test().
constexpr int kNumThreads = 64;
// Upper bound on the number of allocations a single worker keeps live at once.
constexpr int kMaxLivePerThread = 800;
// Time after which the timer thread in test() sets the stop flag.
constexpr absl::Duration kTestDuration = absl::Minutes(1);
// Allocation sizes (in bytes) that workers choose between. Every entry is a
// multiple of sizeof(uint64_t), so StampAlloc() writes every requested byte.
constexpr size_t kAllocSizes[] = {16, 32, 48, 64, 80, 128, 256};
// Number of entries in kAllocSizes.
constexpr int kNumSizes = sizeof(kAllocSizes) / sizeof(kAllocSizes[0]);
// Bookkeeping record for one live allocation owned by a worker thread.
// NOTE: test() aggregate-initializes this positionally as {ptr, size, canary},
// so the member order must not change.
struct Alloc {
// Address returned by ::operator new; test() sets it to nullptr once a
// corruption has been reported for this entry.
void* ptr;
// Size in bytes that was requested, also passed to the sized ::operator delete.
size_t size;
// Value that was stamped into the allocation by its owner.
uint64_t canary;
};
// Builds a 64-bit canary: the upper 16 bits hold `tid + 1`, the lower 48 bits
// hold `counter` truncated to 48 bits.
static uint64_t MakeCanary(int tid, uint64_t counter) {
  constexpr int kTidShift = 48;
  constexpr uint64_t kCounterMask = (uint64_t{1} << kTidShift) - 1;
  const uint64_t tid_bits = static_cast<uint64_t>(tid + 1) << kTidShift;
  const uint64_t counter_bits = counter & kCounterMask;
  return tid_bits | counter_bits;
}
// Extracts the thread id encoded in the upper 16 bits of `canary`.
// The stored value is biased by one, so a canary whose upper bits are zero
// yields -1.
static int CanaryTid(uint64_t canary) {
  const uint64_t biased_tid = canary >> 48;
  return static_cast<int>(biased_tid) - 1;
}
// Fills the allocation at `ptr` with `canary`, one 64-bit word at a time.
// Only whole words are written: size / sizeof(uint64_t) of them, so any
// trailing bytes beyond the last full word are left untouched.
// The stores go through a volatile pointer.
static void StampAlloc(void* ptr, size_t size, uint64_t canary) {
  volatile uint64_t* word = static_cast<volatile uint64_t*>(ptr);
  volatile uint64_t* const end = word + size / sizeof(uint64_t);
  while (word != end) {
    *word = canary;
    ++word;
  }
}
// Returns true when the first 64-bit word of the allocation still equals the
// canary recorded for it. Only the first word is inspected; the load goes
// through a volatile pointer.
static bool VerifyAlloc(const Alloc& a) {
  const uint64_t first_word = *static_cast<volatile uint64_t*>(a.ptr);
  return first_word == a.canary;
}
// Runs the double-allocation stress test.
//
// kNumThreads workers repeatedly allocate a batch of objects, stamp each with
// a per-thread canary, verify every live object, and free half of them. The
// run ends when any worker sees a canary mismatch or when kTestDuration
// elapses. A summary is printed to stderr and the process is terminated via
// _exit() with status 0 on PASS and 1 on FAIL; this function never returns.
void test() {
// Run tcmalloc's background actions on a dedicated thread with a 1 ms sleep
// interval. NOTE(review): presumably this is what triggers the cache
// shrink/drain activity described in the file header -- confirm against
// tcmalloc.
MallocExtension::SetBackgroundProcessSleepInterval(absl::Milliseconds(1));
std::thread background([] {
MallocExtension::ProcessBackgroundActions();
});
// Shared state: stop flag, number of detected mismatches, allocation count.
std::atomic<bool> stop{false};
std::atomic<int> canary_corruptions{0};
std::atomic<uint64_t> total_allocs{0};
const auto start_time = absl::Now();
std::vector<std::thread> threads;
for (int tid = 0; tid < kNumThreads; ++tid) {
threads.emplace_back([&, tid] {
// Allocations currently owned by this worker. The allocation loop below
// never lets the size reach past kMaxLivePerThread, so the reservation
// means `live` itself does not reallocate during the run.
std::vector<Alloc> live;
live.reserve(kMaxLivePerThread + 128);
uint64_t counter = 0;
// Per-thread seed for a simple linear congruential generator.
uint32_t rng = tid * 2654435761u + 1;
while (!stop.load(std::memory_order_relaxed)) {
// Pick one allocation size for this whole batch.
rng = rng * 1103515245 + 12345;
size_t alloc_size = kAllocSizes[rng % kNumSizes];
// Phase 1: allocate up to 64 objects (bounded by kMaxLivePerThread live
// objects) and stamp each one with a fresh canary.
for (int i = 0; i < 64 && static_cast<int>(live.size()) < kMaxLivePerThread; ++i) {
void* p = ::operator new(alloc_size);
uint64_t canary = MakeCanary(tid, ++counter);
StampAlloc(p, alloc_size, canary);
live.push_back({p, alloc_size, canary});
total_allocs.fetch_add(1, std::memory_order_relaxed);
}
// Phase 2: verify every live object. A mismatch in the first word means
// something else wrote to memory this thread still owns.
for (size_t i = 0; i < live.size(); ++i) {
if (!VerifyAlloc(live[i])) {
// Re-read the first word for the report; it is read separately from the
// check in VerifyAlloc().
auto* p = static_cast<volatile uint64_t*>(live[i].ptr);
uint64_t found = p[0];
int found_tid = CanaryTid(found);
int expected_tid = CanaryTid(live[i].canary);
int corruptions =
canary_corruptions.fetch_add(1, std::memory_order_relaxed) + 1;
fprintf(stderr,
"*** DOUBLE ALLOCATION DETECTED (#%d) ***\n"
" ptr=%p size=%zu\n"
" expected canary=0x%016lx (tid=%d)\n"
" found canary=0x%016lx (tid=%d)\n",
corruptions, live[i].ptr, live[i].size,
(unsigned long)live[i].canary, expected_tid,
(unsigned long)found, found_tid);
// Drop the entry (without freeing it) and ask every thread to stop.
live[i].ptr = nullptr;
stop.store(true, std::memory_order_relaxed);
stop.notify_all();
}
}
// Phase 3: compact `live` in place, removing entries nulled out above while
// preserving the order of the remaining ones.
size_t w = 0;
for (size_t r = 0; r < live.size(); ++r) {
if (live[r].ptr != nullptr) {
if (w != r) live[w] = live[r];
++w;
}
}
live.resize(w);
// Advance the generator; this value is not used directly, it only feeds the
// next step at the top of the loop.
rng = rng * 1103515245 + 12345;
// Phase 4: free the newest half of the live objects (taken from the back),
// re-verifying each one immediately before it is released.
int to_free = live.size() / 2;
for (int i = 0; i < to_free; ++i) {
auto& a = live.back();
// After the compaction above no entry has a null ptr, so this check is
// expected to always hold.
if (a.ptr) {
if (!VerifyAlloc(a)) {
auto* p = static_cast<volatile uint64_t*>(a.ptr);
uint64_t found = p[0];
canary_corruptions.fetch_add(1, std::memory_order_relaxed);
fprintf(stderr,
"*** DOUBLE ALLOCATION DETECTED (at free) ***\n"
" ptr=%p expected=0x%016lx found=0x%016lx\n",
a.ptr, (unsigned long)a.canary, (unsigned long)found);
// The mismatching object is not freed; request a stop instead.
a.ptr = nullptr;
stop.store(true, std::memory_order_relaxed);
stop.notify_all();
} else {
::operator delete(a.ptr, a.size);
}
}
live.pop_back();
}
}
// Release what is left only if no thread has reported a mismatch; otherwise
// the remaining objects are left allocated.
// NOTE(review): presumably to avoid freeing into a heap already known to be
// corrupted -- confirm intent.
if (canary_corruptions.load(std::memory_order_relaxed) == 0) {
for (auto& a : live) {
if (a.ptr) ::operator delete(a.ptr, a.size);
}
}
});
}
// Timer thread: sets the stop flag after kTestDuration. It is detached and
// captures `stop` by reference; test() ends in _exit() rather than returning,
// so `stop` is not destroyed while the process is still running.
std::thread([&]{
absl::SleepFor(kTestDuration);
stop.store(true, std::memory_order_relaxed);
stop.notify_all();
}).detach();
// Block until some thread changes `stop` from false.
stop.wait(false);
// Elapsed time is taken when the stop is observed, before joining the workers.
const auto elapsed = absl::ToDoubleSeconds(absl::Now() - start_time);
for (auto& t : threads) t.join();
// Disable background actions and then join the background thread.
MallocExtension::SetBackgroundProcessActionsEnabled(false);
background.join();
uint64_t ops = total_allocs.load();
int corruptions = canary_corruptions.load();
bool pass = (corruptions == 0);
fprintf(stderr,
"\n=== Results ===\n"
"Time: %fs\n"
"Total allocations: %lu\n"
"Canary corruptions (double allocations): %d\n %s\n",
elapsed, (unsigned long)ops, corruptions, pass ? "PASS" : "FAIL");
// Terminate immediately: status 0 on PASS, 1 on FAIL. The detached timer
// thread may still be sleeping at this point.
_exit(!pass);
}
} // namespace
} // namespace tcmalloc
// Program entry point: runs the stress test. tcmalloc::test() terminates the
// process itself via _exit(), so control does not normally come back here;
// falling off the end of main is equivalent to returning 0.
int main() {
  tcmalloc::test();
}