Re: [PATCH 15/14] selftests: futex: Add tests for robust unlock within the critical section.

From: Thomas Gleixner

Date: Sat Apr 04 2026 - 16:13:37 EST


On Sat, Apr 04 2026 at 11:39, Sebastian Andrzej Siewior wrote:
> From: Sebastian Andrzej Siewior <sebastian@xxxxxxxxxxxxx>
>
> I took Thomas’ initial test case from the cover letter and reworked it
> so that it uses ptrace() to single‑step through the VDSO unlock
> operation. The test expects the lock to remain locked, with
> `list_op_pending' pointing somewhere, when entering the VDSO unlock
> path. Once execution steps into the critical section, it expects the
> kernel to perform the fixup that is, to unlock the lock and clear
> `list_op_pending'.

No. The kernel clears list_op_pending when the unlock via CMPXCHG was
successful.

The unlock and clear combo happens in the syscall with the UNLOCK
modifier set.

> +static char *vdso_dbg_file =
> +#ifdef __LP64__
> + "vdso64.so.dbg";
> +#else
> + "vdso32.so.dbg";
> +#endif

You wish. There is a zoo of names for the VDSOs and not all
architectures do the vdso64/32 scheme.

> +
> +static bool pc_is_within(struct user_regs_struct *regs, uint64_t start, uint64_t end)
> +{
> + unsigned long pc;
> +
> +#if defined(__x86_64__)
> + pc = regs->rip;
> +#elif defined(__riscv)
> + pc = reg->pc;
> +# error Missing ptrace support

#else
# error not supported

> +#endif
> + if (pc >= (long) start && pc < end)
> + return true;
> + return false;

return pc >= start && pc < end;

perhaps?

> +static int is_vdso_mod(const char *mod)
> +{
> + return !strncmp(mod, "[vdso: ", 7);
> +}
> +
> +static int module_callback(Dwfl_Module *mod, void **userdata, const char *name,
> + Dwarf_Addr start, void *arg)
> +{
> + const char *sname;

Snip.

> + if (dwfl_getmodules(dwfl, module_callback, NULL, 0) != 0)
> + err (1, "dwfl_getmodules: %s", dwfl_errmsg(-1));
> + dwfl_end(dwfl);
> +}

That's a lot of complicated code. Wouldn't it be simpler to have a
wrapper script which finds the vdso64/32.so.dbg for the system and gets
the offsets via objdump -t and hands them in as command line arguments?

Then you can use the simple function lookup from patch 14. No strong
preference though. Just a thought.

> +
> +static pthread_mutex_t test_mutex;

Newline between the variable and the code please.

> +static int unlock_uncontended(struct __test_metadata *_metadata, bool is_32bit)
> +{
> + struct robust_list_head *rhead;
> + uint32_t *lock;
> + pid_t lock_tid;
> + int error = 1;
> + pid_t tid;
> + size_t sz;
> +
> + syscall(SYS_get_robust_list, 0, &rhead, &sz);

Can fail.

> + pthread_mutex_init(&test_mutex, NULL);
> +
> + tid = gettid();
> + lock = (uint32_t *)&test_mutex.__data.__lock;
> + *lock = tid;
> + /* Set the pending prior unlocking */
> + rhead->list_op_pending = (struct robust_list *)&test_mutex.__data.__list.__next;
> +
> + raise(SIGTRAP);
> + if (is_32bit)
> + lock_tid = frtu32(lock, tid, (uint32_t *)&rhead->list_op_pending);
> + else
> + lock_tid = frtu64(lock, tid, (uint64_t *)&rhead->list_op_pending);
> +
> + if (lock_tid != tid)
> + TH_LOG("Non contended unlock failed. Return: %08x expected %08x\n",
> + lock_tid, tid);
> + else
> + error = 0;
> + return error;
> +}
> +
> +enum trace_state {
> + STATE_WAIT = 0,
> + STATE_ENTER_VDSO,
> + STATE_IN_CS,
> + STATE_OUT_CS,
> + STATE_LEAVE_VDSO,
> +};
> +
> +static void trace_child(struct __test_metadata *_metadata, pid_t child, bool is_32bit)
> +{
> + int state = STATE_WAIT;
> + struct robust_list_head *rhead;
> + size_t sz;
> + bool do_end = false;

Variable ordering reverse fir tree please.

> + syscall(SYS_get_robust_list, 0, &rhead, &sz);

Can fail.

> + do {
> + struct user_regs_struct regs;
> + uint64_t lock_val;
> + bool in_vdso;
> + int wstatus;
> + pid_t rpid;
> +
> + rpid = waitpid(child, &wstatus, 0);
> + if (rpid != child)
> + errx(1, "waitpid");
> + if (!do_end) {
> + if (WSTOPSIG(wstatus) != SIGTRAP)
> + errx(1, "NOT SIGTRAP");
> + } else {
> + if (!WIFEXITED(wstatus))
> + errx(1, "Did not exit, but we are done");
> + ASSERT_EQ(WEXITSTATUS(wstatus), 0);
> + return;
> + }
> +
> + if (ptrace(PTRACE_GETREGS, child, 0, &regs) != 0)
> + errx(1, "PTRACE_GETREGS");
> +
> + if (is_32bit) {
> + in_vdso = pc_is_within(&regs, (long)frtu32, frtu32_end);
> + } else {
> + in_vdso = pc_is_within(&regs, (long)frtu64, frtu64_end);
> + }

No brackets required.

> + if (in_vdso) {
> + unsigned long rhead_val;
> +
> + if (state == STATE_WAIT) {
> + state = STATE_ENTER_VDSO;
> +
> + } else {
> + if (is_32bit) {
> + if (pc_is_within(&regs, __futex_list32_try_unlock_cs_start,
> + __futex_list32_try_unlock_cs_end))
> + state = STATE_IN_CS;
> + else
> + state = STATE_OUT_CS;
> + } else {
> + if (pc_is_within(&regs, __futex_list64_try_unlock_cs_start,
> + __futex_list64_try_unlock_cs_end))
> + state = STATE_IN_CS;
> + else
> + state = STATE_OUT_CS;
> + }
> + }

See below.


> + errno = 0;
> + rhead_val = ptrace(PTRACE_PEEKDATA, child, &rhead->list_op_pending, 0);
> + if (rhead_val == -1 && errno != 0)
> + err(1, "PTRACE_PEEKDATA");
> +
> + lock_val = ptrace(PTRACE_PEEKDATA, child, &test_mutex.__data.__lock, 0);
> + if (lock_val == -1 && errno != 0)
> + err(1, "PTRACE_PEEKDATA");
> +
> + if (state == STATE_ENTER_VDSO) {
> + /* Entering VDSO, it supposed to be locked */
> + ASSERT_NE(rhead_val, 0);
> + ASSERT_EQ(lock_val, child);
> +
> + } else if (state == STATE_IN_CS) {
> + /*
> + * If the critical section has been entered then
> + * the kernel has to unlock and clean list_op_pending.

If the critical section has been entered then the cmpxchg succeeded for
the uncontended case and failed for the contended case. In the success
case the kernel must clear the pointer. In the failed case the kernel is
not allowed to touch it.

So the tests should validate both scenarios. You can differentiate
between the cases via lockval == child and lockval == child |
FUTEX_WAITERS or via a state flag.

Also in the success case it must reach the success label. In the failure
case it's not allowed to do so.

I think you can simplify the state handling as well.

if (ptrace(PTRACE_GETREGS, child, 0, &regs) != 0)
errx(1, "PTRACE_GETREGS");

rhead_val = ptrace(....);
if (test_32bit)
rhead_val &= UINT_MAX;

lock_val = ptrace(....);
/* Get the expected value to see which variant is tested */
lock_exp = ptrace(....);
ASSERT_EQ(lock_exp & ~FUTEX_WAITERS, child);

switch (state) {
case STATE_WAIT:
if (pc_is_within_function(regs, is_32bit))
state = STATE_IN_FUNCTION;
break;

case STATE_IN_FUNCTION:
ASSERT_NE(rhead_val, 0);
ASSERT_EQ(lock_val, lock_exp);
if (pc_is_within_cs(regs, is_32bit))
state = STATE_IN_CS;
break;

case STATE_IN_CS:
if (lock_exp & FUTEX_WAITERS) {
ASSERT_EQ(lock_val, lock_exp);
ASSERT_NE(rhead_val, 0);
ASSERT_NE(pc_within_success(regs, is_32bit), true);
} else {
ASSERT_EQ(lock_val, 0);
ASSERT_NE(rhead_val, 0);
if (pc_within_success(...))
state = STATE_IN_SUCCESS;
else
ASSERT_EQ(pc_within_cs(...), true);
}
break;
...
}

You get the idea.

> +
> +TEST(robust_unlock_64)
> +{
> + pid_t child;
> +
> + if (!frtu64)
> + SKIP(return, "Missing __vdso_futex_robust_list64_try_unlock() in VDSO\n");

Which is valid when this is built as 32-bit executable, so the test
should not be there at all.

Thanks,

tglx