Re: [PATCH v4 30/39] unwind_user/deferred: Make unwind deferral requests NMI-safe

From: Peter Zijlstra
Date: Wed Jan 22 2025 - 09:15:59 EST


On Tue, Jan 21, 2025 at 06:31:22PM -0800, Josh Poimboeuf wrote:

> diff --git a/kernel/unwind/deferred.c b/kernel/unwind/deferred.c
> index 2f38055cce48..939c94abaa50 100644
> --- a/kernel/unwind/deferred.c
> +++ b/kernel/unwind/deferred.c
> @@ -29,27 +29,49 @@ static u64 ctx_to_cookie(u64 cpu, u64 ctx)
>
> /*
> * Read the task context cookie, first initializing it if this is the first
> - * call to get_cookie() since the most recent entry from user.
> + * call to get_cookie() since the most recent entry from user. This has to be
> + * done carefully to coordinate with unwind_deferred_request_nmi().
> */
> static u64 get_cookie(struct unwind_task_info *info)
> {
> u64 ctx_ctr;
> u64 cookie;
> - u64 cpu;
>
> guard(irqsave)();
>
> - cookie = info->cookie;
> + cookie = READ_ONCE(info->cookie);
> if (cookie)
> return cookie;
>
> + ctx_ctr = __this_cpu_read(unwind_ctx_ctr);
>
> - cpu = raw_smp_processor_id();
> - ctx_ctr = __this_cpu_inc_return(unwind_ctx_ctr);
> - info->cookie = ctx_to_cookie(cpu, ctx_ctr);
> + /* Read ctx_ctr before info->nmi_cookie */
> + barrier();
> +
> + cookie = READ_ONCE(info->nmi_cookie);
> + if (cookie) {
> + /*
> + * This is the first call to get_cookie() since an NMI handler
> + * first wrote it to info->nmi_cookie. Sync it.
> + */
> + WRITE_ONCE(info->cookie, cookie);
> + WRITE_ONCE(info->nmi_cookie, 0);
> + return cookie;
> + }
> +
> + /*
> + * Write info->cookie. It's ok to race with an NMI here. The value of
> + * the cookie is based on ctx_ctr from before the NMI could have
> + * incremented it. The result will be the same even if cookie or
> + * ctx_ctr end up getting written twice.
> + */
> + cookie = ctx_to_cookie(raw_smp_processor_id(), ctx_ctr + 1);
> + WRITE_ONCE(info->cookie, cookie);
> + WRITE_ONCE(info->nmi_cookie, 0);
> + barrier();
> + __this_cpu_write(unwind_ctx_ctr, ctx_ctr + 1);
>
> return cookie;
> -
> }

Oh gawd. Can we please do something simple like:

guard(irqsave)();
cpu = raw_smp_processor_id();
ctr = __this_cpu_read(unwind_ctx_ctr);
cookie = READ_ONCE(current->unwind_info.cookie);
do {
if (cookie)
return cookie;
cookie = ctx_to_cookie(cpu, ctr+1);
} while (!try_cmpxchg64(&current->unwind_info.cookie, &cookie, cookie));
__this_cpu_write(unwind_ctx_ctr, ctr+1);
return cookie;