Re: [PATCH v4 30/39] unwind_user/deferred: Make unwind deferral requests NMI-safe
From: Josh Poimboeuf
Date: Thu Jan 23 2025 - 18:34:59 EST
On Thu, Jan 23, 2025 at 11:17:34PM +0100, Peter Zijlstra wrote:
> On Thu, Jan 23, 2025 at 11:48:07AM -0800, Josh Poimboeuf wrote:
> > > > > cookie = READ_ONCE(current->unwind_info.cookie);
> > > > > do {
> > > > > if (cookie)
> > > > > return cookie;
> > > > > cookie = ctx_to_cookie(cpu, ctr+1);
> > > > > } while (!try_cmpxchg64(&current->unwind_info.cookie, &cookie, cookie));
> >
> > Should not the 2nd argument be &zero?
>
> This I suppose
>
> cookie = READ_ONCE(current->unwind_info.cookie);
> do {
> if (cookie)
> return cookie;
> } while (!try_cmpxchg64(&current->unwind_info.cookie, &cookie,
> ctx_to_cookie(cpu, ctr+1)));
Here's what I have at the moment. unwind_deferred_request() is vastly
simplified thanks to your suggestions, no more NMI-specific muck.
work->pending is replaced with work->cookie so the task work can use it
in case it crossed the cookie-clearing boundary (return-to-user with
IRQs disabled).
static inline bool nmi_supported(void)
{
return IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG);
}
/*
* Read the task context cookie, initializing it if this is the first request
* since the most recent entry from user.
*/
static u64 get_cookie(void)
{
struct unwind_task_info *info = ¤t->unwind_info;
u64 ctr, cookie, old_cookie = 0;
unsigned int cpu;
cookie = READ_ONCE(info->cookie);
if (cookie)
return cookie;
guard(irqsave)();
cpu = raw_smp_processor_id();
ctr = __this_cpu_read(unwind_ctx_ctr);
cookie = ctx_to_cookie(cpu, ++ctr);
/* Make sure the cookie is non-zero. */
if (!cookie)
cookie = ctx_to_cookie(cpu, ++ctr);
/* Write the task cookie unless an NMI just now swooped in to do so. */
if (!try_cmpxchg64(&info->cookie, &old_cookie, cookie))
return old_cookie;
/* Success, update the context counter. */
__this_cpu_write(unwind_ctx_ctr, ctr);
return cookie;
}
/*
* Schedule a user space unwind to be done in task work before exiting the
* kernel.
*
* The returned cookie output is a unique identifier for the current task entry
* context. Its value will also be passed to the callback function. It can be
* used to stitch kernel and user stack traces together in post-processing.
*
* It's valid to call this function multiple times for the same @work within
* the same task entry context. Each call will return the same cookie. If the
* callback is already pending, an error will be returned along with the
* cookie. If the callback is not pending because it has already been
* previously called for the same entry context, it will be called again with
* the same stack trace and cookie.
*
* Thus there are three possible return scenarios:
*
* * return != 0, *cookie == 0: the operation failed, no pending callback.
*
* * return != 0, *cookie != 0: the callback is already pending. The cookie
* can still be used to correlate with the pending callback.
*
* * return == 0, *cookie != 0: the callback queued successfully. The
* callback is guaranteed to be called with the given cookie.
*/
int unwind_deferred_request(struct unwind_work *work, u64 *cookie)
{
	enum task_work_notify_mode mode;
	u64 unclaimed = 0;
	int ret;

	/* NMIs need the NMI-capable task_work notification mode. */
	mode = in_nmi() ? TWA_NMI_CURRENT : TWA_RESUME;

	*cookie = 0;

	/* Only tasks that entered from user space can be unwound. */
	if (!current->mm || !user_mode(task_pt_regs(current)))
		return -EINVAL;

	if (in_nmi() && !nmi_supported())
		return -EINVAL;

	/* Return a valid @cookie even if the callback is already pending. */
	*cookie = READ_ONCE(work->cookie);
	if (*cookie)
		return -EEXIST;

	*cookie = get_cookie();

	/* Claim the work unless an NMI just now swooped in to do so. */
	if (!try_cmpxchg64(&work->cookie, &unclaimed, *cookie))
		return -EEXIST;

	/* The work has been claimed, now schedule it. */
	ret = task_work_add(current, &work->work, mode);
	if (WARN_ON_ONCE(ret))
		WRITE_ONCE(work->cookie, 0);

	return ret;
}
--
Josh