Re: [PATCH v17 02/16] preempt: Track NMI nesting to separate per-CPU counter
From: Peter Zijlstra
Date: Tue Feb 03 2026 - 07:15:39 EST
On Wed, Jan 21, 2026 at 05:39:05PM -0500, Lyude Paul wrote:
> #define __nmi_enter() \
> do { \
> lockdep_off(); \
> arch_nmi_enter(); \
> - BUG_ON(in_nmi() == NMI_MASK); \
> - __preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET); \
> + BUG_ON(__this_cpu_read(nmi_nesting) == UINT_MAX); \
> + __this_cpu_inc(nmi_nesting); \
> + __preempt_count_add(HARDIRQ_OFFSET); \
> + preempt_count_set(preempt_count() | NMI_MASK); \
> } while (0)
>
> #define nmi_enter() \
> @@ -124,8 +128,12 @@ void irq_exit_rcu(void);
>
> #define __nmi_exit() \
> do { \
> + unsigned int nesting; \
> BUG_ON(!in_nmi()); \
> - __preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \
> + __preempt_count_sub(HARDIRQ_OFFSET); \
> + nesting = __this_cpu_dec_return(nmi_nesting); \
> + if (!nesting) \
> + __preempt_count_sub(NMI_OFFSET); \
> arch_nmi_exit(); \
> lockdep_on(); \
> } while (0)
While not wrong like last time, it is pretty awful.
preempt_count_set() is a cmpxchg() loop.
Would not something like so be better?
#define __nmi_enter() \
do { \
+ unsigned int _o = NMI_MASK + HARDIRQ_OFFSET; \
lockdep_off(); \
arch_nmi_enter(); \
- BUG_ON(in_nmi() == NMI_MASK); \
- __preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET); \
+ BUG_ON(__this_cpu_read(nmi_nesting) == ~0U); \
+ __this_cpu_inc(nmi_nesting); \
+ _o -= (preempt_count() & NMI_MASK); \
+ __preempt_count_add(_o); \
} while (0)
#define __nmi_exit() \
do { \
+ unsigned int _o = HARDIRQ_OFFSET; \
BUG_ON(!in_nmi()); \
- __preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \
+ if (!__this_cpu_dec_return(nmi_nesting)) \
+ _o += NMI_MASK; \
+ __preempt_count_sub(_o); \
arch_nmi_exit(); \
lockdep_on(); \
} while (0)
But I'm really somewhat sad that 64bit can't do better than this.