Re: [PATCH] locking/local_lock: Reduce local_[un]lock_nested_bh() overhead

From: Eric Dumazet

Date: Mon Mar 09 2026 - 10:04:28 EST


On Mon, Mar 9, 2026 at 2:44 PM Peter Zijlstra <peterz@xxxxxxxxxxxxx> wrote:
>
> On Mon, Mar 09, 2026 at 12:20:55PM +0000, Eric Dumazet wrote:
>
> > diff --git a/include/linux/local_lock.h b/include/linux/local_lock.h
> > index b8830148a8591c17c22e36470fbc13ff5c354955..40c2da54a0b720265be7b6327e0922a49befd8fc 100644
> > --- a/include/linux/local_lock.h
> > +++ b/include/linux/local_lock.h
> > @@ -94,12 +94,19 @@ DEFINE_LOCK_GUARD_1(local_lock_irqsave, local_lock_t __percpu,
> > local_unlock_irqrestore(_T->lock, _T->flags),
> > unsigned long flags)
> >
> > +#if defined(WARN_CONTEXT_ANALYSIS) || defined(CONFIG_PREEMPT_RT) || \
> > + defined(CONFIG_DEBUG_LOCK_ALLOC)
> > #define local_lock_nested_bh(_lock) \
> > __local_lock_nested_bh(__this_cpu_local_lock(_lock))
> >
> > #define local_unlock_nested_bh(_lock) \
> > __local_unlock_nested_bh(__this_cpu_local_lock(_lock))
> >
> > +#else
> > +static inline void local_lock_nested_bh(local_lock_t *_lock) {}
> > +static inline void local_unlock_nested_bh(local_lock_t *__lock) {}
> > +#endif
>
> This isn't going to work; WARN_CONTEXT_ANALYSIS is unconditional on
> clang >= 22.1
>
> How come this isn't DCE'd (dead-code eliminated) properly?

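It might be only partially done: in the test below the lock/unlock calls
themselves generate no code, but a load of this_cpu_off is still emitted.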

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 0e217041958a83d2a3c18de2965808442546c49b..50455951dc38668b0cbbcccdb2c5ce726e3c4da9 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -7498,3 +7498,12 @@ struct vlan_type_depth __vlan_get_protocol_offset(const struct sk_buff *skb,
};
}
EXPORT_SYMBOL(__vlan_get_protocol_offset);
+
+void ericeric(void);
+void ericeric(void)
+{
+ local_lock_nested_bh(&napi_alloc_cache.bh_lock);
+ local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
+ local_lock_nested_bh(&napi_alloc_cache.bh_lock);
+ local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
+}

objdump --disassemble=ericeric -r net/core/skbuff.o

net/core/skbuff.o: file format elf64-x86-64


Disassembly of section .text:

000000000000fe40 <ericeric>:
fe40: f3 0f 1e fa endbr64
fe44: e8 00 00 00 00 call fe49 <ericeric+0x9>
fe45: R_X86_64_PLT32 __fentry__-0x4
fe49: 65 48 8b 05 00 00 00 mov %gs:0x0(%rip),%rax # fe51 <ericeric+0x11>
fe50: 00
fe4d: R_X86_64_PC32 this_cpu_off-0x4
fe51: 2e e9 00 00 00 00 cs jmp fe57 <ericeric+0x17>
fe53: R_X86_64_PLT32 __x86_return_thunk-0x4

Disassembly of section .init.text: