Re: [PATCH v3 08/11] slub: Replace cmpxchg_double()

From: Peter Zijlstra
Date: Thu May 25 2023 - 06:31:15 EST


On Wed, May 24, 2023 at 11:32:47AM +0200, Peter Zijlstra wrote:
> On Mon, May 15, 2023 at 09:57:07AM +0200, Peter Zijlstra wrote:
>
> > @@ -3008,6 +3029,22 @@ static inline bool pfmemalloc_match(stru
> > }
> >
> > #ifndef CONFIG_SLUB_TINY
> > +static inline bool
> > +__update_cpu_freelist_fast(struct kmem_cache *s,
> > + void *freelist_old, void *freelist_new,
> > + unsigned long tid)
> > +{
> > +#ifdef system_has_freelist_aba
> > + freelist_aba_t old = { .freelist = freelist_old, .counter = tid };
> > + freelist_aba_t new = { .freelist = freelist_new, .counter = next_tid(tid) };
> > +
> > + return this_cpu_cmpxchg_freelist(s->cpu_slab->freelist_tid.full,
> > + old.full, new.full) == old.full;
> > +#else
> > + return false;
> > +#endif
> > +}
> > +
> > /*
> > * Check the slab->freelist and either transfer the freelist to the
> > * per cpu freelist or deactivate the slab.
> > @@ -3359,11 +3396,7 @@ static __always_inline void *__slab_allo
> > * against code executing on this cpu *not* from access by
> > * other cpus.
> > */
> > - if (unlikely(!this_cpu_cmpxchg_double(
> > - s->cpu_slab->freelist, s->cpu_slab->tid,
> > - object, tid,
> > - next_object, next_tid(tid)))) {
> > -
> > + if (unlikely(!__update_cpu_freelist_fast(s, object, next_object, tid))) {
> > note_cmpxchg_failure("slab_alloc", s, tid);
> > goto redo;
> > }
> > @@ -3736,11 +3769,7 @@ static __always_inline void do_slab_free
> >
> > set_freepointer(s, tail_obj, freelist);
> >
> > - if (unlikely(!this_cpu_cmpxchg_double(
> > - s->cpu_slab->freelist, s->cpu_slab->tid,
> > - freelist, tid,
> > - head, next_tid(tid)))) {
> > -
> > + if (unlikely(!__update_cpu_freelist_fast(s, freelist, head, tid))) {
> > note_cmpxchg_failure("slab_free", s, tid);
> > goto redo;
> > }
>
> This isn't right; the this_cpu_cmpxchg_double() was unconditional and
> relied on the local_irq_save() fallback when no native cmpxchg128 is
> present.

This then also means I need to look at this_cpu_cmpxchg128 and
this_cpu_cmoxchg64 behaviour when we dont have the CPUID feature.

Because current verions seem to assume the instruction is present.