Re: [PATCH v3 08/11] slub: Replace cmpxchg_double()

From: Peter Zijlstra
Date: Wed May 24 2023 - 05:34:26 EST


On Mon, May 15, 2023 at 09:57:07AM +0200, Peter Zijlstra wrote:

> @@ -3008,6 +3029,22 @@ static inline bool pfmemalloc_match(stru
> }
>
> #ifndef CONFIG_SLUB_TINY
> +static inline bool
> +__update_cpu_freelist_fast(struct kmem_cache *s,
> + void *freelist_old, void *freelist_new,
> + unsigned long tid)
> +{
> +#ifdef system_has_freelist_aba
> + freelist_aba_t old = { .freelist = freelist_old, .counter = tid };
> + freelist_aba_t new = { .freelist = freelist_new, .counter = next_tid(tid) };
> +
> + return this_cpu_cmpxchg_freelist(s->cpu_slab->freelist_tid.full,
> + old.full, new.full) == old.full;
> +#else
> + return false;
> +#endif
> +}
> +
> /*
> * Check the slab->freelist and either transfer the freelist to the
> * per cpu freelist or deactivate the slab.
> @@ -3359,11 +3396,7 @@ static __always_inline void *__slab_allo
> * against code executing on this cpu *not* from access by
> * other cpus.
> */
> - if (unlikely(!this_cpu_cmpxchg_double(
> - s->cpu_slab->freelist, s->cpu_slab->tid,
> - object, tid,
> - next_object, next_tid(tid)))) {
> -
> + if (unlikely(!__update_cpu_freelist_fast(s, object, next_object, tid))) {
> note_cmpxchg_failure("slab_alloc", s, tid);
> goto redo;
> }
> @@ -3736,11 +3769,7 @@ static __always_inline void do_slab_free
>
> set_freepointer(s, tail_obj, freelist);
>
> - if (unlikely(!this_cpu_cmpxchg_double(
> - s->cpu_slab->freelist, s->cpu_slab->tid,
> - freelist, tid,
> - head, next_tid(tid)))) {
> -
> + if (unlikely(!__update_cpu_freelist_fast(s, freelist, head, tid))) {
> note_cmpxchg_failure("slab_free", s, tid);
> goto redo;
> }

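This isn't right; the this_cpu_cmpxchg_double() was unconditional and
relied on the local_irq_save() fallback when no native cmpxchg128 is
present. The new helper instead returns false in that case, so the
fast path can never succeed and both callers keep taking the
'goto redo' branch.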

The delta below makes things boot again when system_has_cmpxchg128 is
not defined.

I'm going to zap these patches from tip/locking/core for a few days,
fold the delta below back into the series, and let it run through the
robots again.

---
mm/slab.h | 20 +++++++++++---------
mm/slub.c | 6 +-----
2 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/mm/slab.h b/mm/slab.h
index 5880c70de3d6..b191bf68e6e0 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -6,36 +6,36 @@
*/
void __init kmem_cache_init(void);

-#ifdef CONFIG_HAVE_ALIGNED_STRUCT_PAGE
#ifdef CONFIG_64BIT
# ifdef system_has_cmpxchg128
# define system_has_freelist_aba() system_has_cmpxchg128()
# define try_cmpxchg_freelist try_cmpxchg128
-# define this_cpu_cmpxchg_freelist this_cpu_cmpxchg128
-typedef u128 freelist_full_t;
# endif
+#define this_cpu_cmpxchg_freelist this_cpu_cmpxchg128
+typedef u128 freelist_full_t;
#else /* CONFIG_64BIT */
# ifdef system_has_cmpxchg64
# define system_has_freelist_aba() system_has_cmpxchg64()
# define try_cmpxchg_freelist try_cmpxchg64
-# define this_cpu_cmpxchg_freelist this_cpu_cmpxchg64
-typedef u64 freelist_full_t;
# endif
+#define this_cpu_cmpxchg_freelist this_cpu_cmpxchg64
+typedef u64 freelist_full_t;
#endif /* CONFIG_64BIT */
-#endif /* CONFIG_HAVE_ALIGNED_STRUCT_PAGE */
+
+#if defined(system_has_freelist_aba) && !defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
+#undef system_has_freelist_aba
+#endif

/*
* Freelist pointer and counter to cmpxchg together, avoids the typical ABA
* problems with cmpxchg of just a pointer.
*/
typedef union {
-#ifdef system_has_freelist_aba
struct {
void *freelist;
unsigned long counter;
};
freelist_full_t full;
-#endif
} freelist_aba_t;

/* Reuses the bits in struct page */
@@ -82,7 +82,9 @@ struct slab {
};
};
};
+#ifdef system_has_freelist_aba
freelist_aba_t freelist_counter;
+#endif
};
};
struct rcu_head rcu_head;
@@ -110,7 +112,7 @@ SLAB_MATCH(memcg_data, memcg_data);
#undef SLAB_MATCH
static_assert(sizeof(struct slab) <= sizeof(struct page));
#if defined(system_has_freelist_aba) && defined(CONFIG_SLUB)
-static_assert(IS_ALIGNED(offsetof(struct slab, freelist), 2*sizeof(void *)));
+static_assert(IS_ALIGNED(offsetof(struct slab, freelist), sizeof(freelist_aba_t)));
#endif

/**
diff --git a/mm/slub.c b/mm/slub.c
index 161b091746b7..af92c770606d 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3034,15 +3034,11 @@ __update_cpu_freelist_fast(struct kmem_cache *s,
void *freelist_old, void *freelist_new,
unsigned long tid)
{
-#ifdef system_has_freelist_aba
freelist_aba_t old = { .freelist = freelist_old, .counter = tid };
freelist_aba_t new = { .freelist = freelist_new, .counter = next_tid(tid) };

return this_cpu_cmpxchg_freelist(s->cpu_slab->freelist_tid.full,
old.full, new.full) == old.full;
-#else
- return false;
-#endif
}

/*