Re: [PATCH v5 02/18] mm/sl[au]b: rearrange struct slab fields to allow larger rcu_head
From: Vlastimil Babka
Date: Fri Sep 02 2022 - 05:26:50 EST
On 9/2/22 00:17, Joel Fernandes (Google) wrote:
> From: Vlastimil Babka <vbabka@xxxxxxx>
>
> Joel reports [1] that increasing the rcu_head size for debugging
> purposes used to work before struct slab was split from struct page, but
> now runs into the various SLAB_MATCH() sanity checks of the layout.
>
> This is because the rcu_head in struct page is in union with large
> sub-structures and has space to grow without exceeding their size, while
> in struct slab (for SLAB and SLUB) it's in union only with a list_head.
>
> On closer inspection (and after the previous patch) we can put all
> fields except slab_cache to a union with rcu_head, as slab_cache is
> sufficient for the rcu freeing callbacks to work and the rest can be
> overwritten by rcu_head without causing issues.
>
> This is only somewhat complicated by the need to keep SLUB's
> freelist+counters aligned for cmpxchg_double. As a result the fields
> need to be reordered so that slab_cache is first (after page flags) and
> the union with rcu_head follows. For consistency, do that for SLAB as
> well, although not necessary there.
>
> As a result, the rcu_head field in struct page and struct slab is no
> longer at the same offset, but that doesn't matter as there is no
> casting that would rely on that in the slab freeing callbacks, so we can
> just drop the respective SLAB_MATCH() check.
>
> Also we need to update the SLAB_MATCH() for compound_head to reflect the
> new ordering.
>
> While at it, also add a static_assert to check the alignment needed for
> cmpxchg_double so mistakes are found sooner than a runtime GPF.
>
> [1] https://lore.kernel.org/all/85afd876-d8bb-0804-b2c5-48ed3055e702@xxxxxxxxxxxxxxxxx/
>
> Reported-by: Joel Fernandes <joel@xxxxxxxxxxxxxxxxx>
> Signed-off-by: Vlastimil Babka <vbabka@xxxxxxx>
I've added patches 01 and 02 to slab tree for -next exposure before Joel's
full series posting, but it should be also ok if rcu tree carries them with
the whole patchset. I can then drop them from slab tree (there are no
dependencies with other stuff there) so we don't introduce duplicite commits
needlessly, just give me a heads up.
> ---
> mm/slab.h | 54 ++++++++++++++++++++++++++++++++----------------------
> 1 file changed, 32 insertions(+), 22 deletions(-)
>
> diff --git a/mm/slab.h b/mm/slab.h
> index 4ec82bec15ec..2c248864ea91 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -11,37 +11,43 @@ struct slab {
>
> #if defined(CONFIG_SLAB)
>
> + struct kmem_cache *slab_cache;
> union {
> - struct list_head slab_list;
> + struct {
> + struct list_head slab_list;
> + void *freelist; /* array of free object indexes */
> + void *s_mem; /* first object */
> + };
> struct rcu_head rcu_head;
> };
> - struct kmem_cache *slab_cache;
> - void *freelist; /* array of free object indexes */
> - void *s_mem; /* first object */
> unsigned int active;
>
> #elif defined(CONFIG_SLUB)
>
> - union {
> - struct list_head slab_list;
> - struct rcu_head rcu_head;
> -#ifdef CONFIG_SLUB_CPU_PARTIAL
> - struct {
> - struct slab *next;
> - int slabs; /* Nr of slabs left */
> - };
> -#endif
> - };
> struct kmem_cache *slab_cache;
> - /* Double-word boundary */
> - void *freelist; /* first free object */
> union {
> - unsigned long counters;
> struct {
> - unsigned inuse:16;
> - unsigned objects:15;
> - unsigned frozen:1;
> + union {
> + struct list_head slab_list;
> +#ifdef CONFIG_SLUB_CPU_PARTIAL
> + struct {
> + struct slab *next;
> + int slabs; /* Nr of slabs left */
> + };
> +#endif
> + };
> + /* Double-word boundary */
> + void *freelist; /* first free object */
> + union {
> + unsigned long counters;
> + struct {
> + unsigned inuse:16;
> + unsigned objects:15;
> + unsigned frozen:1;
> + };
> + };
> };
> + struct rcu_head rcu_head;
> };
> unsigned int __unused;
>
> @@ -66,9 +72,10 @@ struct slab {
> #define SLAB_MATCH(pg, sl) \
> static_assert(offsetof(struct page, pg) == offsetof(struct slab, sl))
> SLAB_MATCH(flags, __page_flags);
> -SLAB_MATCH(compound_head, slab_list); /* Ensure bit 0 is clear */
> #ifndef CONFIG_SLOB
> -SLAB_MATCH(rcu_head, rcu_head);
> +SLAB_MATCH(compound_head, slab_cache); /* Ensure bit 0 is clear */
> +#else
> +SLAB_MATCH(compound_head, slab_list); /* Ensure bit 0 is clear */
> #endif
> SLAB_MATCH(_refcount, __page_refcount);
> #ifdef CONFIG_MEMCG
> @@ -76,6 +83,9 @@ SLAB_MATCH(memcg_data, memcg_data);
> #endif
> #undef SLAB_MATCH
> static_assert(sizeof(struct slab) <= sizeof(struct page));
> +#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && defined(CONFIG_SLUB)
> +static_assert(IS_ALIGNED(offsetof(struct slab, freelist), 16));
> +#endif
>
> /**
> * folio_slab - Converts from folio to slab.