Re: mm, slab/slub: Ensure kmem_cache_alloc_bulk() is available early

From: Hyeonggon Yoo
Date: Wed Feb 08 2023 - 08:20:31 EST


On Tue, Feb 07, 2023 at 03:16:53PM +0100, Thomas Gleixner wrote:
> The memory allocators are available during early boot even in the phase
> where interrupts are disabled and scheduling is not yet possible.
>
> The setup is so that GFP_KERNEL allocations work in this phase without
> causing might_alloc() splats to be emitted because the system state is
> SYSTEM_BOOTING at that point which prevents the warnings to trigger.
>
> Most allocation/free functions use local_irq_save()/restore() or a lock
> variant of that. But kmem_cache_alloc_bulk() and kmem_cache_free_bulk() use
> local_[lock]_irq_disable()/enable(), which leads to a lockdep warning when
> interrupts are enabled during the early boot phase.
>
> This went unnoticed so far as there are no early users of these
> interfaces. The upcoming conversion of the interrupt descriptor store from
> radix_tree to maple_tree triggered this warning as maple_tree uses the bulk
> interface.
>
> Cure this by moving the kmem_cache_alloc/free() bulk variants of SLUB and
> SLAB to local[_lock]_irq_save()/restore().
>
> There is obviously no reclaim possible and required at this point so there
> is no need to expand this coverage further.
>
> No functional change.
>
> Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> ---
> Initial version: https://lore.kernel.org/r/87o7qdzfay.ffs@tglx
> Changes: Update SLAB as well and add changelog
> ---
> mm/slab.c | 18 ++++++++++--------
> mm/slub.c | 9 +++++----
> 2 files changed, 15 insertions(+), 12 deletions(-)
>
> --- a/mm/slab.c
> +++ b/mm/slab.c
> @@ -3479,14 +3479,15 @@ cache_alloc_debugcheck_after_bulk(struct
> int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
> void **p)
> {
> - size_t i;
> struct obj_cgroup *objcg = NULL;
> + unsigned long irqflags;
> + size_t i;
>
> s = slab_pre_alloc_hook(s, NULL, &objcg, size, flags);
> if (!s)
> return 0;
>
> - local_irq_disable();
> + local_irq_save(irqflags);
> for (i = 0; i < size; i++) {
> void *objp = kfence_alloc(s, s->object_size, flags) ?:
> __do_cache_alloc(s, flags, NUMA_NO_NODE);
> @@ -3495,7 +3496,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca
> goto error;
> p[i] = objp;
> }
> - local_irq_enable();
> + local_irq_restore(irqflags);
>
> cache_alloc_debugcheck_after_bulk(s, flags, size, p, _RET_IP_);
>
> @@ -3508,7 +3509,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca
> /* FIXME: Trace call missing. Christoph would like a bulk variant */
> return size;
> error:
> - local_irq_enable();
> + local_irq_restore(irqflags);
> cache_alloc_debugcheck_after_bulk(s, flags, i, p, _RET_IP_);
> slab_post_alloc_hook(s, objcg, flags, i, p, false, s->object_size);
> kmem_cache_free_bulk(s, i, p);
> @@ -3610,8 +3611,9 @@ EXPORT_SYMBOL(kmem_cache_free);
>
> void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p)
> {
> + unsigned long flags;
>
> - local_irq_disable();
> + local_irq_save(flags);
> for (int i = 0; i < size; i++) {
> void *objp = p[i];
> struct kmem_cache *s;
> @@ -3621,9 +3623,9 @@ void kmem_cache_free_bulk(struct kmem_ca
>
> /* called via kfree_bulk */
> if (!folio_test_slab(folio)) {
> - local_irq_enable();
> + local_irq_restore(flags);
> free_large_kmalloc(folio, objp);
> - local_irq_disable();
> + local_irq_save(flags);
> continue;
> }
> s = folio_slab(folio)->slab_cache;
> @@ -3640,7 +3642,7 @@ void kmem_cache_free_bulk(struct kmem_ca
>
> __cache_free(s, objp, _RET_IP_);
> }
> - local_irq_enable();
> + local_irq_restore(flags);
>
> /* FIXME: add tracing */
> }
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -3913,6 +3913,7 @@ static inline int __kmem_cache_alloc_bul
> size_t size, void **p, struct obj_cgroup *objcg)
> {
> struct kmem_cache_cpu *c;
> + unsigned long irqflags;
> int i;
>
> /*
> @@ -3921,7 +3922,7 @@ static inline int __kmem_cache_alloc_bul
> * handlers invoking normal fastpath.
> */
> c = slub_get_cpu_ptr(s->cpu_slab);
> - local_lock_irq(&s->cpu_slab->lock);
> + local_lock_irqsave(&s->cpu_slab->lock, irqflags);
>
> for (i = 0; i < size; i++) {
> void *object = kfence_alloc(s, s->object_size, flags);
> @@ -3942,7 +3943,7 @@ static inline int __kmem_cache_alloc_bul
> */
> c->tid = next_tid(c->tid);
>
> - local_unlock_irq(&s->cpu_slab->lock);
> + local_unlock_irqrestore(&s->cpu_slab->lock, irqflags);
>
> /*
> * Invoking slow path likely have side-effect
> @@ -3956,7 +3957,7 @@ static inline int __kmem_cache_alloc_bul
> c = this_cpu_ptr(s->cpu_slab);
> maybe_wipe_obj_freeptr(s, p[i]);
>
> - local_lock_irq(&s->cpu_slab->lock);
> + local_lock_irqsave(&s->cpu_slab->lock, irqflags);
>
> continue; /* goto for-loop */
> }
> @@ -3965,7 +3966,7 @@ static inline int __kmem_cache_alloc_bul
> maybe_wipe_obj_freeptr(s, p[i]);
> }
> c->tid = next_tid(c->tid);
> - local_unlock_irq(&s->cpu_slab->lock);
> + local_unlock_irqrestore(&s->cpu_slab->lock, irqflags);

Looks good to me.

Reviewed-by: Hyeonggon Yoo <42.hyeyoo@xxxxxxxxx>

Thanks!

> slub_put_cpu_ptr(s->cpu_slab);
>
> return i;
>