[PATCH RFC 01/15] mm/slab: always zero only requested size on alloc

From: Vlastimil Babka (SUSE)

Date: Tue Jun 09 2026 - 05:24:36 EST

When zeroing on alloc is requested (by __GFP_ZERO or the init_on_alloc
parameter), we have been trying to zero the whole kmalloc bucket size
and not just requested size, if possible.

This probably comes from the past where ksize() could be used to
discover the bucket size and use it opportunistically beyond the
requested size. This is now forbidden and enabling debugging such as
KASAN or slab's red zoning would catch this misuse. Therefore, nobody
can be relying on __GFP_ZERO zeroing beyond requested size.

Theoretically it might still improve hardening in case of unintended
accesses beyond requested size accessing some sensitive data from a
previous allocation. But then, init_on_free is probably used also for
hardening and would have cleared that.

So the usefulness of zeroing beyond requested size is practically none
nowadays. The disadvantages for doing it are:

- Interaction with KFENCE, which performs the zeroing on its own because
it has its own redzone beyond requested size. As a consequence
slab_post_alloc_hook() has an 'init' parameter which has to be
evaluated in all callers (via slab_want_init_on_alloc()).

For kfence allocations in slab_alloc_node() this evaluation is subtly
skipped over in order to do the right thing. Other callers (i.e.
kmem_cache_alloc_bulk_noprof()) evaluate it unconditionally even if
they do end up with a kfence allocation. This is only subtly not a
problem, as those are not kmalloc allocations and are using
s->object_size as requested size, so it doesn't interfere with kfence's
redzone. There's just an unnecessary double zeroing (in both kfence and
slab_post_alloc_hook()), but it's all very fragile and contradicts the
comment in kfence_guarded_alloc().

- Interaction with slab's redzoning where we have to limit the zeroing
to requested size.

We can make the code much simpler by always zeroing only up to the
requested size. Move slab_want_init_on_alloc() call to
slab_post_alloc_hook(), removing the parameter. Remove the red zone
handling.

For kfence's zeroing code, update the comment. We could remove it
completely, but due to possible interactions with KASAN, there are
configurations where neither slab nor KASAN would zero the object,
so simply do it in kfence. At worst the zeroing will happen twice, but
kfence allocations are rare by design so the cost is negligible.

Signed-off-by: Vlastimil Babka (SUSE) <vbabka@xxxxxxxxxx>
---
mm/kfence/core.c | 6 +++---
mm/slub.c | 35 +++++++----------------------------
2 files changed, 10 insertions(+), 31 deletions(-)

diff --git a/mm/kfence/core.c b/mm/kfence/core.c
index 655dc5ce3240..c765ba0a3a67 100644
--- a/mm/kfence/core.c
+++ b/mm/kfence/core.c
@@ -499,9 +499,9 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g
set_canary(meta);

/*
- * We check slab_want_init_on_alloc() ourselves, rather than letting
- * SL*B do the initialization, as otherwise we might overwrite KFENCE's
- * redzone.
+ * SLUB will generally init kfence objects, but due to possible
+ * interactions with KASAN, it might not happen, so do it ourselves.
+ * In the worst case the init just happens twice.
*/
if (unlikely(slab_want_init_on_alloc(gfp, cache)))
memzero_explicit(addr, size);
diff --git a/mm/slub.c b/mm/slub.c
index 63c1ef998dd3..f787dc422d1b 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4565,26 +4565,14 @@ struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)

static __fastpath_inline
bool slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
- gfp_t flags, size_t size, void **p, bool init,
+ gfp_t flags, size_t size, void **p,
unsigned int orig_size)
{
- unsigned int zero_size = s->object_size;
+ bool init = slab_want_init_on_alloc(flags, s);
bool kasan_init = init;
size_t i;
gfp_t init_flags = flags & gfp_allowed_mask;

- /*
- * For kmalloc object, the allocated memory size(object_size) is likely
- * larger than the requested size(orig_size). If redzone check is
- * enabled for the extra space, don't zero it, as it will be redzoned
- * soon. The redzone operation for this extra space could be seen as a
- * replacement of current poisoning under certain debug option, and
- * won't break other sanity checks.
- */
- if (kmem_cache_debug_flags(s, SLAB_STORE_USER | SLAB_RED_ZONE) &&
- (s->flags & SLAB_KMALLOC))
- zero_size = orig_size;
-
/*
* When slab_debug is enabled, avoid memory initialization integrated
* into KASAN and instead zero out the memory via the memset below with
@@ -4607,7 +4595,7 @@ bool slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
p[i] = kasan_slab_alloc(s, p[i], init_flags, kasan_init);
if (p[i] && init && (!kasan_init ||
!kasan_has_integrated_init()))
- memset(p[i], 0, zero_size);
+ memset(p[i], 0, orig_size);
if (gfpflags_allow_spinning(flags))
kmemleak_alloc_recursive(p[i], s->object_size, 1,
s->flags, init_flags);
@@ -4908,7 +4896,6 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list
gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
{
void *object;
- bool init = false;

s = slab_pre_alloc_hook(s, gfpflags);
if (unlikely(!s))
@@ -4924,16 +4911,13 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list
object = __slab_alloc_node(s, gfpflags, node, addr, orig_size);

maybe_wipe_obj_freeptr(s, object);
- init = slab_want_init_on_alloc(gfpflags, s);

out:
/*
- * When init equals 'true', like for kzalloc() family, only
- * @orig_size bytes might be zeroed instead of s->object_size
* In case this fails due to memcg_slab_post_alloc_hook(),
* object is set to NULL
*/
- slab_post_alloc_hook(s, lru, gfpflags, 1, &object, init, orig_size);
+ slab_post_alloc_hook(s, lru, gfpflags, 1, &object, orig_size);

return object;
}
@@ -5228,7 +5212,6 @@ kmem_cache_alloc_from_sheaf_noprof(struct kmem_cache *s, gfp_t gfp,
struct slab_sheaf *sheaf)
{
void *ret = NULL;
- bool init;

if (sheaf->size == 0)
goto out;
@@ -5238,10 +5221,8 @@ kmem_cache_alloc_from_sheaf_noprof(struct kmem_cache *s, gfp_t gfp,
if (likely(!ret))
ret = sheaf->objects[--sheaf->size];

- init = slab_want_init_on_alloc(gfp, s);
-
/* add __GFP_NOFAIL to force successful memcg charging */
- slab_post_alloc_hook(s, NULL, gfp | __GFP_NOFAIL, 1, &ret, init, s->object_size);
+ slab_post_alloc_hook(s, NULL, gfp | __GFP_NOFAIL, 1, &ret, s->object_size);
out:
trace_kmem_cache_alloc(_RET_IP_, ret, s, gfp, NUMA_NO_NODE);

@@ -5421,8 +5402,7 @@ void *_kmalloc_nolock_noprof(DECL_TOKEN_PARAMS(size, token), gfp_t gfp_flags, in

success:
maybe_wipe_obj_freeptr(s, ret);
- slab_post_alloc_hook(s, NULL, alloc_gfp, 1, &ret,
- slab_want_init_on_alloc(alloc_gfp, s), orig_size);
+ slab_post_alloc_hook(s, NULL, alloc_gfp, 1, &ret, orig_size);

ret = kasan_kmalloc(s, ret, orig_size, alloc_gfp);
return ret;
@@ -7337,8 +7317,7 @@ bool kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags,

out:
/* memcg and kmem_cache debug support and memory initialization */
- return likely(slab_post_alloc_hook(s, NULL, flags, size, p,
- slab_want_init_on_alloc(flags, s), s->object_size));
+ return likely(slab_post_alloc_hook(s, NULL, flags, size, p, s->object_size));
}
EXPORT_SYMBOL(kmem_cache_alloc_bulk_noprof);

--
2.54.0