Re: [RFC PATCH] mm/slub: remove left-over debugging code
From: David Rientjes
Date: Mon Sep 16 2019 - 14:32:32 EST
On Mon, 16 Sep 2019, Qian Cai wrote:
> SLUB_RESILIENCY_TEST and SLUB_DEBUG_CMPXCHG look like some left-over
> debugging code during the internal development that probably nobody uses
> it anymore. Remove them to make the world greener.
Adding Pengfei Li who has been working on a patchset for modified handling
of kmalloc cache initialization and touches the resiliency test.
I still find the resiliency test to be helpful/instructional for handling
unexpected conditions in these caches, so I'd suggest against removing it:
the only downside is that it's additional source code. But it's helpful
source code for reference.
The cmpxchg failures could likely be more generalized beyond SLUB since
there will be other dependencies in the kernel than just this allocator.
(I assume you didn't send a Signed-off-by line because this is an RFC.)
> ---
> mm/slub.c | 110 --------------------------------------------------------------
> 1 file changed, 110 deletions(-)
>
> diff --git a/mm/slub.c b/mm/slub.c
> index 8834563cdb4b..f97155ba097d 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -150,12 +150,6 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
> * - Variable sizing of the per node arrays
> */
>
> -/* Enable to test recovery from slab corruption on boot */
> -#undef SLUB_RESILIENCY_TEST
> -
> -/* Enable to log cmpxchg failures */
> -#undef SLUB_DEBUG_CMPXCHG
> -
> /*
> * Mininum number of partial slabs. These will be left on the partial
> * lists even if they are empty. kmem_cache_shrink may reclaim them.
> @@ -392,10 +386,6 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page
> cpu_relax();
> stat(s, CMPXCHG_DOUBLE_FAIL);
>
> -#ifdef SLUB_DEBUG_CMPXCHG
> - pr_info("%s %s: cmpxchg double redo ", n, s->name);
> -#endif
> -
> return false;
> }
>
> @@ -433,10 +423,6 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
> cpu_relax();
> stat(s, CMPXCHG_DOUBLE_FAIL);
>
> -#ifdef SLUB_DEBUG_CMPXCHG
> - pr_info("%s %s: cmpxchg double redo ", n, s->name);
> -#endif
> -
> return false;
> }
>
> @@ -2004,45 +1990,11 @@ static inline unsigned long next_tid(unsigned long tid)
> return tid + TID_STEP;
> }
>
> -static inline unsigned int tid_to_cpu(unsigned long tid)
> -{
> - return tid % TID_STEP;
> -}
> -
> -static inline unsigned long tid_to_event(unsigned long tid)
> -{
> - return tid / TID_STEP;
> -}
> -
> static inline unsigned int init_tid(int cpu)
> {
> return cpu;
> }
>
> -static inline void note_cmpxchg_failure(const char *n,
> - const struct kmem_cache *s, unsigned long tid)
> -{
> -#ifdef SLUB_DEBUG_CMPXCHG
> - unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);
> -
> - pr_info("%s %s: cmpxchg redo ", n, s->name);
> -
> -#ifdef CONFIG_PREEMPT
> - if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
> - pr_warn("due to cpu change %d -> %d\n",
> - tid_to_cpu(tid), tid_to_cpu(actual_tid));
> - else
> -#endif
> - if (tid_to_event(tid) != tid_to_event(actual_tid))
> - pr_warn("due to cpu running other code. Event %ld->%ld\n",
> - tid_to_event(tid), tid_to_event(actual_tid));
> - else
> - pr_warn("for unknown reason: actual=%lx was=%lx target=%lx\n",
> - actual_tid, tid, next_tid(tid));
> -#endif
> - stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
> -}
> -
> static void init_kmem_cache_cpus(struct kmem_cache *s)
> {
> int cpu;
> @@ -2751,7 +2703,6 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
> object, tid,
> next_object, next_tid(tid)))) {
>
> - note_cmpxchg_failure("slab_alloc", s, tid);
> goto redo;
> }
> prefetch_freepointer(s, next_object);
> @@ -4694,66 +4645,6 @@ static int list_locations(struct kmem_cache *s, char *buf,
> }
> #endif /* CONFIG_SLUB_DEBUG */
>
> -#ifdef SLUB_RESILIENCY_TEST
> -static void __init resiliency_test(void)
> -{
> - u8 *p;
> - int type = KMALLOC_NORMAL;
> -
> - BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || KMALLOC_SHIFT_HIGH < 10);
> -
> - pr_err("SLUB resiliency testing\n");
> - pr_err("-----------------------\n");
> - pr_err("A. Corruption after allocation\n");
> -
> - p = kzalloc(16, GFP_KERNEL);
> - p[16] = 0x12;
> - pr_err("\n1. kmalloc-16: Clobber Redzone/next pointer 0x12->0x%p\n\n",
> - p + 16);
> -
> - validate_slab_cache(kmalloc_caches[type][4]);
> -
> - /* Hmmm... The next two are dangerous */
> - p = kzalloc(32, GFP_KERNEL);
> - p[32 + sizeof(void *)] = 0x34;
> - pr_err("\n2. kmalloc-32: Clobber next pointer/next slab 0x34 -> -0x%p\n",
> - p);
> - pr_err("If allocated object is overwritten then not detectable\n\n");
> -
> - validate_slab_cache(kmalloc_caches[type][5]);
> - p = kzalloc(64, GFP_KERNEL);
> - p += 64 + (get_cycles() & 0xff) * sizeof(void *);
> - *p = 0x56;
> - pr_err("\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
> - p);
> - pr_err("If allocated object is overwritten then not detectable\n\n");
> - validate_slab_cache(kmalloc_caches[type][6]);
> -
> - pr_err("\nB. Corruption after free\n");
> - p = kzalloc(128, GFP_KERNEL);
> - kfree(p);
> - *p = 0x78;
> - pr_err("1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
> - validate_slab_cache(kmalloc_caches[type][7]);
> -
> - p = kzalloc(256, GFP_KERNEL);
> - kfree(p);
> - p[50] = 0x9a;
> - pr_err("\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", p);
> - validate_slab_cache(kmalloc_caches[type][8]);
> -
> - p = kzalloc(512, GFP_KERNEL);
> - kfree(p);
> - p[512] = 0xab;
> - pr_err("\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
> - validate_slab_cache(kmalloc_caches[type][9]);
> -}
> -#else
> -#ifdef CONFIG_SYSFS
> -static void resiliency_test(void) {};
> -#endif
> -#endif /* SLUB_RESILIENCY_TEST */
> -
> #ifdef CONFIG_SYSFS
> enum slab_stat_type {
> SL_ALL, /* All slabs */
> @@ -5875,7 +5766,6 @@ static int __init slab_sysfs_init(void)
> }
>
> mutex_unlock(&slab_mutex);
> - resiliency_test();
> return 0;
> }
>
> --
> 1.8.3.1
>
>