Re: [RFC PATCH] mm/slub: remove left-over debugging code

From: Pengfei Li
Date: Tue Sep 17 2019 - 09:24:44 EST


On Tue, Sep 17, 2019 at 2:32 AM David Rientjes <rientjes@xxxxxxxxxx> wrote:
>
> On Mon, 16 Sep 2019, Qian Cai wrote:
>
> > SLUB_RESILIENCY_TEST and SLUB_DEBUG_CMPXCHG look like some left-over
> > debugging code during the internal development that probably nobody uses
> > it anymore. Remove them to make the world greener.
>
> Adding Pengfei Li who has been working on a patchset for modified handling
> of kmalloc cache initialization and touches the resiliency test.
>

Thanks for looping me in.

My opinion is the same as David Rientjes.
The resiliency test should not be removed but should be improved.

> I still find the resiliency test to be helpful/instructional for handling
> unexpected conditions in these caches, so I'd suggest against removing it:
> the only downside is that it's additional source code. But it's helpful
> source code for reference.
>
> The cmpxchg failures could likely be more generalized beyond SLUB since
> there will be other dependencies in the kernel than just this allocator.
>
> (I assume you didn't send a Signed-off-by line because this is an RFC.)
>
> > ---
> > mm/slub.c | 110 --------------------------------------------------------------
> > 1 file changed, 110 deletions(-)
> >
> > diff --git a/mm/slub.c b/mm/slub.c
> > index 8834563cdb4b..f97155ba097d 100644
> > --- a/mm/slub.c
> > +++ b/mm/slub.c
> > @@ -150,12 +150,6 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
> > * - Variable sizing of the per node arrays
> > */
> >
> > -/* Enable to test recovery from slab corruption on boot */
> > -#undef SLUB_RESILIENCY_TEST
> > -
> > -/* Enable to log cmpxchg failures */
> > -#undef SLUB_DEBUG_CMPXCHG
> > -
> > /*
> > * Mininum number of partial slabs. These will be left on the partial
> > * lists even if they are empty. kmem_cache_shrink may reclaim them.
> > @@ -392,10 +386,6 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page
> > cpu_relax();
> > stat(s, CMPXCHG_DOUBLE_FAIL);
> >
> > -#ifdef SLUB_DEBUG_CMPXCHG
> > - pr_info("%s %s: cmpxchg double redo ", n, s->name);
> > -#endif
> > -
> > return false;
> > }
> >
> > @@ -433,10 +423,6 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
> > cpu_relax();
> > stat(s, CMPXCHG_DOUBLE_FAIL);
> >
> > -#ifdef SLUB_DEBUG_CMPXCHG
> > - pr_info("%s %s: cmpxchg double redo ", n, s->name);
> > -#endif
> > -
> > return false;
> > }
> >
> > @@ -2004,45 +1990,11 @@ static inline unsigned long next_tid(unsigned long tid)
> > return tid + TID_STEP;
> > }
> >
> > -static inline unsigned int tid_to_cpu(unsigned long tid)
> > -{
> > - return tid % TID_STEP;
> > -}
> > -
> > -static inline unsigned long tid_to_event(unsigned long tid)
> > -{
> > - return tid / TID_STEP;
> > -}
> > -
> > static inline unsigned int init_tid(int cpu)
> > {
> > return cpu;
> > }
> >
> > -static inline void note_cmpxchg_failure(const char *n,
> > - const struct kmem_cache *s, unsigned long tid)
> > -{
> > -#ifdef SLUB_DEBUG_CMPXCHG
> > - unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);
> > -
> > - pr_info("%s %s: cmpxchg redo ", n, s->name);
> > -
> > -#ifdef CONFIG_PREEMPT
> > - if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
> > - pr_warn("due to cpu change %d -> %d\n",
> > - tid_to_cpu(tid), tid_to_cpu(actual_tid));
> > - else
> > -#endif
> > - if (tid_to_event(tid) != tid_to_event(actual_tid))
> > - pr_warn("due to cpu running other code. Event %ld->%ld\n",
> > - tid_to_event(tid), tid_to_event(actual_tid));
> > - else
> > - pr_warn("for unknown reason: actual=%lx was=%lx target=%lx\n",
> > - actual_tid, tid, next_tid(tid));
> > -#endif
> > - stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
> > -}
> > -
> > static void init_kmem_cache_cpus(struct kmem_cache *s)
> > {
> > int cpu;
> > @@ -2751,7 +2703,6 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
> > object, tid,
> > next_object, next_tid(tid)))) {
> >
> > - note_cmpxchg_failure("slab_alloc", s, tid);
> > goto redo;
> > }
> > prefetch_freepointer(s, next_object);
> > @@ -4694,66 +4645,6 @@ static int list_locations(struct kmem_cache *s, char *buf,
> > }
> > #endif /* CONFIG_SLUB_DEBUG */
> >
> > -#ifdef SLUB_RESILIENCY_TEST
> > -static void __init resiliency_test(void)
> > -{
> > - u8 *p;
> > - int type = KMALLOC_NORMAL;
> > -
> > - BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || KMALLOC_SHIFT_HIGH < 10);
> > -
> > - pr_err("SLUB resiliency testing\n");
> > - pr_err("-----------------------\n");
> > - pr_err("A. Corruption after allocation\n");
> > -
> > - p = kzalloc(16, GFP_KERNEL);
> > - p[16] = 0x12;
> > - pr_err("\n1. kmalloc-16: Clobber Redzone/next pointer 0x12->0x%p\n\n",
> > - p + 16);
> > -
> > - validate_slab_cache(kmalloc_caches[type][4]);
> > -
> > - /* Hmmm... The next two are dangerous */
> > - p = kzalloc(32, GFP_KERNEL);
> > - p[32 + sizeof(void *)] = 0x34;
> > - pr_err("\n2. kmalloc-32: Clobber next pointer/next slab 0x34 -> -0x%p\n",
> > - p);
> > - pr_err("If allocated object is overwritten then not detectable\n\n");
> > -
> > - validate_slab_cache(kmalloc_caches[type][5]);
> > - p = kzalloc(64, GFP_KERNEL);
> > - p += 64 + (get_cycles() & 0xff) * sizeof(void *);
> > - *p = 0x56;
> > - pr_err("\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
> > - p);
> > - pr_err("If allocated object is overwritten then not detectable\n\n");
> > - validate_slab_cache(kmalloc_caches[type][6]);
> > -
> > - pr_err("\nB. Corruption after free\n");
> > - p = kzalloc(128, GFP_KERNEL);
> > - kfree(p);
> > - *p = 0x78;
> > - pr_err("1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
> > - validate_slab_cache(kmalloc_caches[type][7]);
> > -
> > - p = kzalloc(256, GFP_KERNEL);
> > - kfree(p);
> > - p[50] = 0x9a;
> > - pr_err("\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", p);
> > - validate_slab_cache(kmalloc_caches[type][8]);
> > -
> > - p = kzalloc(512, GFP_KERNEL);
> > - kfree(p);
> > - p[512] = 0xab;
> > - pr_err("\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
> > - validate_slab_cache(kmalloc_caches[type][9]);
> > -}
> > -#else
> > -#ifdef CONFIG_SYSFS
> > -static void resiliency_test(void) {};
> > -#endif
> > -#endif /* SLUB_RESILIENCY_TEST */
> > -
> > #ifdef CONFIG_SYSFS
> > enum slab_stat_type {
> > SL_ALL, /* All slabs */
> > @@ -5875,7 +5766,6 @@ static int __init slab_sysfs_init(void)
> > }
> >
> > mutex_unlock(&slab_mutex);
> > - resiliency_test();
> > return 0;
> > }
> >
> > --
> > 1.8.3.1
> >
> >