Re: [PATCH v3 1/3] mm/slub: enable debugging memory wasting of kmalloc

From: Feng Tang
Date: Wed Jul 27 2022 - 08:59:23 EST




On 2022/7/27 18:20, Christoph Lameter wrote:
> On Wed, 27 Jul 2022, Feng Tang wrote:

> > @@ -2905,7 +2950,7 @@ static inline void *get_freelist(struct kmem_cache *s, struct slab *slab)
> > * already disabled (which is the case for bulk allocation).
> > */
> > static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
> > - unsigned long addr, struct kmem_cache_cpu *c)
> > + unsigned long addr, struct kmem_cache_cpu *c, unsigned int orig_size)
> > {
> > void *freelist;
> > struct slab *slab;
> > @@ -3102,7 +3147,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
> > * pointer.
> > */
> > static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
> > - unsigned long addr, struct kmem_cache_cpu *c)
> > + unsigned long addr, struct kmem_cache_cpu *c, unsigned int orig_size)
> > {
> > void *p;
> >
> > @@ -3115,7 +3160,7 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
> > c = slub_get_cpu_ptr(s->cpu_slab);
> > #endif
> >
> > - p = ___slab_alloc(s, gfpflags, node, addr, c);
> > + p = ___slab_alloc(s, gfpflags, node, addr, c, orig_size);
> > #ifdef CONFIG_PREEMPT_COUNT
> > slub_put_cpu_ptr(s->cpu_slab);

> This is modifying and making execution of standard slab functions more
> expensive. Could you restrict modifications to the kmalloc subsystem?
>
> kmem_cache_alloc() and friends are not doing any rounding up to power of
> two sizes.
>
> What is happening here is that you pass kmalloc object size info through
> the kmem_cache_alloc functions so that the regular allocation functions
> debug functionality can then save the kmalloc specific object request
> size. This is active even when no debugging options are enabled.
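
To make the size-class rounding concrete, here is a minimal standalone sketch (plain C, not kernel code; the real kmalloc classes also include 96 and 192 bytes, which this sketch ignores):

/*
 * Standalone sketch: kmalloc() requests are served from fixed size
 * classes, so e.g. a 52-byte request comes from the 64-byte cache and
 * 12 bytes are wasted -- the per-callsite number this series reports.
 */
#include <stdio.h>

static unsigned int size_class(unsigned int size)
{
	unsigned int c = 8;		/* smallest class in this sketch */

	while (c < size)
		c <<= 1;		/* power-of-two classes only */
	return c;
}

int main(void)
{
	unsigned int reqs[] = { 52, 600, 1000 };

	for (size_t i = 0; i < sizeof(reqs) / sizeof(reqs[0]); i++) {
		unsigned int c = size_class(reqs[i]);

		printf("kmalloc(%u) -> %u-byte object, waste %u\n",
		       reqs[i], c, c - reqs[i]);
	}
	return 0;
}

The "waste" value printed by the debugfs change below is exactly this difference, aggregated per allocation call site.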

Yes, it does add some extra cost, which I don't like either.


> Can you avoid that? Have kmalloc do the object allocation without passing
> through the kmalloc request size and then add the original size info
> to the debug field later after execution continues in the kmalloc functions?


How about the following patch, which adds no new 'orig_size' parameter to the
core allocation functions? (The following 2nd and 3rd redzone debug patches may
also need some changes.)
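
The idea is to keep the original request size out of the allocator's core paths
and instead stash it in the per-object debug area, right after the two
'struct track' records that SLAB_STORE_USER already reserves. A rough standalone
sketch of that offset arithmetic (plain C with placeholder names and sizes, not
the real kernel layout):

/*
 * Placeholder layout sketch:
 *   [object ... info_end][track TRACK_ALLOC][track TRACK_FREE][orig_size]
 * In the kernel the base offset comes from get_info_end() and the
 * records are the real struct track.
 */
#include <stdio.h>
#include <stdlib.h>

struct track {				/* placeholder for the kernel's struct track */
	unsigned long addr;
	unsigned long when;
	int cpu;
	int pid;
};

static unsigned int *orig_size_ptr(unsigned char *object, size_t info_end)
{
	unsigned char *p = object + info_end;	/* end of object plus earlier debug data */

	p += 2 * sizeof(struct track);		/* skip the alloc and free track records */
	return (unsigned int *)p;
}

int main(void)
{
	size_t info_end = 64;			/* stand-in for get_info_end(s) */
	unsigned char *obj = malloc(info_end + 2 * sizeof(struct track) +
				    sizeof(unsigned int));

	if (!obj)
		return 1;

	*orig_size_ptr(obj, info_end) = 52;	/* e.g. kmalloc(52) from a 64-byte slot */
	printf("orig_size stored at offset %zu, value %u\n",
	       info_end + 2 * sizeof(struct track), *orig_size_ptr(obj, info_end));
	free(obj);
	return 0;
}

In the patch itself the field is only written when slub_debug_orig_size(s) is
true, so caches without both SLAB_STORE_USER and SLAB_KMALLOC pay nothing.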

(Our email server has just been changed and my mutt client isn't working
correctly, so the inline formatting may be broken; I have attached the new
patch as well. Sorry for the inconvenience!)

Thanks,
Feng

---
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 0fefdf528e0d..a713b0e5bbcd 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -29,6 +29,8 @@
#define SLAB_RED_ZONE ((slab_flags_t __force)0x00000400U)
/* DEBUG: Poison objects */
#define SLAB_POISON ((slab_flags_t __force)0x00000800U)
+/* Indicate a kmalloc slab */
+#define SLAB_KMALLOC ((slab_flags_t __force)0x00001000U)
/* Align objs on cache lines */
#define SLAB_HWCACHE_ALIGN ((slab_flags_t __force)0x00002000U)
/* Use GFP_DMA memory */
diff --git a/mm/slub.c b/mm/slub.c
index 862dbd9af4f5..97c21a37a6a1 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -191,6 +191,12 @@ static inline bool kmem_cache_debug(struct kmem_cache *s)
return kmem_cache_debug_flags(s, SLAB_DEBUG_FLAGS);
}

+static inline bool slub_debug_orig_size(struct kmem_cache *s)
+{
+ return (kmem_cache_debug_flags(s, SLAB_STORE_USER) &&
+ (s->flags & SLAB_KMALLOC));
+}
+
void *fixup_red_left(struct kmem_cache *s, void *p)
{
if (kmem_cache_debug_flags(s, SLAB_RED_ZONE))
@@ -816,6 +822,39 @@ static void print_slab_info(const struct slab *slab)
folio_flags(folio, 0));
}

+static inline unsigned int *get_orig_size_pointer(struct kmem_cache *s,
+ void *object)
+{
+ void *p = kasan_reset_tag(object);
+
+ p += get_info_end(s);
+ p += sizeof(struct track) * 2;
+ return (unsigned int *)p;
+}
+
+static void set_orig_size(struct kmem_cache *s,
+ void *object, unsigned int orig_size)
+{
+ unsigned int *p;
+
+ if (!slub_debug_orig_size(s))
+ return;
+
+ p = get_orig_size_pointer(s, object);
+ *p = orig_size;
+}
+
+static unsigned int get_orig_size(struct kmem_cache *s, void *object)
+{
+ unsigned int *p;
+
+ if (!slub_debug_orig_size(s))
+ return s->object_size;
+
+ p = get_orig_size_pointer(s, object);
+ return *p;
+}
+
static void slab_bug(struct kmem_cache *s, char *fmt, ...)
{
struct va_format vaf;
@@ -875,6 +914,9 @@ static void print_trailer(struct kmem_cache *s, struct slab *slab, u8 *p)
if (s->flags & SLAB_STORE_USER)
off += 2 * sizeof(struct track);

+ if (slub_debug_orig_size(s))
+ off += sizeof(unsigned int);
+
off += kasan_metadata_size(s);

if (off != size_from_object(s))
@@ -1026,10 +1068,14 @@ static int check_pad_bytes(struct kmem_cache *s, struct slab *slab, u8 *p)
{
unsigned long off = get_info_end(s); /* The end of info */

- if (s->flags & SLAB_STORE_USER)
+ if (s->flags & SLAB_STORE_USER) {
/* We also have user information there */
off += 2 * sizeof(struct track);

+ if (s->flags & SLAB_KMALLOC)
+ off += sizeof(unsigned int);
+ }
+
off += kasan_metadata_size(s);

if (size_from_object(s) == off)
@@ -1335,6 +1381,7 @@ static noinline int alloc_debug_processing(struct kmem_cache *s,
/* Success perform special debug activities for allocs */
if (s->flags & SLAB_STORE_USER)
set_track(s, object, TRACK_ALLOC, addr);
+
trace(s, slab, object, 1);
init_object(s, object, SLUB_RED_ACTIVE);
return 1;
@@ -3240,6 +3287,9 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s, struct list_l
init = slab_want_init_on_alloc(gfpflags, s);

out:
+#ifdef CONFIG_SLUB_DEBUG
+ set_orig_size(s, object, orig_size);
+#endif
slab_post_alloc_hook(s, objcg, gfpflags, 1, &object, init);

return object;
@@ -4112,12 +4162,17 @@ static int calculate_sizes(struct kmem_cache *s)
}

#ifdef CONFIG_SLUB_DEBUG
- if (flags & SLAB_STORE_USER)
+ if (flags & SLAB_STORE_USER) {
/*
* Need to store information about allocs and frees after
* the object.
*/
size += 2 * sizeof(struct track);
+
+ /* Save the original kmalloc request size */
+ if (flags & SLAB_KMALLOC)
+ size += sizeof(unsigned int);
+ }
#endif

kasan_cache_create(s, &size, &s->flags);
@@ -4842,7 +4897,7 @@ void __init kmem_cache_init(void)

/* Now we can use the kmem_cache to allocate kmalloc slabs */
setup_kmalloc_cache_index_table();
- create_kmalloc_caches(0);
+ create_kmalloc_caches(SLAB_KMALLOC);

/* Setup random freelists for each cache */
init_freelist_randomization();
@@ -5068,6 +5123,7 @@ struct location {
depot_stack_handle_t handle;
unsigned long count;
unsigned long addr;
+ unsigned long waste;
long long sum_time;
long min_time;
long max_time;
@@ -5114,13 +5170,15 @@ static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
}

static int add_location(struct loc_track *t, struct kmem_cache *s,
- const struct track *track)
+ const struct track *track,
+ unsigned int orig_size)
{
long start, end, pos;
struct location *l;
- unsigned long caddr, chandle;
+ unsigned long caddr, chandle, cwaste;
unsigned long age = jiffies - track->when;
depot_stack_handle_t handle = 0;
+ unsigned int waste = s->object_size - orig_size;

#ifdef CONFIG_STACKDEPOT
handle = READ_ONCE(track->handle);
@@ -5138,11 +5196,13 @@ static int add_location(struct loc_track *t, struct kmem_cache *s,
if (pos == end)
break;

- caddr = t->loc[pos].addr;
- chandle = t->loc[pos].handle;
- if ((track->addr == caddr) && (handle == chandle)) {
+ l = &t->loc[pos];
+ caddr = l->addr;
+ chandle = l->handle;
+ cwaste = l->waste;
+ if ((track->addr == caddr) && (handle == chandle) &&
+ (waste == cwaste)) {

- l = &t->loc[pos];
l->count++;
if (track->when) {
l->sum_time += age;
@@ -5167,6 +5227,9 @@ static int add_location(struct loc_track *t, struct kmem_cache *s,
end = pos;
else if (track->addr == caddr && handle < chandle)
end = pos;
+ else if (track->addr == caddr && handle == chandle &&
+ waste < cwaste)
+ end = pos;
else
start = pos;
}
@@ -5190,6 +5253,7 @@ static int add_location(struct loc_track *t, struct kmem_cache *s,
l->min_pid = track->pid;
l->max_pid = track->pid;
l->handle = handle;
+ l->waste = waste;
cpumask_clear(to_cpumask(l->cpus));
cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
nodes_clear(l->nodes);
@@ -5208,7 +5272,7 @@ static void process_slab(struct loc_track *t, struct kmem_cache *s,

for_each_object(p, s, addr, slab->objects)
if (!test_bit(__obj_to_index(s, addr, p), obj_map))
- add_location(t, s, get_track(s, p, alloc));
+ add_location(t, s, get_track(s, p, alloc), get_orig_size(s, p));
}
#endif /* CONFIG_DEBUG_FS */
#endif /* CONFIG_SLUB_DEBUG */
@@ -6078,6 +6142,10 @@ static int slab_debugfs_show(struct seq_file *seq, void *v)
else
seq_puts(seq, "<not-available>");

+ if (l->waste)
+ seq_printf(seq, " waste=%lu/%lu",
+ l->count * l->waste, l->waste);
+
if (l->sum_time != l->min_time) {
seq_printf(seq, " age=%ld/%llu/%ld",
l->min_time, div_u64(l->sum_time, l->count),