kmalloc statistics patch

Gordon Oliver (gordo@lazos.cl)
Sat, 11 Jan 1997 19:14:47 -0300 (CDT)


Hi all.
There was a bit of discussion about adding a printk to kmalloc to see
allocation statistics. This should be a faster version of the same idea.
WARNING: this patch is against kernel 2.0.0, and thus might have trouble
applying to newer kernels, though it shouldn't (sorry, 2.0.0 is all I have
here at the wrong end of a _very_ slow connection).
I've been running with this patch without problems (apart from some
performance hiccups when it dumps).

Info (based on i386, probably similar for others):
- Someone allocates 256 byte blocks fairly often (I noticed this using
SLIP). Unfortunately kmalloc(256) yields a ~512 byte block... (see the
sketch after this list).
- fork does three separate large allocations that are not particularly
well sized for an i386 (516, 1066, and 996 bytes, which yield ~1024,
~2048, and ~1024). The memory actually used totals just over 2.5k, while
roughly 4k ends up allocated.
- the only real source of "random" allocation sizes on my system is the
network code.
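
For the curious, the rounding above falls out of the bucket table in
mm/kmalloc.c: a request plus a small block header gets rounded up to the
next bucket. Here is a quick user-space sketch of the effect; the bucket
sizes and the 8-byte header are from my reading of the 2.0 sizes[] table,
so treat them as approximate:

#include <stdio.h>

#define BLOCK_HEADER 8  /* sizeof(struct block_header) on i386, I believe */

/* bucket sizes include the block header; 2.0 i386 values, approximately */
static const int buckets[] = { 32, 64, 128, 252, 508, 1020, 2040, 4096 - 16 };

static int bucket_for(int size)
{
        int i;

        for (i = 0; i < sizeof(buckets) / sizeof(buckets[0]); i++)
                if (size + BLOCK_HEADER <= buckets[i])
                        return buckets[i];
        return -1;      /* too big for the buckets shown here */
}

int main(void)
{
        printf("kmalloc(256)  -> %d\n", bucket_for(256));       /* 508 */
        printf("kmalloc(516)  -> %d\n", bucket_for(516));       /* 1020 */
        printf("kmalloc(1066) -> %d\n", bucket_for(1066));      /* 2040 */
        printf("kmalloc(996)  -> %d\n", bucket_for(996));       /* 1020 */
        return 0;
}

which reproduces the ~512, ~1024, ~2048 and ~1024 figures above.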

Notes:
1) The address that gets printed will only be useful (on my system, anyway)
if the kernel is compiled _without_ -fomit-frame-pointer (otherwise it just
picks up the size argument).
2) The table size and the minimum/maximum number of allocations between
dumps (MEM_STAT_SIZE, MEM_STAT_MIN_TIME, MEM_STAT_TIME) can be tweaked as
you like. A table that is small w.r.t. the minimum interval between dumps
will cause data to be lost.
3) The dump will cause a slight hiccup in performance... ah well. The
gathering of the statistics is a pretty light load.
4) You can turn the patch off with an #undef KMALLOC_STATS where it is
currently defined.
5) If anybody wants to run long tests, I have a couple of awk scripts that
reduce the raw output into a reasonably useful summary (nothing very
complicated; a rough C stand-in is sketched below).
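
For reference, each dump line comes out of the printk below as

        mem_stats: SIZE(COUNT) [FIRST->LAST] CALLER

where FIRST/LAST are jiffies in hex and CALLER is the return address from
note 1. I won't post the awk here, but a rough C stand-in for the
reduction (names are mine, it just totals the counts per size) would be:

#include <stdio.h>
#include <string.h>

#define MAX_SIZES 256

int main(void)
{
        char line[256];
        int sizes[MAX_SIZES], counts[MAX_SIZES];
        int nsizes = 0;
        int size, count, i;
        unsigned int first, last;

        while (fgets(line, sizeof(line), stdin) != NULL) {
                char *p = strstr(line, "mem_stats:");

                if (p == NULL)
                        continue;
                if (sscanf(p, "mem_stats: %d(%d) [%x->%x]",
                           &size, &count, &first, &last) != 4)
                        continue;
                /* find (or create) the slot for this size */
                for (i = 0; i < nsizes && sizes[i] != size; i++)
                        ;
                if (i == nsizes) {
                        if (nsizes == MAX_SIZES)
                                continue;       /* out of slots, drop it */
                        sizes[nsizes] = size;
                        counts[nsizes++] = 0;
                }
                counts[i] += count;
        }
        for (i = 0; i < nsizes; i++)
                printf("%6d bytes: %d allocations\n", sizes[i], counts[i]);
        return 0;
}

Feed it the log (e.g. dmesg | ./reduce) and it prints one total per size.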
-gordo
---------------------------------------------------------------------------
*** mm/kmalloc.c.dist Tue Jan 7 15:25:44 1997
--- mm/kmalloc.c Sat Jan 11 18:06:55 1997
***************
*** 12,17 ****
--- 12,19 ----
* pages. So for 'page' throughout, read 'area'.
*
* Largely rewritten.. Linus
+ *
+ * Modified by Gordon Oliver (gordo@lazos.cl) to add kmalloc profiling.
*/

#include <linux/mm.h>
***************
*** 223,228 ****
--- 225,344 ----
return start_mem;
}

+ #define KMALLOC_STATS
+ #ifdef KMALLOC_STATS
+ /*
+ * memory statistics package (time varying, quick hack)
+ * Copyright (C) 1997 Gordon Oliver
+ *
+ * Notes: This is re-entrant by dint of the bit-operations. It has one
+ * critical section (where it is dumping out the results). This section
+ * disables the gathering of new statistics.
+ * If the routine has been re-entered, the new entries are not allowed to
+ * change the structure of the table. If they would, they will discard the
+ * new data.
+ * This all makes use of the fact that there is no pre-emptive multi-tasking
+ * within the kernel (we can be interrupted, but the interrupt will execute
+ * atomically with respect to us).
+ *
+ * The three numbers below control the logging frequency and number of entries.
+ * Increasing MEM_STAT_SIZE in general lets you decrease the minimum
+ * interval between logs (it makes the table less likely to overflow).
+ * To prevent runaway logging, MEM_STAT_MIN_TIME allocations are required
+ * between logs. While the table is full, entries for new sizes are
+ * discarded, though sizes already in the table are still counted (this
+ * will skew the results).
+ * MEM_STAT_TIME is the number of kmallocs after which a dump is forced,
+ * just for dumping's sake.
+ */
+
+ #define MEM_STAT_SIZE 100
+ #define MEM_STAT_MIN_TIME 2000
+ #define MEM_STAT_TIME 20000
+
+ struct mem_stats
+ {
+         int size;             /* allocation size being counted */
+         int count;            /* kmallocs of this size so far */
+         int first_time;       /* jiffies when first seen */
+         int last_time;        /* jiffies when last seen */
+         void *last_addr;      /* most recent caller address */
+ } stat_buff[MEM_STAT_SIZE];
+
+ int stat_reenter = 0;         /* bit 1: kmstats active, bit 2: table locked */
+ int stat_buff_ptr = 0;        /* entries currently in stat_buff */
+ int stat_buff_count = 0;      /* kmallocs since the last dump */
+ atomic_t stat_discard = 0;    /* samples dropped while busy or full */
+
+ static void
+ kmstats(int size, void *addr)
+ {
+         int sbc;
+         int safety, lock;
+
+         if (test_bit(2, &stat_reenter))
+         {
+                 atomic_inc(&stat_discard);
+                 return;       /* bomb out */
+         }
+         safety = set_bit(1, &stat_reenter);   /* old bit: nonzero if re-entered */
+         if (stat_buff_count++ >= MEM_STAT_TIME && !safety)
+                 goto dump_stats;
+         for (sbc = 0; sbc < stat_buff_ptr; sbc++)
+                 if (stat_buff[sbc].size == size)
+                         goto got_stat_buff;
+         set_bit(2, &stat_reenter);    /* lock the table structure */
+         if (stat_buff_ptr >= MEM_STAT_SIZE)
+                 goto safe_dump;
+ make_stat_buff:
+         sbc = stat_buff_ptr++;
+         stat_buff[sbc].size = size;
+         stat_buff[sbc].count = 0;
+         clear_bit(2, &stat_reenter);
+         stat_buff[sbc].first_time = jiffies;
+ got_stat_buff:
+         /* the following is not safe, but who cares... */
+         stat_buff[sbc].last_time = jiffies;
+         stat_buff[sbc].last_addr = addr;
+         stat_buff[sbc].count += 1;
+
+ stat_exit:
+         if (!safety)
+                 clear_bit(1, &stat_reenter);
+         return;
+
+ safe_dump:
+         if (safety || stat_buff_count < MEM_STAT_MIN_TIME)
+         {
+                 atomic_inc(&stat_discard);
+                 clear_bit(2, &stat_reenter);
+                 goto stat_exit;       /* bomb out */
+         }
+ dump_stats:
+         lock = set_bit(2, &stat_reenter);     /* may already be held */
+         if (stat_buff_ptr > MEM_STAT_SIZE)
+                 stat_buff_ptr = MEM_STAT_SIZE;
+         if (stat_discard > 0)
+         {     /* might lose some here, but we don't care too much */
+                 printk("mem_discard: %d\n", stat_discard);
+                 stat_discard = 0;
+         }
+         for (sbc = 0; sbc < stat_buff_ptr; sbc++)
+         {
+                 printk("mem_stats: %d(%d) [%x->%x] %p\n",
+                        stat_buff[sbc].size, stat_buff[sbc].count,
+                        stat_buff[sbc].first_time, stat_buff[sbc].last_time,
+                        stat_buff[sbc].last_addr);
+                 stat_buff[sbc].size = -1;
+         }
+         stat_buff_ptr = 0;
+         stat_buff_count = 0;
+         if (!lock)
+                 clear_bit(2, &stat_reenter);
+         goto make_stat_buff;  /* enter the current size in the fresh table */
+ }
+ #endif /* KMALLOC_STATS */
+

/*
* Ugh, this is ugly, but we want the default case to run
***************
*** 237,242 ****
--- 353,361 ----
struct page_descriptor *page, **pg;
struct size_descriptor *bucket = sizes;

+ #ifdef KMALLOC_STATS
+ kmstats(size, __builtin_return_address(0));
+ #endif
/* Get order */
order = 0;
{