utility patch: kernel stack overflow profiler

Ingo Molnar (mingo@pc5829.hil.siemens.at)
Sun, 9 Feb 1997 23:04:31 +0100 (MET)


so if you were seeing strange 'kernel stack corruption. Aiee' messages on
heavily loaded systems, this patch is for you. Now you can pester your
favorite kernel driver hackers with annoyingly accurate kernel oopses. No
'sorry, we don't know what causes it' and no 'that must be an SMP problem'
answers possible anymore.

the patch abuses gcc's instrumenting features (the original idea is from
Linus) to add runtime kernel stack overflow checking. Whenever the
'available free stack' value goes below a certain threshold (500 bytes
currently), an artificial kernel oops is generated. This method is much
more portable than the [now obsolete] 'kfaultd' patch. It should be much
easier to integrate it into 2.0 as well.

The patch is against 2.1.26. It adds a new entry to the 'kernel hacking'
options, you have to turn it on and have to do a 'make clean' after that.
[you might want to recompile your kernel as well, but only if you want]

It's tested on the x86 platform, but other platforms should be fine too.
Probably the only thing you have to do is to add the config option to your
arch/*/config.in file. Send me a report if it works.

Comments, ideas welcome.

-- mingo

--- /pdl/linux/linux-2.1.26_orig/arch/i386/config.in Thu Jan 16 03:46:06 1997
+++ linux/arch/i386/config.in Sun Feb 9 17:54:25 1997
@@ -111,4 +111,5 @@
if [ "$CONFIG_PROFILE" = "y" ]; then
int ' Profile shift count' CONFIG_PROFILE_SHIFT 2
fi
+bool 'Debug kernel stack overflows' CONFIG_DEBUG_KSTACK
endmenu
--- /pdl/linux/linux-2.1.26_orig/include/linux/kernel.h Sun Dec 1 10:06:55 1996
+++ linux/include/linux/kernel.h Sun Feb 9 20:34:45 1997
@@ -18,7 +18,12 @@
#define LONG_MAX ((long)(~0UL>>1))
#define ULONG_MAX (~0UL)

-#define STACK_MAGIC 0xdeadbeef
+#define KSTACK_MAGIC 0xdeadbeef
+
+/*
+ * Threshold (byte offset from the bottom of the kernel stack page) that triggers kernel stack overflow oopses.
+ */
+#define KSTACK_TRESHOLD 500

#define KERN_EMERG "<0>" /* system is unusable */
#define KERN_ALERT "<1>" /* action must be taken immediately */
diff --exclude-from=exclude_from.prof -ru --new-file /pdl/linux/linux-2.1.26_orig/kernel/Makefile linux/kernel/Makefile
--- /pdl/linux/linux-2.1.26_orig/kernel/Makefile Wed Jan 10 08:27:39 1996
+++ linux/kernel/Makefile Sun Feb 9 21:13:25 1997
@@ -11,7 +11,7 @@
$(CPP) -traditional $< -o $*.s

O_TARGET := kernel.o
-O_OBJS = sched.o dma.o fork.o exec_domain.o panic.o printk.o sys.o \
+O_OBJS = profile.o sched.o dma.o fork.o exec_domain.o panic.o printk.o sys.o \
module.o exit.o signal.o itimer.o info.o time.o softirq.o \
resource.o sysctl.o

@@ -23,3 +23,6 @@

sched.o: sched.c
$(CC) $(CFLAGS) $(PROFILING) -fno-omit-frame-pointer -c $<
+
+profile.o: profile.c
+ $(CC) -Wall -Wstrict-prototypes -fno-omit-frame-pointer -O2 -c $<
diff --exclude-from=exclude_from.prof -ru --new-file /pdl/linux/linux-2.1.26_orig/kernel/exit.c linux/kernel/exit.c
--- /pdl/linux/linux-2.1.26_orig/kernel/exit.c Sun Jan 26 11:07:48 1997
+++ linux/kernel/exit.c Sun Feb 9 17:45:52 1997
@@ -129,7 +129,7 @@
task[i] = NULL;
REMOVE_LINKS(p);
release_thread(p);
- if (STACK_MAGIC != *(unsigned long *)p->kernel_stack_page)
+ if (KSTACK_MAGIC != *(unsigned long *)p->kernel_stack_page)
printk(KERN_ALERT "release: %s kernel stack corruption. Aiee\n", p->comm);
free_kernel_stack(p->kernel_stack_page);
current->cmin_flt += p->min_flt + p->cmin_flt;
diff --exclude-from=exclude_from.prof -ru --new-file /pdl/linux/linux-2.1.26_orig/kernel/fork.c linux/kernel/fork.c
--- /pdl/linux/linux-2.1.26_orig/kernel/fork.c Sun Jan 26 12:40:46 1997
+++ linux/kernel/fork.c Sun Feb 9 20:36:59 1997
@@ -246,7 +246,10 @@
p->did_exec = 0;
p->swappable = 0;
p->kernel_stack_page = new_stack;
- *(unsigned long *) p->kernel_stack_page = STACK_MAGIC;
+ *(unsigned long *) p->kernel_stack_page = KSTACK_MAGIC;
+#ifdef CONFIG_DEBUG_KSTACK
+ *(unsigned long *) (p->kernel_stack_page + KSTACK_TRESHOLD) = KSTACK_MAGIC;
+#endif
p->state = TASK_UNINTERRUPTIBLE;
p->flags &= ~(PF_PTRACED|PF_TRACESYS|PF_SUPERPRIV);
p->flags |= PF_FORKNOEXEC;
diff --exclude-from=exclude_from.prof -ru --new-file /pdl/linux/linux-2.1.26_orig/kernel/profile.c linux/kernel/profile.c
--- /pdl/linux/linux-2.1.26_orig/kernel/profile.c Sun Feb 9 21:05:41 1997
+++ linux/kernel/profile.c Sun Feb 9 22:34:26 1997
@@ -0,0 +1,71 @@
+/*
+ * linux/kernel/profile.c
+ *
+ * Copyright (C) 1997 Ingo Molnar
+ */
+
+/*
+ * 'profile.c' implements the profiling hook 'mcount', generated by GCC -pg
+ *
+ * Currently used for monitoring stack usage. Has to be a separate C module,
+ * because we have to compile it without -pg, to avoid recursion.
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+
+#ifdef CONFIG_DEBUG_KSTACK
+
+int mcount_ready=0; /* deal with too early calls to mcount() */
+
+extern long init_kernel_stack[1024];
+
+void mcount_init (void)
+{
+ printk("<mcount_init() done>\n");
+
+ /*
+ * Plant the marker KSTACK_TRESHOLD bytes into init_kernel_stack. Hackish, but safe: the init thread isn't running yet.
+ */
+ *(unsigned long *)(((unsigned long)&init_kernel_stack)+KSTACK_TRESHOLD)=KSTACK_MAGIC;
+
+ /*
+ * Ok, from now on it's for real: mcount() stops returning early and checks the marker.
+ */
+ mcount_ready=1;
+}
+
+void mcount (void)
+{
+ if (!mcount_ready)
+ return;
+
+ if (KSTACK_MAGIC != *(unsigned long *)
+ (current->kernel_stack_page+KSTACK_TRESHOLD)) {
+
+ /*
+ * Avoid recursion: restore the marker first, so nested mcount() calls (e.g. from printk() below) see it intact.
+ */
+ *(unsigned long *) (current->kernel_stack_page+KSTACK_TRESHOLD) = KSTACK_MAGIC;
+
+ printk(KERN_ALERT "kernel stack overflow. Forcing Oops.\n");
+
+ /*
+ * Force a fault by writing through a null pointer. In C it's this easy ;)
+ */
+ *((char *)0)=0;
+ }
+}
+
+/*
+ * These are dummies to trick the linker. The kernel is not a normal Linux
+ * executable, and we redefine mcount() anyways. If monstartup() got started
+ * before start_kernel, we could avoid the 'mcount_ready' flag.
+ */
+
+void monstartup (void) {}
+
+void _mcleanup (void) {}
+
+#endif
+
diff --exclude-from=exclude_from.prof -ru --new-file /pdl/linux/linux-2.1.26_orig/kernel/sched.c linux/kernel/sched.c
--- /pdl/linux/linux-2.1.26_orig/kernel/sched.c Sat Feb 8 12:00:08 1997
+++ linux/kernel/sched.c Sun Feb 9 21:30:51 1997
@@ -87,8 +87,8 @@

extern void mem_use(void);

-static unsigned long init_kernel_stack[1024] = { STACK_MAGIC, };
-unsigned long init_user_stack[1024] = { STACK_MAGIC, };
+unsigned long init_kernel_stack[1024] = { KSTACK_MAGIC, };
+unsigned long init_user_stack[1024] = { KSTACK_MAGIC, };
static struct vm_area_struct init_mmap = INIT_MMAP;
static struct fs_struct init_fs = INIT_FS;
static struct files_struct init_files = INIT_FILES;
@@ -1737,3 +1737,4 @@
init_bh(TQUEUE_BH, tqueue_bh);
init_bh(IMMEDIATE_BH, immediate_bh);
}
+
--- /pdl/linux/linux-2.1.26_orig/Makefile Sat Feb 8 11:59:42 1997
+++ linux/Makefile Sun Feb 9 20:35:26 1997
@@ -61,6 +61,10 @@
do-it-all: config
endif

+ifeq ($(CONFIG_DEBUG_KSTACK),y)
+PROFILEFLAGS =-pg
+endif
+
#
# ROOT_DEV specifies the default root-device when making the image.
# This can be either FLOPPY, CURRENT, /dev/xxxx or empty, in which case
@@ -88,7 +92,7 @@
# standard CFLAGS
#

-CFLAGS = -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer
+CFLAGS = -Wall -Wstrict-prototypes $(PROFILEFLAGS) -O2 -fno-omit-frame-pointer

ifdef CONFIG_CPP
CFLAGS := $(CFLAGS) -x c++
--- /pdl/linux/linux-2.1.26_orig/init/main.c Sun Feb 2 14:53:10 1997
+++ linux/init/main.c Sun Feb 9 22:28:37 1997
@@ -70,6 +70,9 @@
extern long mca_init(long, long);
extern long sbus_init(long, long);
extern void sysctl_init(void);
+#ifdef CONFIG_DEBUG_KSTACK
+extern void mcount_init(void);
+#endif

extern void smp_setup(char *str, int *ints);
extern void no_scroll(char *str, int *ints);
@@ -897,6 +900,9 @@
smp_init();
#endif
sysctl_init();
+#ifdef CONFIG_DEBUG_KSTACK
+ mcount_init();
+#endif
/*
* We count on the initial thread going ok
* Like idlers init is an unlocked kernel thread, which will
--- /pdl/linux/linux-2.1.26_orig/Documentation/Configure.help Sat Feb 8 11:59:41 1997
+++ linux/Documentation/Configure.help Sun Feb 9 21:55:25 1997
@@ -4419,6 +4419,14 @@
said Y to "Kernel profiling support", you must be a kernel hacker and
hence you know what this is about :-)

+Debug kernel stack overflows
+CONFIG_DEBUG_KSTACK
+ If you see "kernel stack corruption. Aiee" messages, and a kernel
+ hacker told you to 'switch on kernel stack debugging', then this
+ is the right option =B-)
+ Do 'make clean' after changing this option!
+ For normal systems, this option adds noticeable overhead, so say N.
+
ISDN subsystem
CONFIG_ISDN
ISDN ("Integrated Services Digital Networks", called RNIS in France)
--- /pdl/linux/linux-2.1.26_orig/fs/umsdos/namei.c Sat Nov 30 11:24:02 1996
+++ linux/fs/umsdos/namei.c Sun Feb 9 18:17:24 1997
@@ -295,9 +295,9 @@
}

#define chkstk() \
- if (STACK_MAGIC != *(unsigned long *)current->kernel_stack_page){\
+ if (KSTACK_MAGIC != *(unsigned long *)current->kernel_stack_page){\
printk(KERN_ALERT "UMSDOS: %s magic %x != %lx ligne %d\n" \
- , current->comm,STACK_MAGIC \
+ , current->comm,KSTACK_MAGIC \
,*(unsigned long *)current->kernel_stack_page \
,__LINE__); \
}
--- /pdl/linux/linux-2.1.26_orig/arch/i386/kernel/traps.c Sat Feb 8 11:59:46 1997
+++ linux/arch/i386/kernel/traps.c Sun Feb 9 18:34:23 1997
@@ -141,7 +141,7 @@
printk("ds: %04x es: %04x ss: %04x\n",
regs->xds & 0xffff, regs->xes & 0xffff, ss);
store_TR(i);
- if (STACK_MAGIC != *(unsigned long *)current->kernel_stack_page)
+ if (KSTACK_MAGIC != *(unsigned long *)current->kernel_stack_page)
printk("Corrupted stack page\n");
printk("Process %s (pid: %d, process nr: %d, stackpage=%08lx)\nStack: ",
current->comm, current->pid, 0xffff & i, current->kernel_stack_page);