another small utility patch: ECCd

Ingo Molnar (mingo@pc5829.hil.siemens.at)
Wed, 8 Jan 1997 14:13:47 +0100 (MET)


Well, it doesn't do ECC (yet), but it checksums all the read-only kernel
code pages and verifies those checksums in idle time. This should catch
certain types of hardware memory errors and random stray scribbles from
kernel space.

The original idea is Janos Farkas's. The patch is against a clean 2.1.20.

--mingo

diff -ur --exclude-from=exclude_from linux-2.1.20_orig/mm/Makefile linux/mm/Makefile
--- linux-2.1.20_orig/mm/Makefile Tue Jan 7 20:07:41 1997
+++ linux/mm/Makefile Wed Jan 8 12:16:49 1997
@@ -8,7 +8,7 @@
# Note 2! The CFLAGS definition is now in the main makefile...

O_TARGET := mm.o
-O_OBJS := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \
+O_OBJS := eccd.o memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \
kmalloc.o vmalloc.o \
swap.o vmscan.o page_io.o page_alloc.o swap_state.o swapfile.o

diff -ur --exclude-from=exclude_from linux-2.1.20_orig/mm/eccd.c linux/mm/eccd.c
--- linux-2.1.20_orig/mm/eccd.c Wed Jan 8 14:02:00 1997
+++ linux/mm/eccd.c Wed Jan 8 13:55:33 1997
@@ -0,0 +1,136 @@
+/*
+ * linux/mm/eccd.c
+ *
+ * Copyright (C) 1997 Ingo Molnar
+ *
+ * This kernel thread uses up idle CPU time to generate checksums
+ * for read-only kernel code pages.
+ * The main purpose of this is to catch sporadic memory corruption.
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/wait.h>
+#include <linux/vmalloc.h>
+
+#define DEBUG_ECCD	/* remove this line to compile dprintk() away */
+#ifdef DEBUG_ECCD
+#define dprintk(args...) printk(## args)
+#else
+#define dprintk(args...) do { } while (0) /* one statement, safe before 'else' */
+#endif
+
+extern char start_kernel, _etext;	/* only the addresses of these symbols are used */
+
+unsigned long * page_checksums;	/* one stored checksum per page, filled by init_eccd() */
+unsigned long textpages;	/* (&_etext - &start_kernel) >> PAGE_SHIFT, set by init_eccd() */
+
+/*
+ * To achieve minimum system load, the checksum calculating function
+ * is 'interruptible'. It checks 'need_resched' after every 'chunk',
+ * and reschedules if it is set.
+ *
+ * CHECKSUM_CHUNKS 16 means that a 4K page is checksummed in 256 byte
+ * 'chunks'; between every two chunks there is a chance to reschedule.
+ *
+ * CHECKSUM_CHUNKS must be a power of 2.
+ *
+ * All this is to get out of the way of any ready-to-run process as
+ * fast as possible.
+ */
+
+#define CHECKSUM_CHUNKS 16
+
+/*
+ * Return the sum of all unsigned longs in text page 'page', where page 0
+ * starts at &start_kernel.  May call schedule() between chunks.
+ */
+unsigned long calc_page_checksum(unsigned long page)
+{
+	unsigned long checksum = 0;
+	unsigned long *word = (unsigned long *)(&start_kernel
+						+ (page << PAGE_SHIFT));
+	/* count in words, not bytes, so exactly PAGE_SIZE bytes are read */
+	long chunk_words = PAGE_SIZE / CHECKSUM_CHUNKS / sizeof(*word);
+	long i, j;
+
+	for (j = CHECKSUM_CHUNKS; j > 0; j--) {
+		for (i = chunk_words; i > 0; i--)
+			checksum += *word++;
+
+		/* nobody modifies text pages, so rescheduling here is safe */
+		if (need_resched)
+			schedule();
+	}
+
+	return checksum;
+}
+
+/*
+ * Build the reference table: checksum every whole text page between
+ * start_kernel and _etext into page_checksums[] (a trailing partial page
+ * is skipped).  The table is vmalloc()ed with one unsigned long per page;
+ * panics if that allocation fails.
+ */
+void init_eccd (void)
+{
+	unsigned long size,i;
+	unsigned long checksum;
+
+	printk("ECCd: start_kernel: %08lX _etext (end kernel?): %08lX.\n",
+		(unsigned long) &start_kernel, (unsigned long) &_etext );
+
+	size = ((unsigned long) &_etext) - ((unsigned long) &start_kernel);
+	printk("ECCd: size %08lX.\n",size);
+
+	textpages = size >> PAGE_SHIFT;
+
+	/* one slot per page is all the pool has to hold */
+	page_checksums = vmalloc(textpages * sizeof(*page_checksums));
+	if (!page_checksums)
+		panic("unable to allocate ECCD checksum pool.");
+
+	printk("ECCd: textpages %08lX.\n",textpages);
+
+	printk("calculating checksums: ");
+	for (i=0; i<textpages; i++) {
+		checksum = calc_page_checksum(i);
+		printk("[%lu:%08lX] ",i,checksum);
+		page_checksums[i]=checksum;
+	}
+	printk(".\n");
+}
+
+/*
+ * Kernel thread body: build the checksum table, then re-checksum one
+ * text page per pass and report any mismatch.  The argument is unused
+ * and the function never returns.
+ */
+int eccd(void * unused)
+{
+	unsigned long page_nr = 0;
+	unsigned long sum;
+
+	printk("eccd started.\n");
+	init_eccd();
+
+	current->session = 1;
+	current->pgrp = 1;
+	sprintf(current->comm, "eccd");
+	current->priority = 0;
+
+	/* cycle through the text pages forever */
+	while (1) {
+		sum = calc_page_checksum(page_nr);
+		if (sum != page_checksums[page_nr])
+			printk("ECCD: oops, wrong checksum %08lX for address %08lX!!.\n",
+				sum, ((unsigned long) &start_kernel) +
+				(page_nr << PAGE_SHIFT));
+
+		if (++page_nr >= textpages)
+			page_nr = 0;
+		schedule();
+	}
+}
+
+
+
diff -ur --exclude-from=exclude_from linux-2.1.20_orig/init/main.c linux/init/main.c
--- linux-2.1.20_orig/init/main.c Tue Jan 7 20:07:41 1997
+++ linux/init/main.c Wed Jan 8 14:06:00 1997
@@ -61,6 +61,8 @@
extern int bdflush(void *);
extern int kswapd(void *);

+extern int eccd(void * unused);
+
extern void init_IRQ(void);
extern void init_modules(void);
extern long console_init(long, long);
@@ -905,6 +907,12 @@
* make syscalls (and thus be locked).
*/
kernel_thread(init, NULL, 0);
+
+ /*
+ * Launch ECCd
+ */
+ kernel_thread(eccd, NULL, 0);
+
/*
* task[0] is meant to be used as an "idle" task: it may not sleep, but
* it might do some general things like count free pages or it could be