Logging disk accesses.

Rogier Wolff (R.E.Wolff@BitWizard.nl)
Mon, 1 Jun 1998 23:42:40 +0200 (MET DST)


Hi,

Here is a patch that allows you to monitor disk accesses. This is
useful to determine the actual disk access patterns of a system. For
example, you could consider "defragging" the system such that your
boot process becomes a linear read from disk.

Instructions:

Patch your 2.0.3x kernel using the patch below. Configure the new
options, and after booting the new kernel you will find a
/proc/blockdev_log file that has all disk accesses. A "cat" of this file
will show everything since the last time you looked. "tail -f" works
nicely too.

Each output line is: jiffies, r/w, device (hex), sector number (hex).

If the ringbuffer overflows, this is also noted.

If you want to monitor your boot process, you'd better empty the
buffer shortly after fsck-ing the root filesystem. Or define a much
larger ringbuffer than the default 0x1000 entries (48 kbytes of kernel
memory).

A userlevel program could monitor this file and tell you what files
are being accessed.

The code quality is such that I wouldn't want it in the standard
kernel yet. Some stuff is "in the wrong place".

Anybody who tests this, please tell me. I'm interested.....

Regards,

Roger.

diff -ur linux-2.0.33.clean/drivers/block/Config.in linux/drivers/block/Config.in
--- linux-2.0.33.clean/drivers/block/Config.in Mon Aug 4 20:45:55 1997
+++ linux/drivers/block/Config.in Mon Jun 1 16:01:28 1998
@@ -52,6 +52,10 @@
fi
tristate 'XT harddisk support' CONFIG_BLK_DEV_XD

+bool 'Log accesses to block devices' CONFIG_BLK_DEV_LOG
+if [ "$CONFIG_BLK_DEV_LOG" = "y" ]; then
+  hex 'Log size (number of ring buffer entries)' CONFIG_LOGSIZE 0x1000
+fi

if [ "$CONFIG_BLK_DEV_HD_IDE" = "y" -o "$CONFIG_BLK_DEV_HD_ONLY" = "y" ]; then
define_bool CONFIG_BLK_DEV_HD y
diff -ur linux-2.0.33.clean/drivers/block/ll_rw_blk.c linux/drivers/block/ll_rw_blk.c
--- linux-2.0.33.clean/drivers/block/ll_rw_blk.c Wed Feb 26 20:10:15 1997
+++ linux/drivers/block/ll_rw_blk.c Mon Jun 1 23:18:14 1998
@@ -280,6 +280,105 @@
sti();
}

+
+#ifdef CONFIG_BLK_DEV_LOG
+
+
+#include <linux/malloc.h>
+
+struct ringbuf_s {	/* one logged block-device request */
+ long timestamp;	/* value of jiffies when the request was logged */
+ unsigned int blkdev;	/* bh->b_dev; WRITEFLAG marks requests logged as writes */
+ unsigned int sector;	/* bh->b_rsector of the request */
+};
+
+#define WRITEFLAG 0x80000000	/* top bit of ringbuf_s.blkdev: request logged as a write */
+/* head and tail take the values 0..CONFIG_LOGSIZE inclusive, hence the +1. */
+static struct ringbuf_s ringbuf[CONFIG_LOGSIZE + 1];
+static int overrun = 0;	/* unread entries discarded since the last "overrun:" report */
+static int head=0, tail=0;	/* writer / reader positions; the ring is empty when equal */
+
+/* Format pending block-device log entries into `buffer`, one text line
+   per entry: "<jiffies> <r|w> <device> <sector>\n" (device, sector in hex).
+   Stops once more than 900 bytes have been produced or the ring is empty;
+   the next call continues from there.  Returns the number of bytes
+   written, 0 (EOF) when nothing is pending.  "tail -f" works on the file. */
+
+int get_blockdev_log (char *buffer)
+{
+	char *out = buffer;
+	struct ringbuf_s *ent;
+
+	while (tail != head && out - buffer <= 900) {
+		ent = &ringbuf[tail];
+		if (overrun != 0) {
+			out += sprintf (out, "overrun: %d\n", overrun);
+			overrun = 0;
+		}
+		out += sprintf (out, "%ld %c %04x %x\n", ent->timestamp,
+				(ent->blkdev & WRITEFLAG) ? 'w' : 'r',
+				ent->blkdev & ~WRITEFLAG, ent->sector);
+		/* tail runs 0..CONFIG_LOGSIZE inclusive (needs CONFIG_LOGSIZE+1 slots) */
+		if (++tail > CONFIG_LOGSIZE)
+			tail = 0;
+	}
+	return out - buffer;
+}
+
+/* read() handler for the log file.  get_blockdev_log() consumes the entries
+   it formats, so a `count` below the 1000-byte chunk size gets -EINVAL. */
+static int blk_dev_log_read(struct inode * inode, struct file * file,
+			    char * buf, int count)
+{
+	char *p;
+	int cnt;
+
+	if (count < 1000) return -EINVAL;	/* never copy more than the caller asked for */
+	p = kmalloc (1000, GFP_USER);
+	if (!p) return -ENOMEM;
+	cnt = get_blockdev_log (p);	/* bytes formatted; 0 when the log is empty */
+	memcpy_tofs (buf, p, cnt);
+	kfree (p);
+	return cnt;
+}
+static struct file_operations proc_log_operations = {	/* file ops of the log's /proc entry; only read is provided */
+ NULL, /* lseek */
+ blk_dev_log_read, /* read */
+ NULL, /* write */
+ NULL, /* readdir */
+ NULL, /* select */
+ NULL, /* ioctl */
+ NULL, /* mmap */
+ NULL, /* no special open code */
+ NULL, /* no special release code */
+ NULL /* can't fsync */
+};
+
+struct inode_operations proc_blk_dev_log_inode_operations = {	/* non-static: referenced by extern from fs/proc/root.c */
+ &proc_log_operations, /* default file operations for the log file */
+ NULL, /* create */
+ NULL, /* lookup */
+ NULL, /* link */
+ NULL, /* unlink */
+ NULL, /* symlink */
+ NULL, /* mkdir */
+ NULL, /* rmdir */
+ NULL, /* mknod */
+ NULL, /* rename */
+ NULL, /* readlink */
+ NULL, /* follow_link */
+ NULL, /* readpage */
+ NULL, /* writepage */
+ NULL, /* bmap */
+ NULL, /* truncate */
+ NULL /* permission */
+};
+
+#endif
+
+
+
+
static void make_request(int major,int rw, struct buffer_head * bh)
{
unsigned int sector, count;
@@ -288,6 +387,26 @@

count = bh->b_size >> 9;
sector = bh->b_rsector;
+
+#ifdef CONFIG_BLK_DEV_LOG
+	{
+		/* Store the entry at head, then advance head: the reader prints
+		   ringbuf[tail] up to but excluding ringbuf[head].  READA is
+		   non-zero too, so test for the write codes, not just `rw`. */
+		int nexthead = head+1;
+
+		if (nexthead > CONFIG_LOGSIZE) nexthead = 0;	/* indices run 0..CONFIG_LOGSIZE */
+		ringbuf[head].timestamp = jiffies;
+		ringbuf[head].blkdev = bh->b_dev;
+		if (rw == WRITE || rw == WRITEA) ringbuf[head].blkdev |= WRITEFLAG;
+		ringbuf[head].sector = sector;
+		if (nexthead == tail) {	/* ring full: drop the oldest unread entry */
+			if (++tail > CONFIG_LOGSIZE) tail = 0;
+			overrun++;
+		}
+		head = nexthead;
+	}
+#endif

/* Uhhuh.. Nasty dead-lock possible here.. */
if (buffer_locked(bh))
diff -ur linux-2.0.33.clean/fs/proc/root.c linux/fs/proc/root.c
--- linux-2.0.33.clean/fs/proc/root.c Tue Apr 30 12:09:45 1996
+++ linux/fs/proc/root.c Mon Jun 1 23:10:48 1998
@@ -25,6 +25,10 @@

static unsigned char proc_alloc_map[PROC_NDYNAMIC / 8] = {0};

+#ifdef CONFIG_BLK_DEV_LOG
+extern struct inode_operations proc_blk_dev_log_inode_operations;	/* defined in drivers/block/ll_rw_blk.c */
+#endif
+
/*
* These are the generic /proc directory operations. They
* use the in-memory "struct proc_dir_entry" tree to parse
@@ -250,6 +254,7 @@
NULL /* permission */
};

+
void proc_root_init(void)
{
static int done = 0;
@@ -352,6 +357,15 @@
PROC_CMDLINE, 7, "cmdline",
S_IFREG | S_IRUGO, 1, 0, 0,
});
+
+#ifdef CONFIG_BLK_DEV_LOG
+ /* Block-device access log: regular file "blockdev_log", readable by everyone. */
+ proc_register(&proc_root, &(struct proc_dir_entry) {
+ PROC_BLK_DEV_LOG, 12, "blockdev_log",	/* 12 == strlen("blockdev_log") */
+ S_IFREG | S_IRUGO, 1, 0, 0,
+ 0, &proc_blk_dev_log_inode_operations,	/* reads are served by these inode operations */
+ });
+#endif
#ifdef CONFIG_RTC
proc_register(&proc_root, &(struct proc_dir_entry) {
PROC_RTC, 3, "rtc",
diff -ur linux-2.0.33.clean/include/linux/proc_fs.h linux/include/linux/proc_fs.h
--- linux-2.0.33.clean/include/linux/proc_fs.h Mon Jun 1 17:45:38 1998
+++ linux/include/linux/proc_fs.h Mon Jun 1 18:14:29 1998
@@ -43,6 +43,7 @@
PROC_MTAB,
PROC_MD,
PROC_RTC,
+ PROC_BLK_DEV_LOG,
PROC_LOCKS
};

-- 
If it's there and you can see it, it's REAL      |___R.E.Wolff@BitWizard.nl  |
If it's there and you can't see it, it's TRANSPARENT |  Tel: +31-15-2137555  |
If it's not there and you can see it, it's VIRTUAL   |__FAX:_+31-15-2138217  |
If it's not there and you can't see it, it's GONE! -- Roy Wilks, 1983  |_____|

- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.rutgers.edu