Re: RAID0: performance oddity (2.2.0pre7)

Gadi Oxman (gadio@netvision.net.il)
Sun, 17 Jan 1999 22:49:51 +0200 (IST)


On Sun, 17 Jan 1999, Chris Evans wrote:

> Hi,
>
> I have a raid0 array of 2 disks, both of which can do ~6.5Mb/sec.
>
> With chunksize = 4k (default), the md0 device can shift ~11.5Mb/sec. Not
> bad.
>
> However bumping up the chunksize to 64k _decreases_ performance on the
> array to ~6Mb/sec. This is slower than either individual disk!
>
> I would have thought increasing chunksize would increase performance due
> to decreased overhead?
>
> Performance measurements are taken using "hdparm"
>
> Cheers
> Chris

hdparm reads the raw device through fs/block_dev.c, which queues at most 64 buffers
of pending I/O to the device at a time. With the default 1kB block size that is only
64kB in flight, which fits within a single 64kB chunk, so we do not queue I/O to both
drives simultaneously.
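
To put numbers on that, here is a rough userspace sketch (not kernel code; the values
simply mirror the two-disk setup above): 64 pending 1kB buffers span sixteen 4kB chunks
but only a single 64kB chunk, so at the larger chunk size only one drive is kept busy
at a time.

/* Rough illustration only -- not kernel code. */
#include <stdio.h>

int main(void)
{
	int window_kb = 64 * 1;		/* 64 pending buffers of 1kB each */
	int ndisks = 2;			/* two-disk RAID0 */
	int chunk_kb;

	for (chunk_kb = 4; chunk_kb <= 64; chunk_kb *= 16) {
		int chunks = (window_kb + chunk_kb - 1) / chunk_kb;
		int busy = chunks < ndisks ? chunks : ndisks;
		printf("chunk %2dkB: %2d chunks in flight -> %d drive(s) busy\n",
		       chunk_kb, chunks, busy);
	}
	return 0;
}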

Note that performance through the filesystem has probably already improved with the
move from a 4kB to a 64kB chunk size -- the following patch can potentially increase
performance only for direct access through the /dev/md0 device.
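
With the default 1kB block size, the numbers in the patch below work out as follows
(just the arithmetic behind the new constant):

  READAHEAD_SECTORS = 128 * 4 * 2 = 1024 sectors = 512kB of pending read-ahead
  nbuf = 1024 / (1024 >> 9) = 512 buffers, below the 1024-pointer-per-page cap on i386
  512kB / 64kB per chunk = 8 chunks, so both drives receive I/O simultaneously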

Gadi

--- linux/fs/block_dev.c.old Sun Jan 17 22:31:58 1999
+++ linux/fs/block_dev.c Sun Jan 17 22:45:27 1999
@@ -15,6 +15,7 @@

#define MAX_BUF_PER_PAGE (PAGE_SIZE / 512)
#define NBUF 64
+#define READAHEAD_SECTORS (128 * 4 * 2)

ssize_t block_write(struct file * filp, const char * buf,
size_t count, loff_t *ppos)
@@ -152,12 +153,13 @@
size_t blocks, rblocks, left;
int bhrequest, uptodate;
struct buffer_head ** bhb, ** bhe;
- struct buffer_head * buflist[NBUF];
- struct buffer_head * bhreq[NBUF];
+ struct buffer_head ** buflist;
+ struct buffer_head ** bhreq;
unsigned int chars;
loff_t size;
kdev_t dev;
ssize_t read;
+ int nbuf;

dev = inode->i_rdev;
blocksize = BLOCK_SIZE;
@@ -187,6 +189,18 @@
left = count;
if (left <= 0)
return 0;
+
+ if ((buflist = (struct buffer_head **) __get_free_page(GFP_KERNEL)) == NULL)
+ return -ENOMEM;
+ if ((bhreq = (struct buffer_head **) __get_free_page(GFP_KERNEL)) == NULL) {
+ free_page((unsigned long) buflist);
+ return -ENOMEM;
+ }
+
+ nbuf = READAHEAD_SECTORS / (blocksize >> 9);
+ if (nbuf > PAGE_SIZE / sizeof(struct buffer_head *))
+ nbuf = PAGE_SIZE / sizeof(struct buffer_head *);
+
read = 0;
block = offset >> blocksize_bits;
offset &= blocksize-1;
@@ -194,16 +208,23 @@
rblocks = blocks = (left + offset + blocksize - 1) >> blocksize_bits;
bhb = bhe = buflist;
if (filp->f_reada) {
+#if 0
if (blocks < read_ahead[MAJOR(dev)] / (blocksize >> 9))
blocks = read_ahead[MAJOR(dev)] / (blocksize >> 9);
+#else
+ blocks += read_ahead[MAJOR(dev)] / (blocksize >> 9);
+#endif
if (rblocks > blocks)
blocks = rblocks;

}
if (block + blocks > size) {
blocks = size - block;
- if (blocks == 0)
+ if (blocks == 0) {
+ free_page((unsigned long) buflist);
+ free_page((unsigned long) bhreq);
return 0;
+ }
}

/* We do this in a two stage process. We first try to request
@@ -227,7 +248,7 @@
bhreq[bhrequest++] = *bhb;
}

- if (++bhb == &buflist[NBUF])
+ if (++bhb == &buflist[nbuf])
bhb = buflist;

/* If the block we have on hand is uptodate, go ahead
@@ -248,7 +269,7 @@
wait_on_buffer(*bhe);
if (!buffer_uptodate(*bhe)) { /* read error? */
brelse(*bhe);
- if (++bhe == &buflist[NBUF])
+ if (++bhe == &buflist[nbuf])
bhe = buflist;
left = 0;
break;
@@ -270,7 +291,7 @@
put_user(0,buf++);
}
offset = 0;
- if (++bhe == &buflist[NBUF])
+ if (++bhe == &buflist[nbuf])
bhe = buflist;
} while (left > 0 && bhe != bhb && (!*bhe || !buffer_locked(*bhe)));
} while (left > 0);
@@ -278,9 +299,12 @@
/* Release the read-ahead blocks */
while (bhe != bhb) {
brelse(*bhe);
- if (++bhe == &buflist[NBUF])
+ if (++bhe == &buflist[nbuf])
bhe = buflist;
};
+
+ free_page((unsigned long) buflist);
+ free_page((unsigned long) bhreq);
if (!read)
return -EIO;
filp->f_reada = 1;
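
If you want to verify the raw-device path with something other than hdparm, a minimal
read loop along these lines should do (userspace sketch only; run it as root against
an idle array -- the device name, read size and total amount are just examples):

/* Minimal sequential-read throughput check (userspace sketch only). */
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/time.h>

int main(void)
{
	const char *dev = "/dev/md0";		/* example device */
	size_t bufsize = 64 * 1024;		/* example read size */
	size_t total = 64 * 1024 * 1024;	/* read 64MB in total */
	char *buf = malloc(bufsize);
	struct timeval t0, t1;
	size_t done = 0;
	ssize_t n;
	double secs;
	int fd = open(dev, O_RDONLY);

	if (fd < 0 || buf == NULL) {
		perror("setup");
		return 1;
	}
	gettimeofday(&t0, NULL);
	while (done < total && (n = read(fd, buf, bufsize)) > 0)
		done += n;
	gettimeofday(&t1, NULL);
	secs = (t1.tv_sec - t0.tv_sec) + (t1.tv_usec - t0.tv_usec) / 1e6;
	printf("%lu kB in %.2f s = %.2f MB/s\n",
	       (unsigned long) (done >> 10), secs, done / 1e6 / secs);
	close(fd);
	free(buf);
	return 0;
}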

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/