ext2/3 performance regression in 2.6 vs 2.4 for small interleaved writes

From: Jon Burgess
Date: Wed Feb 11 2004 - 14:06:54 EST


I wrote a small benchmark tool to simulate the pattern of writes which occur when slowly streaming files to disk.
This is trying to replicate the filesystem activity when I record multiple TV and radio channels to disk.

I have attached a copy of the test program. It measures how long it takes to write a number of files in parallel, writing a small amount of data to each file at a time. I noticed that results for ext2 on 2.6.2 are much slower than 2.4.22:

Write speed in MB/s using an ext2 filesystem for 1 and 2 streams:
Num streams: 1 2
linux-2.4.22 10.47 6.98
linux-2.6.2 9.71 0.34

"vmstat" agrees with the performance figures. It seems that the pattern of small interleaved writes to two files really upsets something in the 2.6 code.

During the test the disk light is on solid, and it really slows down any other disk access. It looks like the disk is continuously seeking backwards and forwards, perhaps rewriting the metadata.

Does this look like a problem, or is the test unrealistic?

Thanks,
Jon


#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/param.h>
#include <string.h>
#include <sys/time.h>

/* Number of bytes handed to each individual write() call. */
#define BSIZE (4 * 1024)

/* Capacity of the file-name prefix buffer, terminating NUL included. */
#define MAX_NAME (256)

/* Prefix shared by all test files; main() stores "temp_" here. */
char base_name[MAX_NAME];

/*
 * Upper bound on the characters needed to print any int in decimal:
 * at most three digits per byte, plus a sign and the terminating NUL.
 */
#define NAME_SUFFIX_MAX (sizeof(int) * 3 + 2)

/*
 * Build the name of test file number n as base_name followed by n in decimal.
 *
 * Returns a pointer to a static buffer that is overwritten by the next call,
 * so the caller must use (or copy) the result before calling get_name() again.
 */
char *get_name(int n)
{
    /*
     * Large enough for the longest possible prefix plus any int, so the name
     * is never truncated; snprintf() still bounds the write as a safety net.
     */
    static char name[MAX_NAME + NAME_SUFFIX_MAX];

    snprintf(name, sizeof name, "%s%d", base_name, n);
    return name;
}


/*
 * Print how long a transfer took and the resulting throughput.
 *
 * start, end - timestamps taken before and after the transfer
 * len        - amount of data transferred, in the unit printed as "Mb"
 *
 * All stdio streams are flushed afterwards so the line appears immediately.
 */
void display_rate(struct timeval start, struct timeval end, int len)
{
    /* Seconds and microseconds are differenced separately, then combined. */
    int whole_secs = end.tv_sec - start.tv_sec;
    int micro_secs = end.tv_usec - start.tv_usec;
    float elapsed = whole_secs + micro_secs / 1000000.0;

    printf("Transferred %dMb of data in %.2f seconds (%.2fMb/s)\n",
           len, elapsed, len / elapsed);
    fflush(NULL);
}

/*
 * Write sz * 1024 * 1024 bytes to each of n files, interleaving the writes:
 * one BSIZE-byte block goes to every file in turn before the next round.
 *
 * The files are named get_name(0) .. get_name(n - 1) and are created or
 * truncated. The measured interval starts just before the first write and
 * ends after every file has been fdatasync()ed and closed; the result is
 * reported through display_rate().
 *
 * Any failure prints a message and terminates the process: exit status 1 for
 * setup problems (bad n, allocation, open), 2 for write/sync/close problems.
 */
void create_files(int n, int sz)
{
    int *out;
    int i;
    char buf[BSIZE];
    long long pos;
    long long bytes_per_file;
    struct timeval start, end;

    /* A non-positive count would make the descriptor table meaningless. */
    if (n <= 0) {
        fprintf(stderr, "Number of files must be positive\n");
        exit(1);
    }

    /* Heap allocation keeps a large n from overflowing the stack. */
    out = calloc((size_t)n, sizeof *out);
    if (out == NULL) {
        perror("Allocating descriptor table");
        exit(1);
    }

    printf("Writing %dMb of data to %d files in parallel\n", sz, n);
    fflush(NULL);

    for (i = 0; i < n; i++) {
        out[i] = open(get_name(i), O_WRONLY | O_CREAT | O_TRUNC, 0666);
        if (out[i] < 0) {
            perror("Creating output file");
            exit(1);
        }
    }

    memset(buf, 0, BSIZE);

    /* Computed in long long so sizes of 2048 and above do not overflow int. */
    bytes_per_file = (long long)sz * 1024 * 1024;

    gettimeofday(&start, NULL);

    for (pos = 0; pos < bytes_per_file; pos += BSIZE) {
        for (i = 0; i < n; i++) {
            /* A short write is treated the same as a failed one. */
            if (write(out[i], buf, BSIZE) != BSIZE) {
                fprintf(stderr, "Problem writing output file\n");
                exit(2);
            }
        }
    }

    for (i = 0; i < n; i++) {
        /* A failed flush means the data may not be on disk, so the timing
         * would be meaningless; report it instead of printing a rate. */
        if (fdatasync(out[i]) != 0) {
            perror("Syncing output file");
            exit(2);
        }
        if (close(out[i]) != 0) {
            perror("Closing output file");
            exit(2);
        }
    }

    gettimeofday(&end, NULL);

    free(out);

    /* pos is the byte count written per file; widen before multiplying. */
    display_rate(start, end, (int)((long long)n * pos / (1024 * 1024)));
}


/*
 * Remove the test files get_name(0) .. get_name(n - 1).
 *
 * The result of unlink() is not examined, so files that do not exist are
 * skipped silently.
 */
void delete_files(int n)
{
    int idx = 0;

    while (idx < n) {
        unlink(get_name(idx));
        idx++;
    }
}

/*
 * Run one benchmark pass: write s units of data to each of n files in
 * parallel, then print a blank line to separate this pass from the next.
 */
void run_test(int n, int s)
{
    /* Clear out any files left behind by an earlier run. */
    delete_files(n);

    /* The timed part of the pass. */
    create_files(n, s);

    /* Clean up the files that were just written. */
    delete_files(n);

    fputs("\n", stdout);
    fflush(NULL);
}

/*
 * Entry point: runs the benchmark with one stream and then with two streams,
 * halving the per-file size for the second pass so both passes write the same
 * total amount. Command-line arguments are not used.
 */
int main(int argc, char *argv[])
{
    unsigned int total_size = 16;

    /* Test files are named "temp_0", "temp_1", ... in the current directory. */
    strcpy(base_name, "temp_");

    /* Single stream. */
    run_test(1, total_size);

    /* Two interleaved streams. */
    run_test(2, total_size / 2);

    return 0;
}