Re: 3ware 6410 RAID 10 performance?

From: Larry McVoy (lm@bitmover.com)
Date: Fri May 04 2001 - 16:29:15 EST


On Fri, May 04, 2001 at 02:03:35PM -0700, Adam Radford wrote:
> Larry,
>
> If there's anything to fix in the driver for this problem I'd be interested,
> however I have not seen this problem before.
>
> What benchmark (and options) are you running? bonnie++ ?
>
> BTW... I am the author of the Linux driver.

First let me say I really like this card. It's definitely the right
idea and when it works it screams. I used to work on really large I/O
systems at SGI so I have a fair amount of experience in this area (the
last system I worked on images the entire earth down to about 32 point
fonts, pretty wacky, our tax dollars at work).

Anyway, the benchmark I use is a program called lmdd, it's part of
lmbench but I've pulled all the files together into one and included
it below. It's really a handy tool, lots of different file system and
disk drive people use it. For years, I've known I need to wrap a shell
script around it so that we could toss stuff like bonnie and iozone.

The typical ways I run it

        # allocate and touch a buffer to flush memory
        lmdd opat=1 bs=512m count=1

        # write a file, watching for variation in write times
        lmdd of=XXX bs=100m wt=1 move=2g

        # read a file, watching for variation in read times
        lmdd if=XXX bs=100m rt=1 move=2g

lmdd is a lot like dd(1) but has the following key differences

        a) the default if is an internal form of if=/dev/zero, it
           doesn't bother with the read from /dev/zero.
        b) the default of is an internal form of of=/dev/null, it
           doesn't bother with the write to /dev/null.
        c) default blocksize is 8KB

On top of that, it has a number of options not found in dd

        fork=1 fork to do write I/O
        fsync=1 fsync output before exit
        ipat=1 check input for a pattern (see opat)
        mismatch=1 use with ipat, stop at first mismatch
        move=<amt> move <amt> bytes; takes k, m, and g suffixes
                        k/m/g are powers of ten, not 2, the disk people
                        want powers of ten. Yeah, it sucks.
        opat=1 put a pattern in the output stream
        rand=<sz> do randoms over this size
        print=<p> different output formats, you want print=1 with rand
        rt=1 time and report results for each read
        wt=1 time and report results for each write
        srand=seed seed the random number generator, used with rand.

There are others, read the source for them, those are the useful ones.

Another tidbit of data on the 3ware card issues - it is definitely associated
with that card or the drives on that card; if I do the same tests on a file
system which is not going through the 3ware card, I get repeatable 27MB/sec
performance. Yes, I did try it after the 3ware card was wedged, the
non 3ware performance is still good.

Adam, if you want to ssh in and play with the machine directly, call me
at 415-401-8808 x101 and I'll set you up immediately.

Thanks,

--lm

/*
 * $Id$
 */
#ifndef _BENCH_H
#define _BENCH_H

#ifdef WIN32
#include <windows.h>
typedef unsigned char bool_t;
#endif

#include <assert.h>
#include <ctype.h>
#include <stdio.h>
#ifndef WIN32
#include <unistd.h>
#endif
#include <stdlib.h>
#include <fcntl.h>
#include <signal.h>
#include <errno.h>
#ifndef WIN32
#include <strings.h>
#endif
#include <sys/types.h>
#ifndef WIN32
#include <sys/mman.h>
#endif
#include <sys/stat.h>
#ifndef WIN32
#include <sys/wait.h>
#include <time.h>
#include <sys/time.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/resource.h>
#define PORTMAP
#include <rpc/rpc.h>
#endif

/*
 * Short integer type names used by the benchmark library.
 * u64 comes from the platform's u64_t when HAVE_u64_t is defined,
 * otherwise it falls back to unsigned long long.
 */
typedef unsigned int u32; /* NOTE(review): width follows the platform's int - confirm it is 32 bits */

#ifdef HAVE_u64_t
typedef u64_t u64;
#else
typedef unsigned long long u64;
#endif

#define NO_PORTMAPPER /* needs to be up here, lib_*.h look at it */
#include "stats.h"
#include "timing.h"

#ifdef DEBUG
# define debug(x) fprintf x
#else
# define debug(x)
#endif
/*
 * Identifiers for the networking benchmarks.  NO_PORTMAPPER is defined
 * unconditionally earlier in this header, so the negative values in the
 * first branch are the ones in effect; the (u_long) values in the #else
 * branch (and VERS) are only used if that define is removed.
 * NOTE(review): how the negative values are consumed is not visible in
 * this file - confirm against the networking library.
 */
#ifdef NO_PORTMAPPER
#define TCP_SELECT -31233
#define TCP_XACT -31234
#define TCP_CONTROL -31235
#define TCP_DATA -31236
#define TCP_CONNECT -31237
#define UDP_XACT -31238
#define UDP_DATA -31239
#else
#define TCP_SELECT (u_long)404038 /* XXX - unregistered */
#define TCP_XACT (u_long)404039 /* XXX - unregistered */
#define TCP_CONTROL (u_long)404040 /* XXX - unregistered */
#define TCP_DATA (u_long)404041 /* XXX - unregistered */
#define TCP_CONNECT (u_long)404042 /* XXX - unregistered */
#define UDP_XACT (u_long)404032 /* XXX - unregistered */
#define UDP_DATA (u_long)404033 /* XXX - unregistered */
#define VERS (u_long)1
#endif

#define UNIX_CONTROL "/tmp/lmbench.ctl"
#define UNIX_DATA "/tmp/lmbench.data"
#define UNIX_LAT "/tmp/lmbench.lat"

/*
 * socket send/recv buffer optimizations
 */
#define SOCKOPT_READ 0x0001
#define SOCKOPT_WRITE 0x0002
#define SOCKOPT_RDWR 0x0003
#define SOCKOPT_PID 0x0004
#define SOCKOPT_REUSE 0x0008
#define SOCKOPT_NONE 0

#ifndef SOCKBUF
#define SOCKBUF (1024*1024)
#endif

#ifndef XFERSIZE
#define XFERSIZE (64*1024) /* all bandwidth I/O should use this */
#endif

/*
 * Compatibility shims: on SYS5 and WIN32 the BSD string routines are
 * mapped onto their ANSI equivalents (note the swapped source and
 * destination arguments of bcopy versus memcpy).
 */
#if defined(SYS5) || defined(WIN32)
#define bzero(b, len) memset(b, 0, len)
#define bcopy(s, d, l) memcpy(d, s, l)
#define rindex(s, c) strrchr(s, c)
#endif
/* Every use of the name gettime, including a definition, becomes usecs_spent. */
#define gettime usecs_spent
/* streq(a, b) expands to !strcmp(a, b): non-zero when the strings are equal. */
#define streq !strcmp
/* Plain textual alias, not a typedef. */
#define ulong unsigned long

#ifdef WIN32
#include <process.h>
#define getpid _getpid
#endif

#define SMALLEST_LINE 32 /* smallest cache line size */
#define TIME_OPEN2CLOSE

/*
 * GO_AWAY installs exit() as the SIGALRM handler and arms a one hour
 * alarm.  It expands to two statements (the second already terminated
 * by ';'), so it must not be used as the unbraced body of an if/else.
 */
#define GO_AWAY signal(SIGALRM, exit); alarm(60 * 60);
/*
 * Run lengths in microseconds; they are compared against the value
 * returned by stop(), which reports microseconds.
 */
#define REAL_SHORT 50000
#define SHORT 1000000
#define MEDIUM 2000000
#define LONGER 7500000 /* for networking data transfers */
#define ENOUGH REAL_SHORT

/* Capacity of result_t and the number of repetitions BENCH/BENCHO run. */
#define TRIES 11

/*
 * A set of up to TRIES timing samples.  Sample i is the pair
 * (u[i], n[i]); insertsort() keeps the samples ordered by u/n,
 * biggest first.
 */
typedef struct {
        int N; /* number of samples currently stored */
        u64 u[TRIES]; /* elapsed time of each sample, as reported by gettime() */
        u64 n[TRIES]; /* iteration count (get_n()) belonging to each u[] entry */
} result_t;
void insertinit(result_t *r);
void insertsort(u64, u64, result_t *);
void save_median();
void save_minimum();
void save_results(result_t *r);
void get_results(result_t *r);

/*
 * BENCHO(loop_body, overhead_body, enough)
 *
 * Time loop_body with the cost of overhead_body subtracted.
 * Runs TRIES repetitions (or a single one when get_enough(enough)
 * exceeds 100000 and enough is non-zero).  Each repetition times
 * overhead_body and then loop_body with BENCH1 and records every
 * non-zero timing in its own sorted result set.  Afterwards entry i of
 * the loop results is reduced by (iterations of that entry) times the
 * per-iteration cost of overhead entry i, and the adjusted set is
 * stored with save_results().
 *
 * loop_body is executed once up front to warm the cache unless
 * enough >= LONGER.  `enough` is expanded several times, so pass an
 * expression without side effects.
 */
#define BENCHO(loop_body, overhead_body, enough) { \
        int __i, __N; \
        double __oh; \
        result_t __overhead, __r; \
        insertinit(&__overhead); insertinit(&__r); \
        __N = (enough == 0 || get_enough(enough) <= 100000) ? TRIES : 1;\
        if (enough < LONGER) {loop_body;} /* warm the cache */ \
        for (__i = 0; __i < __N; ++__i) { \
                BENCH1(overhead_body, enough); \
                if (gettime() > 0) \
                        insertsort(gettime(), get_n(), &__overhead); \
                BENCH1(loop_body, enough); \
                if (gettime() > 0) \
                        insertsort(gettime(), get_n(), &__r); \
        } \
        for (__i = 0; __i < __r.N; ++__i) { \
                __oh = __overhead.u[__i] / (double)__overhead.n[__i]; \
                __r.u[__i] -= (u64)((double)__r.n[__i] * __oh); \
        } \
        save_results(&__r); \
}

/*
 * BENCH(loop_body, enough)
 *
 * Time loop_body TRIES times (once when get_enough(enough) exceeds
 * 100000 and enough is non-zero) using BENCH1, collect every non-zero
 * timing in a sorted result set and store it with save_results().
 * loop_body is executed once beforehand to warm the cache unless
 * enough >= LONGER.  `enough` is expanded several times, so pass an
 * expression without side effects.
 */
#define BENCH(loop_body, enough) { \
        long __i, __N; \
        result_t __r; \
        insertinit(&__r); \
        __N = (enough == 0 || get_enough(enough) <= 100000) ? TRIES : 1;\
        if (enough < LONGER) {loop_body;} /* warm the cache */ \
        for (__i = 0; __i < __N; ++__i) { \
                BENCH1(loop_body, enough); \
                if (gettime() > 0) \
                        insertsort(gettime(), get_n(), &__r); \
        } \
        save_results(&__r); \
}

/*
 * BENCH1(loop_body, enough)
 *
 * One calibrated measurement: run BENCH_INNER, then subtract the
 * timer overhead (t_overhead()) and the loop overhead for the executed
 * iterations (get_n() * l_overhead()) from the elapsed time.  The
 * corrected value is stored with settime(), clamped at zero.
 */
#define BENCH1(loop_body, enough) { \
        double __usecs; \
        BENCH_INNER(loop_body, enough); \
        __usecs = gettime(); \
        __usecs -= t_overhead() + get_n() * l_overhead(); \
        settime(__usecs >= 0. ? (u64)__usecs : 0.); \
}
        
/*
 * BENCH_INNER(loop_body, enough)
 *
 * Repeat loop_body __iterations times between start() and stop() and
 * adapt __iterations until the elapsed time reaches at least 95% of
 * get_enough(enough).  __iterations is static, so each expansion site
 * keeps its calibration between invocations.
 *
 * When a run falls outside 0.99x..1.2x of the target:
 *   - if it took more than 150 (the unit stop() reports), the count is
 *     rescaled proportionally to aim at 1.1x the target;
 *   - otherwise the count is multiplied by 8, and the measurement is
 *     abandoned with a result of 0 once the count exceeds 2^27.
 *
 * On exit the iteration count is published with save_n() and the
 * elapsed time of the last run with settime().
 */
#define BENCH_INNER(loop_body, enough) { \
        static u_long __iterations = 1; \
        int __enough = get_enough(enough); \
        u_long __n; \
        double __result = 0.; \
                                                                        \
        while(__result < 0.95 * __enough) { \
                start(0); \
                for (__n = __iterations; __n > 0; __n--) { \
                        loop_body; \
                } \
                __result = stop(0,0); \
                if (__result < 0.99 * __enough \
                    || __result > 1.2 * __enough) { \
                        if (__result > 150.) { \
                                double tmp = __iterations / __result; \
                                tmp *= 1.1 * __enough; \
                                __iterations = (u_long)(tmp + 1); \
                        } else { \
                                if (__iterations > (u_long)1<<27) { \
                                        __result = 0.; \
                                        break; \
                                } \
                                __iterations <<= 3; \
                        } \
                } \
        } /* while */ \
        save_n((u64)__iterations); settime((u64)__result); \
}

/*
 * Generated from msg.x which is included here:

        program XACT_PROG {
            version XACT_VERS {
                char
                RPC_XACT(char) = 1;
            } = 1;
        } = 3970;

 * Please do not edit this file.
 * It was generated using rpcgen.
 */

#include <rpc/types.h>

#define XACT_PROG ((u_long)404040)
#define XACT_VERS ((u_long)1)
#define RPC_XACT ((u_long)1)
#define RPC_EXIT ((u_long)2)
extern char *rpc_xact_1();
extern char *client_rpc_xact_1();

#endif /* _BENCH_H */
#ifndef _STATS_H
#define _STATS_H

#include "bench.h"
#include "timing.h"

/* Absolute value.  The argument is evaluated twice; avoid side effects. */
#define ABS(x) ((x) < 0 ? -(x) : (x))

int int_compare(const void *a, const void *b);
int u64_compare(const void *a, const void *b);
int double_compare(const void *a, const void *b);

typedef int (*int_stat)(int *values, int size);
typedef u64 (*u64_stat)(u64 *values, int size);
typedef double (*double_stat)(double *values, int size);

int int_median(int *values, int size);
u64 u64_median(u64 *values, int size);
double double_median(double *values, int size);

int int_mean(int *values, int size);
u64 u64_mean(u64 *values, int size);
double double_mean(double *values, int size);

int int_min(int *values, int size);
u64 u64_min(u64 *values, int size);
double double_min(double *values, int size);

int int_max(int *values, int size);
u64 u64_max(u64 *values, int size);
double double_max(double *values, int size);

double int_stderr(int *values, int size);
double u64_stderr(u64 *values, int size);
double double_stderr(double *values, int size);

double int_bootstrap_stderr(int *values, int size, int_stat f);
double u64_bootstrap_stderr(u64 *values, int size, u64_stat f);
double double_bootstrap_stderr(double *values, int size, double_stat f);

void regression(double *x, double *y, double *sig, int n,
                   double *a, double *b, double *sig_a, double *sig_b,
                   double *chi2);

#endif /* _STATS_H */
/*
 * $Id$
 */
#ifndef _TIMING_H
#define _TIMING_H

char *p64(u64 big);
char *p64sz(u64 big);
double Delta(void);
double Now(void);
void adjust(int usec);
void bandwidth(u64 bytes, u64 times, int verbose);
int bytes(char *s);
void context(u64 xfers);
u64 delta(void);
int get_enough(int);
u64 get_n(void);
void kb(u64 bytes);
double l_overhead(void);
char last(char *s);
void latency(u64 xfers, u64 size);
void mb(u64 bytes);
void micro(char *s, u64 n);
void micromb(u64 mb, u64 n);
void milli(char *s, u64 n);
void morefds(void);
void nano(char *s, u64 n);
u64 now(void);
void ptime(u64 n);
void rusage(void);
void save_n(u64);
void settime(u64 usecs);
void start(struct timeval *tv);
u64 stop(struct timeval *begin, struct timeval *end);
u64 t_overhead(void);
double timespent(void);
void timing(FILE *out);
u64 tvdelta(struct timeval *, struct timeval *);
void tvsub(struct timeval *tdiff, struct timeval *t1, struct timeval *t0);
void use_int(int result);
void use_pointer(void *result);
u64 usecs_spent(void);
void touch(char *buf, int size);

#if defined(hpux) || defined(__hpux)
int getpagesize();
#endif

#endif /* _TIMING_H */
/*
 * a timing utilities library
 *
 * Requires 64bit integers to work.
 *
 * %W% %@%
 *
 * Copyright (c) 1994-1998 Larry McVoy.
 */
#define _LIB /* bench.h needs this */
#include "bench.h"

#define nz(x) ((x) == 0 ? 1 : (x))

/*
 * I know you think these should be 2^10 and 2^20, but people are quoting
 * disk sizes in powers of 10, and bandwidths are all power of ten.
 * Deal with it.
 */
#define MB (1000*1000.0)
#define KB (1000.0)

static struct timeval start_tv, stop_tv;
FILE *ftiming;
volatile u64 use_result_dummy; /* !static for optimizers. */
static u64 iterations;
static void init_timing(void);

#if defined(hpux) || defined(__hpux)
#include <sys/mman.h>
#endif

#ifdef RUSAGE
#include <sys/resource.h>
#define SECS(tv) (tv.tv_sec + tv.tv_usec / 1000000.0)
#define mine(f) (int)(ru_stop.f - ru_start.f)

static struct rusage ru_start, ru_stop;

void
rusage(void)
{
        double sys, user, idle;
        double per;

        sys = SECS(ru_stop.ru_stime) - SECS(ru_start.ru_stime);
        user = SECS(ru_stop.ru_utime) - SECS(ru_start.ru_utime);
        idle = timespent() - (sys + user);
        per = idle / timespent() * 100;
        if (!ftiming) ftiming = stderr;
        fprintf(ftiming, "real=%.2f sys=%.2f user=%.2f idle=%.2f stall=%.0f%% ",
            timespent(), sys, user, idle, per);
        fprintf(ftiming, "rd=%d wr=%d min=%d maj=%d ctx=%d\n",
            mine(ru_inblock), mine(ru_oublock),
            mine(ru_minflt), mine(ru_majflt),
            mine(ru_nvcsw) + mine(ru_nivcsw));
}

#endif /* RUSAGE */
/*
 * Select the stream that the reporting routines print to.
 */
void
timing(FILE *out)
{
        ftiming = out;
        return;
}

/*
 * Record the current time as the beginning of a timed interval.
 * The timestamp goes into *tv, or into the library's own start_tv
 * when tv is NULL.
 */
void
start(struct timeval *tv)
{
        struct timeval *when = tv;

        if (when == NULL) {
                when = &start_tv;
        }
#ifdef RUSAGE
        getrusage(RUSAGE_SELF, &ru_start);
#endif
        (void) gettimeofday(when, (struct timezone *) 0);
}

/*
 * Record the end of a timed interval and return its length in
 * microseconds.  A NULL `end` stores the timestamp in the library's
 * stop_tv; a NULL `begin` measures from the library's start_tv.
 */
u64
stop(struct timeval *begin, struct timeval *end)
{
        struct timeval *finish = (end == NULL) ? &stop_tv : end;

        (void) gettimeofday(finish, (struct timezone *) 0);
#ifdef RUSAGE
        getrusage(RUSAGE_SELF, &ru_stop);
#endif

        return tvdelta((begin == NULL) ? &start_tv : begin, finish);
}

u64
now(void)
{
        struct timeval t;
        u64 m;

        (void) gettimeofday(&t, (struct timezone *) 0);
        m = t.tv_sec;
        m *= 1000000;
        m += t.tv_usec;
        return (m);
}

/*
 * Current time of day in microseconds, as a double.
 */
double
Now(void)
{
        struct timeval tv;
        double usecs;

        (void) gettimeofday(&tv, (struct timezone *) 0);
        usecs = tv.tv_sec * 1000000.0;
        usecs = usecs + tv.tv_usec;
        return (usecs);
}

/*
 * Return the number of microseconds elapsed since the previous call.
 * The first call only records a baseline and returns 0.
 *
 * A dedicated flag marks whether a baseline exists.  Testing
 * last.tv_usec for that purpose misfires whenever a sample happens to
 * land exactly on a whole second: the next call would then silently
 * restart the measurement and report 0.
 */
u64
delta(void)
{
        static struct timeval prev;
        static int have_prev = 0;
        struct timeval t;
        struct timeval diff;
        u64 m;

        (void) gettimeofday(&t, (struct timezone *) 0);
        if (!have_prev) {
                have_prev = 1;
                prev = t;
                return (0);
        }
        tvsub(&diff, &t, &prev);
        prev = t;
        m = diff.tv_sec;
        m *= 1000000;
        m += diff.tv_usec;
        return (m);
}

/*
 * Seconds elapsed between the library's recorded start time
 * (start_tv) and now.
 */
double
Delta(void)
{
        struct timeval cur, elapsed;
        double secs;

        (void) gettimeofday(&cur, (struct timezone *) 0);
        tvsub(&elapsed, &cur, &start_tv);
        secs = elapsed.tv_sec + elapsed.tv_usec / 1000000.0;
        return (secs);
}

/*
 * Remember the iteration count that belongs to the current timing.
 */
void
save_n(u64 n)
{
        iterations = n;
        return;
}

/*
 * Iteration count most recently stored with save_n().
 */
u64
get_n(void)
{
        u64 count = iterations;

        return (count);
}

/*
 * Force the recorded interval to be exactly `usecs` microseconds long:
 * the start time is zeroed and the stop time is set to usecs.
 */
void
settime(u64 usecs)
{
        stop_tv.tv_usec = usecs % 1000000;
        stop_tv.tv_sec = usecs / 1000000;
        bzero((void*)&start_tv, sizeof(start_tv));
}

/*
 * Report how much data was moved and at what rate during the recorded
 * interval.  The elapsed time is divided by `times` before the rate is
 * computed.  `bytes` is converted to megabytes using the power-of-ten
 * MB constant.
 *
 * verbose != 0: one descriptive line.
 * verbose == 0: "<MB> <MB/sec>", with six decimals for values below 1
 * and two decimals otherwise.
 */
void
bandwidth(u64 bytes, u64 times, int verbose)
{
        struct timeval elapsed;
        double mbytes, secs, rate;

        tvsub(&elapsed, &stop_tv, &start_tv);
        secs = elapsed.tv_sec;
        secs *= 1000000;
        secs += elapsed.tv_usec;
        secs /= 1000000;
        secs /= times;
        mbytes = bytes / MB;
        rate = mbytes / secs;

        if (!ftiming) {
                ftiming = stderr;
        }
        if (verbose) {
                (void) fprintf(ftiming,
                    "%.4f MB in %.4f secs, %.4f MB/sec\n",
                    mbytes, secs, rate);
                return;
        }
        (void) fprintf(ftiming, mbytes < 1 ? "%.6f " : "%.2f ", mbytes);
        (void) fprintf(ftiming, rate < 1 ? "%.6f\n" : "%.2f\n", rate);
}

/*
 * Print the transfer rate for `bytes` over the recorded interval in
 * KB/sec.  A zero-length interval is treated as one second.
 */
void
kb(u64 bytes)
{
        struct timeval elapsed;
        double secs, per_sec;

        tvsub(&elapsed, &stop_tv, &start_tv);
        secs = elapsed.tv_sec + elapsed.tv_usec / 1000000.0;
        per_sec = bytes / nz(secs);
        if (!ftiming) {
                ftiming = stderr;
        }
        (void) fprintf(ftiming, "%.0f KB/sec\n", per_sec / KB);
}

/*
 * Print the transfer rate for `bytes` over the recorded interval in
 * MB/sec.  A zero-length interval is treated as one second.
 */
void
mb(u64 bytes)
{
        struct timeval elapsed;
        double secs, per_sec;

        tvsub(&elapsed, &stop_tv, &start_tv);
        secs = elapsed.tv_sec + elapsed.tv_usec / 1000000.0;
        per_sec = bytes / nz(secs);
        if (!ftiming) {
                ftiming = stderr;
        }
        (void) fprintf(ftiming, "%.2f MB/sec\n", per_sec / MB);
}

/*
 * Report the recorded interval as a latency: the number and size of
 * transfers, the time per transfer in milliseconds, and the resulting
 * throughput (MB/sec when above 1 MB/sec, KB/sec otherwise).
 */
void
latency(u64 xfers, u64 size)
{
        struct timeval elapsed;
        double secs, msec_each, moved;
        const char *unit = xfers > 1 ? "/xfer" : "s";

        if (!ftiming) {
                ftiming = stderr;
        }
        tvsub(&elapsed, &stop_tv, &start_tv);
        secs = elapsed.tv_sec + elapsed.tv_usec / 1000000.0;
        msec_each = secs * 1000 / xfers;
        moved = xfers * size;

        if (xfers > 1) {
                fprintf(ftiming, "%d %dKB xfers in %.2f secs, ",
                    (int) xfers, (int) (size / KB), secs);
        } else {
                fprintf(ftiming, "%.1fKB in ", size / KB);
        }

        if (msec_each > 100) {
                fprintf(ftiming, "%.0f millisec%s, ", msec_each, unit);
        } else {
                fprintf(ftiming, "%.4f millisec%s, ", msec_each, unit);
        }

        if ((moved / (MB * secs)) > 1) {
                fprintf(ftiming, "%.2f MB/sec\n", moved / (MB * secs));
        } else {
                fprintf(ftiming, "%.2f KB/sec\n", moved / (KB * secs));
        }
}

/*
 * Report the recorded interval as `xfers` context switches and the
 * average cost of one switch in microseconds.
 */
void
context(u64 xfers)
{
        struct timeval elapsed;
        double secs, usec_each;

        tvsub(&elapsed, &stop_tv, &start_tv);
        secs = elapsed.tv_sec + elapsed.tv_usec / 1000000.0;
        usec_each = secs * 1000000 / xfers;
        if (!ftiming) {
                ftiming = stderr;
        }
        fprintf(ftiming,
            "%d context switches in %.2f secs, %.0f microsec/switch\n",
            (int)xfers, secs, usec_each);
}

/*
 * Print the recorded interval divided by `n` operations, in
 * nanoseconds, prefixed with the label `s`.
 *
 * The interval is converted to microseconds in floating point;
 * multiplying tv_sec by an integer constant would be evaluated in the
 * type of tv_sec and overflows where that type is 32 bits wide.
 */
void
nano(char *s, u64 n)
{
        struct timeval td;
        double usecs;

        tvsub(&td, &stop_tv, &start_tv);
        usecs = td.tv_sec * 1000000.0 + td.tv_usec;
        if (!ftiming) ftiming = stderr;
        fprintf(ftiming, "%s: %.0f nanoseconds\n", s, usecs * 1000 / n);
}

/*
 * Print the recorded interval divided by `n` operations, in
 * microseconds with four decimals, prefixed with the label `s`.
 *
 * The interval is converted to microseconds in floating point;
 * multiplying tv_sec by an integer constant would be evaluated in the
 * type of tv_sec and overflows where that type is 32 bits wide.
 * The disabled variable-precision variant has been dropped.
 */
void
micro(char *s, u64 n)
{
        struct timeval td;
        double usecs;

        tvsub(&td, &stop_tv, &start_tv);
        usecs = td.tv_sec * 1000000.0 + td.tv_usec;
        usecs /= n;
        if (!ftiming) ftiming = stderr;
        fprintf(ftiming, "%s: %.4f microseconds\n", s, usecs);
}

/*
 * Print "<size in MB> <microseconds per operation>" for the recorded
 * interval, where `sz` is a byte count and `n` the number of
 * operations.  Times of 10 microseconds or more are printed without
 * decimals, shorter ones with three.
 *
 * The interval is converted to microseconds in floating point;
 * multiplying tv_sec by an integer constant would be evaluated in the
 * type of tv_sec and overflows where that type is 32 bits wide.
 */
void
micromb(u64 sz, u64 n)
{
        struct timeval td;
        double mbytes, usecs;

        tvsub(&td, &stop_tv, &start_tv);
        usecs = td.tv_sec * 1000000.0 + td.tv_usec;
        usecs /= n;
        mbytes = sz;
        mbytes /= MB;
        if (!ftiming) ftiming = stderr;
        if (usecs >= 10) {
                fprintf(ftiming, "%.6f %.0f\n", mbytes, usecs);
        } else {
                fprintf(ftiming, "%.6f %.3f\n", mbytes, usecs);
        }
}

/*
 * Print the recorded interval divided by `n` operations, in whole
 * milliseconds, prefixed with the label `s`.
 *
 * The arithmetic here is integral, so n == 0 would be a division by
 * zero; it is treated as 1, matching the convention the kb()/mb()
 * reporters use for a zero divisor.  tv_sec is widened to u64 before
 * the multiplication so the product cannot overflow a 32-bit tv_sec.
 */
void
milli(char *s, u64 n)
{
        struct timeval td;
        u64 msecs;

        tvsub(&td, &stop_tv, &start_tv);
        msecs = (u64)td.tv_sec * 1000 + td.tv_usec / 1000;
        msecs /= (n == 0 ? 1 : n);
        if (!ftiming) ftiming = stderr;
        fprintf(ftiming, "%s: %d milliseconds\n", s, (int)msecs);
}

/*
 * Report the recorded interval as `n` operations, the total time in
 * seconds and the average time per operation in microseconds.
 */
void
ptime(u64 n)
{
        struct timeval elapsed;
        double secs, usec_each;

        tvsub(&elapsed, &stop_tv, &start_tv);
        secs = elapsed.tv_sec + elapsed.tv_usec / 1000000.0;
        usec_each = secs * 1000000 / n;
        if (!ftiming) {
                ftiming = stderr;
        }
        fprintf(ftiming,
            "%d in %.2f secs, %.0f microseconds each\n",
            (int)n, secs, usec_each);
}

u64
tvdelta(struct timeval *start, struct timeval *stop)
{
        struct timeval td;
        u64 usecs;

        tvsub(&td, stop, start);
        usecs = td.tv_sec;
        usecs *= 1000000;
        usecs += td.tv_usec;
        return (usecs);
}

/*
 * Store t1 - t0 in *tdiff.  When the microsecond field goes negative
 * a second is borrowed; if the result would still be negative (t1 is
 * earlier than t0) the difference is clamped to zero.
 */
void
tvsub(struct timeval * tdiff, struct timeval * t1, struct timeval * t0)
{
        tdiff->tv_sec = t1->tv_sec - t0->tv_sec;
        tdiff->tv_usec = t1->tv_usec - t0->tv_usec;

        /* borrow one second to make the microsecond field non-negative */
        if (tdiff->tv_sec > 0 && tdiff->tv_usec < 0) {
                tdiff->tv_usec += 1000000;
                tdiff->tv_sec -= 1;
                assert(tdiff->tv_usec >= 0);
        }

        /* the clock is not supposed to run backwards; report no elapsed time */
        if (t1->tv_sec < t0->tv_sec || tdiff->tv_usec < 0) {
                tdiff->tv_usec = 0;
                tdiff->tv_sec = 0;
        }
}

/*
 * Length of the recorded interval (start_tv to stop_tv) in
 * microseconds.
 */
u64
gettime(void)
{
        u64 elapsed = tvdelta(&start_tv, &stop_tv);

        return (elapsed);
}

/*
 * Length of the recorded interval (start_tv to stop_tv) in seconds.
 */
double
timespent(void)
{
        struct timeval elapsed;
        double secs;

        tvsub(&elapsed, &stop_tv, &start_tv);
        secs = elapsed.tv_sec + elapsed.tv_usec / 1000000.0;
        return (secs);
}

static char p64buf[10][20];
static int n;

/*
 * Format `big` as a hexadecimal string with a "0x" prefix.
 *
 * The result lives in one of ten static buffers used in rotation (and
 * shared with p64sz()), so up to ten results may be in use at once;
 * the caller must not free it.
 *
 * The value is formatted with a single %llx conversion on every
 * platform.  This yields the same text as printing the high and low
 * 32-bit halves separately, without reading the value through an int
 * pointer (an aliasing violation that also assumed little-endian
 * layout) and without returning an unformatted buffer on platforms
 * that matched neither of the previous #ifdef branches.
 */
char *
p64(u64 big)
{
        char *s = p64buf[n++];

        if (n == 10) n = 0;
        /* "0x" + at most 16 hex digits + NUL = 19 bytes; each slot holds 20 */
        sprintf(s, "0x%llx", (unsigned long long)big);
        return (s);
}

/*
 * Format `big` as a human-readable size: the value is divided by 1024
 * until it is no larger than 512 and printed with the matching unit
 * letter from " KMGTPE" (four decimals below 100, two otherwise).
 * Zero yields the constant string "0".  Non-zero results live in the
 * rotating static buffers shared with p64().
 */
char *
p64sz(u64 big)
{
        char *tags = " KMGTPE";
        char *slot = p64buf[n];
        double scaled;
        int tag;

        n = n + 1;
        if (n == 10) {
                n = 0;
        }
        for (scaled = big, tag = 0; scaled > 512; scaled /= 1024) {
                tag++;
        }
        if (scaled == 0) {
                return ("0");
        }
        sprintf(slot, scaled < 100 ? "%.4f%c" : "%.2f%c", scaled, tags[tag]);
        return (slot);
}

/*
 * Return the final character of the string `s`, or '\0' when `s` is
 * empty.  (Scanning to the terminator and then indexing backwards
 * reads one byte before the buffer for an empty string.)
 */
char
last(char *s)
{
        char c = '\0';

        while (*s) {
                c = *s++;
        }
        return (c);
}

/*
 * Parse a decimal size with an optional trailing unit: a final 'k' or
 * 'K' multiplies by 1024, a final 'm' or 'M' by 1024 * 1024.
 */
int
bytes(char *s)
{
        int value = atoi(s);

        switch (last(s)) {
        case 'k':
        case 'K':
                value *= 1024;
                break;
        case 'm':
        case 'M':
                value *= (1024 * 1024);
                break;
        default:
                break;
        }
        return (value);
}

/*
 * Fold `result` into the volatile sink use_result_dummy so the
 * computation that produced it cannot be optimised away.
 */
void
use_int(int result)
{
        use_result_dummy += result;
}

/*
 * Fold a pointer value into the volatile sink use_result_dummy so the
 * computation that produced it cannot be optimised away.
 *
 * The pointer is converted through size_t rather than int: casting to
 * int discards the upper half of the address where pointers are wider
 * than int.
 */
void
use_pointer(void *result)
{
        use_result_dummy += (size_t)result;
}

/*
 * Reset a result set to empty: no samples, every time 0 and every
 * iteration count 1 (so unused slots divide safely).
 */
void
insertinit(result_t *r)
{
        int slot = TRIES;

        while (slot-- > 0) {
                r->n[slot] = 1;
                r->u[slot] = 0;
        }
        r->N = 0;
}

/*
 * Insert the sample (u, n) into *r, keeping the samples ordered by
 * time per iteration (u / n), biggest to smallest.
 *
 * Samples with u == 0 are ignored.  A sample offered to a result set
 * that is already full is ignored as well; shifting the existing
 * entries up by one would otherwise write past the end of the arrays.
 */
void
insertsort(u64 u, u64 n, result_t *r)
{
        int i, j;
        int capacity = (int)(sizeof(r->u) / sizeof(r->u[0]));

        if (u == 0) return;
        if (r->N >= capacity) return;

        for (i = 0; i < r->N; ++i) {
                if (u/(double)n > r->u[i]/(double)r->n[i]) {
                        /* open a gap at position i */
                        for (j = r->N; j > i; --j) {
                                r->u[j] = r->u[j-1];
                                r->n[j] = r->n[j-1];
                        }
                        break;
                }
        }
        /* i is the insertion point, or r->N when the sample is the smallest */
        r->u[i] = u;
        r->n[i] = n;
        r->N++;
}

static result_t results;

/*
 * Print the time per iteration of every saved sample to stderr,
 * separated by spaces.
 */
void
print_results(void)
{
        int idx = 0;

        while (idx < results.N) {
                double per_iter = (double)results.u[idx] / results.n[idx];

                fprintf(stderr, "%.2f ", per_iter);
                ++idx;
        }
}

/*
 * Copy the saved result set into *r.
 */
void
get_results(result_t *r)
{
        *r = results;
        return;
}

/*
 * Replace the saved result set with *r and make its median the
 * current timing (see save_median()).
 */
void
save_results(result_t *r)
{
        results = *r;

        save_median();
}

/*
 * Make the last entry of the saved result set the current timing.
 * The set is ordered biggest to smallest, so that is the minimum.
 * With no samples the timing becomes 0 over 1 iteration.
 */
void
save_minimum()
{
        int smallest = results.N - 1;

        if (results.N == 0) {
                save_n(1);
                settime(0);
                return;
        }
        save_n(results.n[smallest]);
        settime(results.u[smallest]);
}

/*
 * Make the median of the saved result set the current timing.
 * For an even number of samples the two middle entries are averaged
 * (times and iteration counts separately); with no samples the timing
 * becomes 0 over 1 iteration.
 */
void
save_median()
{
        int count = results.N;
        int mid = count / 2;
        u64 u = 0;
        u64 n = 1;

        if (count != 0) {
                if (count % 2) {
                        n = results.n[mid];
                        u = results.u[mid];
                } else {
                        n = (results.n[mid] + results.n[mid-1]) / 2;
                        u = (results.u[mid] + results.u[mid-1]) / 2;
                }
        }
        save_n(n);
        settime(u);
}

/*
 * The inner loop tracks bench.h but uses a different results array.
 *
 * one_op times a loop whose body is a single dependent pointer load
 * (p = *p) using BENCH_INNER; the timing and iteration count are left
 * in the library state for the caller to read with gettime()/get_n().
 * The final pointer is returned so the loads have a visible use.
 */
static long *
one_op(register long *p)
{
        BENCH_INNER(p = (long *)*p, 0);
        return (p);
}

/*
 * Same as one_op, but each loop iteration performs two dependent
 * pointer loads (p = *q; q = *p).  Comparing the two timings lets
 * l_overhead() separate the loop overhead from the cost of a load.
 */
static long *
two_op(register long *p, register long *q)
{
        BENCH_INNER(p = (long *)*q; q = (long*)*p, 0);
        return (p);
}

/* Self-referential pointers: dereferencing p yields p, and likewise for q. */
static long *p = (long *)&p;
static long *q = (long *)&q;

/*
 * Return the per-iteration loop overhead of BENCH_INNER.
 *
 * The value is determined once and cached.  If the environment
 * variable LOOP_O is set its value is used as-is; otherwise the
 * overhead is derived from timing a one-load loop and a two-load loop
 * (see the formula below), using the fastest sample of each and
 * clamping negative results to zero.
 *
 * The measurement overwrites the library's saved results, iteration
 * count and timing, so those are captured first and restored at the
 * end.
 */
double
l_overhead(void)
{
        int i;
        u64 N_save, u_save;
        static double overhead;
        static int initialized = 0;
        result_t one, two, r_save;

        init_timing();
        if (initialized) return (overhead);

        /* set before measuring: BENCH1 users re-enter this function */
        initialized = 1;
        if (getenv("LOOP_O")) {
                overhead = atof(getenv("LOOP_O"));
        } else {
                /* preserve the caller-visible measurement state */
                get_results(&r_save); N_save = get_n(); u_save = gettime();
                insertinit(&one);
                insertinit(&two);
                for (i = 0; i < TRIES; ++i) {
                        use_pointer((void*)one_op(p));
                        /* only keep samples that exceed the timer overhead */
                        if (gettime() > t_overhead())
                                insertsort(gettime() - t_overhead(), get_n(), &one);
                        use_pointer((void *)two_op(p, q));
                        if (gettime() > t_overhead())
                                insertsort(gettime() - t_overhead(), get_n(), &two);
                }
                /*
                 * u1 = (n1 * (overhead + work))
                 * u2 = (n2 * (overhead + 2 * work))
                 * ==> overhead = 2. * u1 / n1 - u2 / n2
                 */
                save_results(&one); save_minimum();
                overhead = 2. * gettime() / (double)get_n();

                save_results(&two); save_minimum();
                overhead -= gettime() / (double)get_n();

                if (overhead < 0.) overhead = 0.; /* Gag */

                /*
                 * save_results() also resets the current timing to the
                 * median of r_save, hence the explicit restore after it.
                 */
                save_results(&r_save); save_n(N_save); settime(u_save);
        }
        return (overhead);
}

/*
 * Figure out the timing overhead. This has to track bench.h
 *
 * Returns the cost of one gettimeofday() call in microseconds.  The
 * value is determined once and cached:
 *   - if the environment variable TIMING_O is set, its value is used
 *     (converted to an integer by the assignment to u64);
 *   - otherwise, if the calibrated run length get_enough(0) is at most
 *     50000, gettimeofday() is timed TRIES times with BENCH_INNER and
 *     the fastest sample, divided by its iteration count, is used;
 *   - otherwise the overhead stays 0.
 *
 * The measurement overwrites the library's saved results, iteration
 * count and timing, so those are captured first and restored at the
 * end.
 */
u64
t_overhead(void)
{
        u64 N_save, u_save;
        static int initialized = 0;
        static u64 overhead = 0;
        struct timeval tv;
        result_t r_save;

        init_timing();
        if (initialized) return (overhead);

        initialized = 1;
        if (getenv("TIMING_O")) {
                overhead = atof(getenv("TIMING_O"));
        } else if (get_enough(0) <= 50000) {
                /* it is not in the noise, so compute it */
                int i;
                result_t r;

                /* preserve the caller-visible measurement state */
                get_results(&r_save); N_save = get_n(); u_save = gettime();
                insertinit(&r);
                for (i = 0; i < TRIES; ++i) {
                        BENCH_INNER(gettimeofday(&tv, 0), 0);
                        insertsort(gettime(), get_n(), &r);
                }
                save_results(&r);
                save_minimum();
                /* integer division: a sub-microsecond cost truncates to 0 */
                overhead = gettime() / get_n();

                /*
                 * save_results() also resets the current timing to the
                 * median of r_save, hence the explicit restore after it.
                 */
                save_results(&r_save); save_n(N_save); settime(u_save);
        }
        return (overhead);
}

/*
 * Decide how long a measurement should run.
 * Returns the larger of the requested duration `e` and the calibrated
 * minimum long_enough; passing 0 therefore yields the calibrated
 * minimum itself.
 */
static int long_enough;
static int compute_enough();

int
get_enough(int e)
{
        init_timing();
        if (e < long_enough) {
                return (long_enough);
        }
        return (e);
}

/*
 * One-time calibration of the timing library: determine the minimum
 * run length, then the timer and loop overheads.  The guard is set
 * before calibrating because the calibration routines call back into
 * this function.
 */
static void
init_timing(void)
{
        static int done = 0;

        if (!done) {
                done = 1;
                long_enough = compute_enough();
                t_overhead();
                l_overhead();
        }
}

typedef long TYPE;

/*
 * Calibration workload: follow the pointer chain starting at p,
 * ten dependent loads per loop iteration, for N iterations.
 * Returns the final pointer so the loads have a visible use.
 */
static TYPE **
enough_duration(register long N, register TYPE ** p)
{
/* expands its argument ten times in a row */
#define ENOUGH_DURATION_TEN(one) one one one one one one one one one one
        while (N-- > 0) {
                ENOUGH_DURATION_TEN(p = (TYPE **) *p;);
        }
        return (p);
}

static u64
duration(long N)
{
        u64 usecs;
        TYPE *x = (TYPE *)&x;
        TYPE **p = (TYPE **)&x;

        start(0);
        p = enough_duration(N, p);
        usecs = stop(0, 0);
        use_pointer((void *)p);
        return (usecs);
}

/*
 * Time the calibration workload of size N repeatedly (TRIES - 1 runs)
 * and return the fastest run in microseconds.  The samples replace the
 * library's saved result set.
 */
static u64
time_N(long N)
{
        result_t samples;
        int run = 1;

        insertinit(&samples);
        while (run < TRIES) {
                u64 elapsed = duration(N);

                insertsort(elapsed, N, &samples);
                ++run;
        }
        save_results(&samples);
        save_minimum();
        return (gettime());
}

/*
 * Find the workload size that runs for about `enough` microseconds
 * (within 2%).  The current size and its timing are kept between
 * calls, so each search starts from the previous answer.  Gives up
 * and returns -1 after ten adjustments.
 */
static long
find_N(int enough)
{
        static long N = 10000;
        static u64 usecs = 0;
        int attempt;

        if (usecs == 0) {
                usecs = time_N(N);
        }

        for (attempt = 0; attempt < 10; attempt++) {
                if (usecs > 0.98 * enough && usecs < 1.02 * enough) {
                        return (N);
                }
                if (usecs >= 1000) {
                        /* long enough to trust: rescale proportionally */
                        double scaled = N;

                        scaled /= usecs;
                        scaled *= enough;
                        N = scaled + 1;
                } else {
                        /* too short to extrapolate from: grow tenfold */
                        N *= 10;
                }
                usecs = time_N(N);
        }
        return (-1);
}

/*
 * We want to verify that small modifications proportionally affect the runtime
 */
static double test_points[] = {1.015, 1.02, 1.035};
static int
test_time(int enough)
{
        int i;
        long N;
        u64 usecs, expected, baseline, diff;

        if ((N = find_N(enough)) <= 0)
                return (0);

        baseline = time_N(N);

        for (i = 0; i < sizeof(test_points) / sizeof(double); ++i) {
                usecs = time_N((int)((double) N * test_points[i]));
                expected = (u64)((double)baseline * test_points[i]);
                diff = expected > usecs ? expected - usecs : usecs - expected;
                if (diff / (double)expected > 0.0025)
                        return (0);
        }
        return (1);
}

/*
 * We want to find the smallest timing interval that has accurate timing
 */
static int possibilities[] = { 5000, 10000, 50000, 100000 };
static int
compute_enough()
{
        int i;

        if (getenv("ENOUGH")) {
                return (atoi(getenv("ENOUGH")));
        }
        for (i = 0; i < sizeof(possibilities) / sizeof(int); ++i) {
                if (test_time(possibilities[i]))
                        return (possibilities[i]);
        }

        /*
         * if we can't find a timing interval that is sufficient,
         * then use SHORT as a default.
         */
        return (SHORT);
}

/*
 * This stuff isn't really lib_timing, but ...
 */
/*
 * Raise the soft limit on open file descriptors to the hard limit.
 * Best effort: if the current limits cannot be read the limits are
 * left untouched (rather than passing an uninitialised structure to
 * setrlimit), and a failure to raise the limit is ignored.
 * Does nothing on platforms without RLIMIT_NOFILE.
 */
void
morefds(void)
{
#ifdef RLIMIT_NOFILE
        struct rlimit r;

        if (getrlimit(RLIMIT_NOFILE, &r) != 0) {
                return;
        }
        r.rlim_cur = r.rlim_max;
        (void) setrlimit(RLIMIT_NOFILE, &r);
#endif
}

/*
 * Write one byte at the start of every page-sized stride of the
 * buffer `buf` of `nbytes` bytes, beginning at buf itself.  The page
 * size is obtained from getpagesize() on the first call and cached.
 *
 * The cached page size is declared with an explicit int type;
 * "implicit int" declarations were removed from the language in C99.
 */
void
touch(char *buf, int nbytes)
{
        static int psize;

        if (!psize) {
                psize = getpagesize();
        }
        while (nbytes > 0) {
                *buf = 1;
                buf += psize;
                nbytes -= psize;
        }
}

#if defined(hpux) || defined(__hpux)
/*
 * HP-UX replacement for getpagesize(): ask sysconf() for the page
 * size.
 */
int
getpagesize()
{
        int size = sysconf(_SC_PAGE_SIZE);

        return (size);
}
#endif

#ifdef WIN32
/*
 * WIN32 getpagesize(): reports the dwPageSize field filled in by
 * GetSystemInfo().
 */
int
getpagesize()
{
        SYSTEM_INFO info;
        int bytes;

        GetSystemInfo(&info);
        bytes = (int)info.dwPageSize;
        return (bytes);
}

LARGE_INTEGER
getFILETIMEoffset()
{
        SYSTEMTIME s;
        FILETIME f;
        LARGE_INTEGER t;

        s.wYear = 1970;
        s.wMonth = 1;
        s.wDay = 1;
        s.wHour = 0;
        s.wMinute = 0;
        s.wSecond = 0;
        s.wMilliseconds = 0;
        SystemTimeToFileTime(&s, &f);
        t.QuadPart = f.dwHighDateTime;
        t.QuadPart <<= 32;
        t.QuadPart |= f.dwLowDateTime;
        return (t);
}

int
gettimeofday(struct timeval *tv, struct timezone *tz)
{
        LARGE_INTEGER t;
        FILETIME f;
        double microseconds;
        static LARGE_INTEGER offset;
        static double frequencyToMicroseconds;
        static int initialized = 0;
        static BOOL usePerformanceCounter = 0;

        if (!initialized) {
                LARGE_INTEGER performanceFrequency;
                initialized = 1;
                usePerformanceCounter = QueryPerformanceFrequency(&performanceFrequency);
                if (usePerformanceCounter) {
                        QueryPerformanceCounter(&offset);
                        frequencyToMicroseconds = (double)performanceFrequency.QuadPart / 1000000.;
                } else {
                        offset = getFILETIMEoffset();
                        frequencyToMicroseconds = 10.;
                }
        }
        if (usePerformanceCounter) QueryPerformanceCounter(&t);
        else {
                GetSystemTimeAsFileTime(&f);
                t.QuadPart = f.dwHighDateTime;
                t.QuadPart <<= 32;
                t.QuadPart |= f.dwLowDateTime;
        }

        t.QuadPart -= offset.QuadPart;
        microseconds = (double)t.QuadPart / frequencyToMicroseconds;
        t.QuadPart = microseconds;
        tv->tv_sec = t.QuadPart / 1000000;
        tv->tv_usec = t.QuadPart % 1000000;
        return (0);
}
#endif
char *id = "$Id: lmdd.c,v 1.23 1997/12/01 23:47:59 lm Exp $\n";
/*
 * defaults:
 * bs=8k
 * count=forever
 * if=internal
 * of=internal
 * ipat=0
 * opat=0
 * mismatch=0
 * rusage=0
 * flush=0
 * rand=0
 * print=0
 * direct=0
 * rt=0
 * rtmax=0
 * wtmax=0
 * rtmin=0
 * wtmin=0
 * label=""
 * shorthands:
 * k, m, g are 2^10, 2^20, 2^30 multipliers.
 * K, M, G are 10^3, 10^6, 10^9 multipliers.
 * recognizes "internal" as an internal /dev/zero /dev/null file.
 *
 * Copyright (c) 1994-1998 by Larry McVoy. All rights reserved.
 * See the file COPYING for the licensing terms.
 *
 * TODO - rewrite this entire thing from scratch. This is disgusting code.
 */

#ifndef __Lynx__
#define FLUSH
#endif

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <string.h>
#include <malloc.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
#ifdef USE_BDS
#include "bds.h"
#endif
#include <sys/time.h>
#include "bench.h"

/*
 * Round x up to the next multiple of bs.  bs must be a power of two.
 * Both arguments are parenthesized so expressions may be passed safely;
 * note that bs is evaluated twice.
 */
#define ALIGN(x, bs) (((x) + ((bs) - 1)) & ~((bs) - 1))

#ifdef FLUSH
#include <sys/mman.h>
#include <sys/stat.h>
void flush(void);
#endif

#define USE_VALLOC
#ifdef USE_VALLOC
#define VALLOC valloc
#else
#define VALLOC malloc
#endif

extern double drand48();

#ifdef __sgi
# define LSEEK(a,b,c) (u64)lseek64(a, (off64_t)b, c)
# define ATOL(s) atoll(s)
#else
# define LSEEK(a,b,c) (u64)lseek(a, b, c)
# define ATOL(s) atol(s)
#endif

/*
 * Run-time options and state shared by main(), getfile() and done().
 * Most options are read with getarg() and therefore hold -1 when the
 * corresponding argument was not given on the command line.
 */
/*
 * awrite: set by getfile() from "awrite="; consulted in USE_BDS code.
 * poff:   non-zero when offsets should be printed as I/O is done.
 * out:    output descriptor returned by getfile("of=").
 * Print:  report type selected in done().
 * Fsync, Sync, Flush: done() calls fsync()/sync()/flush() when > 0.
 * Bsize:  block size in bytes (8192 when "bs=" is absent).
 * ru:     "rusage=" option (RUSAGE builds only).
 */
int awrite, poff, out, Print, Fsync, Sync, Flush, Bsize, ru;
/*
 * Start, End, Rand: parameters of the random-offset modes in main().
 * int_count: amount moved, accumulated in 4-byte units (moved >> 2);
 * done() shifts it back to bytes before reporting.
 */
u64 Start, End, Rand, int_count;
/* non-zero: print a '#' to stderr after every block */
int hash;
/*
 * Realtime: "realtime=" option, tested in getfile().
 * NOTE(review): Notrunc is not referenced in the visible code; getfile()
 * uses its own local notrunc flag -- confirm whether it is still needed.
 */
int Realtime, Notrunc;
/* per-operation timing switches and histogram bounds in milliseconds */
int Rt, Rtmax, Rtmin, Wt, Wtmax, Wtmin;
/*
 * Latency histograms: slot 0 counts samples below the min, slots 1..10
 * are the buckets between min and max, slot 11 counts samples >= max.
 */
int rthist[12]; /* histogram of read times */
int wthist[12]; /* histogram of write times */
/*
 * Prefix for printed output.  Stored as a pointer smuggled through
 * getarg()'s integer return value; compared against -1 for "not set".
 */
char *Label;
/* table of offsets already used ("norepeat="), NULL when disabled */
u64 *norepeat;
/* upper index scanned by been_there(); starts at -1 (nothing to scan) */
int norepeats = -1;
#ifdef USE_BDS
        bds_msg *m1, *m2;
#endif

u64 getarg();
int been_there(u64 off);
int getfile(char *s, int ac, char **av);

/*
 * Names of every argument accepted on the command line, each used as
 * name=value.  chkarg() checks arguments against this table and prints it
 * when an argument is rejected.  The list is terminated by a 0 entry.
 * Entries inside #ifdef are only accepted when the feature is compiled in.
 */
char *cmds[] = {
        "bs", /* block size */
        "bufs", /* use this many buffers round robin */
        "count", /* number of blocks */
#ifdef DBG
        "debug", /* set external variable "dbg" */
#endif
#ifdef O_DIRECT
        "direct", /* direct I/O on input and output */
        "idirect", /* direct I/O on input */
        "odirect", /* direct I/O on output */
#endif
#ifdef FLUSH
        "flush", /* map in out and invalidate (flush) */
#endif
        "fork", /* fork to do write I/O */
        "fsync", /* fsync output before exit */
        "if", /* input file */
        "ipat", /* check input for pattern */
        "label", /* prefix print out with this */
        "mismatch", /* stop at first mismatch */
        "move", /* instead of count, limit transfer to this */
        "of", /* output file */
        "opat", /* generate pattern on output */
        "print", /* report type */
        "rand", /* do randoms over the specified size */
                                /* must be power of two, not checked */
        "poff", /* Print the offsets as we do the io. */
#ifdef RUSAGE
        "rusage", /* dump rusage stats */
#endif
        "skip", /* skip this number of blocks */
        "sync", /* sync output before exit */
        "touch", /* touch each buffer after the I/O */
#if !defined(hpux)
        "usleep", /* sleep this many usecs between I/O */
#endif
        "hash", /* hash marks like FTP */
        "append", /* O_APPEND */
        "rt", /* time reads */
        "wt", /* time writes */
        "rtmax", /* read latency histogram max in milliseconds */
        "wtmax", /* write latency histogram max in milliseconds */
        "rtmin", /* read latency histogram min in milliseconds */
        "wtmin", /* write latency histogram min in milliseconds */
        "realtime", /* create files as XFS realtime files */
        "notrunc", /* overwrite rather than truncating the output file */
        "end", /* limit randoms to this size near the
                                 * Rand endpoints. */
        "start", /* Add this to Rand */
        "time", /* Run for this many seconds only. */
        "srand", /* Seed the random number generator */
        "padin", /* Pad an extra untimed block_size read */
#ifdef USE_BDS
        "awrite", /* use async writes and pipeline them. */
#endif
        "norepeat", /* don't ever do the same I/O twice */
#ifdef sgi
        "mpin", /* pin the buffer */
#endif
        "timeopen", /* include open time in results */
        "nocreate", /* just open for writing, don't create/trunc it */
#ifdef O_SYNC
        "osync", /* O_SYNC */
#endif
        0,
};

void error(char *);
void done();
#ifdef DBG
extern int dbg;
#endif

int
main(int ac, char **av)
{
        u32 *buf;
        u32 *bufs[10];
        int nbufs, nextbuf = 0;
        int Fork, misses, mismatch, outpat, inpat, in, timeopen, gotcnt;
        int slp;
        u64 skip, size, count;
        void chkarg();
        int i;
        u64 off = 0;
        int touch;
        int time;
        int mills;
        int pad_in;
        int pid = 0;
        struct timeval start_tv;
        struct timeval stop_tv;

        if (sizeof(int) != 4) {
                fprintf(stderr, "sizeof(int) != 4\n");
                exit(1);
        }
        for (i = 1; i < ac; ++i) {
                chkarg(av[i]);
        }
        signal(SIGINT, done);
        signal(SIGALRM, done);
        misses = mismatch = getarg("mismatch=", ac, av);
        inpat = getarg("ipat=", ac, av);
        outpat = getarg("opat=", ac, av);
        Bsize = getarg("bs=", ac, av);
        if (Bsize < 0)
                Bsize = 8192;
#if !defined(hpux)
        slp = getarg("usleep=", ac, av);
#endif
        Fork = getarg("fork=", ac, av);
        Fsync = getarg("fsync=", ac, av);
        Sync = getarg("sync=", ac, av);
        Rand = getarg("rand=", ac, av);
        Start = getarg("start=", ac, av);
        End = getarg("end=", ac, av);
        time = getarg("time=", ac, av);
        if ((End != -1) && (Rand != -1) && (End > Rand)) {
                End = Rand;
        }
        if (getarg("srand=", ac, av) != -1) {
                srand48((long)getarg("srand=", ac, av));
        }
        poff = getarg("poff=", ac, av) != -1;
        Print = getarg("print=", ac, av);
        nbufs = getarg("bufs=", ac, av);
        Realtime = getarg("realtime=", ac, av);
        Rtmax = getarg("rtmax=", ac, av);
        if ((Rtmax != -1) && (Rtmax < 10))
                Rtmax = 10;
        Rtmin = getarg("rtmin=", ac, av);
        if ((Rtmax != -1) && (Rtmin == -1)) {
                Rtmin = 0;
        }
        Rt = getarg("rt=", ac, av);
        Wtmax = getarg("wtmax=", ac, av);
        if ((Wtmax != -1) && (Wtmax < 10))
                Wtmax = 10;
        Wtmin = getarg("wtmin=", ac, av);
        if ((Wtmax != -1) && (Wtmin == -1)) {
                Wtmin = 0;
        }
        Wt = getarg("wt=", ac, av);
        if ((Rtmin && !Rtmax) || (Wtmin && !Wtmax)) {
                fprintf(stderr, "Need a max to go with that min.\n");
                exit(1);
        }
        if ((Rtmin > Rtmax) || (Wtmin > Wtmax)) {
                fprintf(stderr,
                    "min has to be less than max, R=%d,%d W=%d,%d\n",
                    Rtmax, Rtmin, Wtmax, Wtmin);
                exit(1);
        }
        timeopen = getarg("timeopen=", ac, av);
        pad_in = getarg("padin=", ac, av);
        if (pad_in == -1) pad_in = 0;
        
        if (nbufs == -1) nbufs = 1;
        if (nbufs > 10) { printf("Too many bufs\n"); exit(1); }
#ifdef DBG
        dbg = getarg("debug=", ac, av) != -1;
#endif
#ifdef RUSAGE
        ru = getarg("rusage=", ac, av);
#endif
        touch = getarg("touch=", ac, av) != -1;
        hash = getarg("hash=", ac, av) != (u64)-1;
        Label = (char *)(u32)getarg("label=", ac, av);
        count = getarg("count=", ac, av);
        size = getarg("move=", ac, av);
        if (size != (u64)-1)
                count = size / Bsize;
        if (Rand != -1) {
                size = Rand - Bsize;
                size = ALIGN(size, Bsize);
        }

#ifdef FLUSH
        Flush = getarg("flush=", ac, av);
#endif
        if (count == (u64)-1)
                gotcnt = 0;
        else
                gotcnt = 1;
        int_count = 0;
        skip = getarg("skip=", ac, av);
        if (getarg("norepeat=", ac, av) != -1) {
                if (gotcnt) {
                        norepeat = (u64*)calloc(count, sizeof(u64));
                } else {
                        norepeat = (u64*)calloc(10<<10, sizeof(u64));
                }
        }

        if ((inpat != -1 || outpat != -1) && (Bsize & 3)) {
                fprintf(stderr, "Block size 0x%x must be word aligned\n", Bsize);
                exit(1);
        }
        if ((Bsize >> 2) == 0) {
                fprintf(stderr, "Block size must be at least 4.\n");
                exit(1);
        }
        for (i = 0; i < nbufs; i++) {
                if (!(bufs[i] = (u32 *) VALLOC((unsigned) Bsize))) {
                        perror("VALLOC");
                        exit(1);
                }
                bzero((char *) bufs[i], Bsize);
#ifdef sgi
                if (getarg("mpin=", ac, av) != -1) {
                        if (mpin((void *)bufs[i], (size_t)Bsize)) {
                                perror("mpin for adam");
                        }
                }
#endif
        }

        if (time != -1) {
                alarm(time);
        }
        if (timeopen != -1) {
                start(NULL);
        }
        in = getfile("if=", ac, av);
        out = getfile("of=", ac, av);
        if (timeopen == -1) {
                start(NULL);
        }
        if ((Rtmax != -1) && in < 0) {
                fprintf(stderr, "I think you wanted wtmax, not rtmax\n");
                exit(1);
        }
        if ((Wtmax != -1) && out < 0) {
                fprintf(stderr, "I think you wanted rtmax, not wtmax\n");
                exit(1);
        }
        if (skip != (u64)-1) {
                off = skip;
                off *= Bsize;
                if (in >= 0) {
                        LSEEK(in, off, 0);
                }
                if (out >= 0) {
                        LSEEK(out, off, 0);
                }
                if (poff) {
                        fprintf(stderr, "%s ", p64sz(off));
                }
        }
        for (;;) {
                register int moved;

                if (gotcnt && count-- <= 0) {
                        done();
                }

                /*
                 * If End is set, it means alternate back and forth
                 * between the end points of Rand, doing randoms within
                 * the area 0..End and Rand-End..Rand
                 */
                if (End != -1) {
                        static u64 start = 0;

                        start = start ? 0 : Rand - End;
                        do {
                                off = drand48() * End;
                                off = ALIGN(off, Bsize);
                                off += start;
                                if (Start != -1) {
                                        off += Start;
                                }
                        } while (norepeat && been_there(off));
                        if (norepeat) {
                                norepeat[norepeats++] = off;
                                if (!gotcnt && (norepeats == 10<<10)) {
                                        norepeats = 0;
                                }
                        }
                        if (in >= 0) {
                                LSEEK(in, off, 0);
                        }
                        if (out >= 0) {
                                LSEEK(out, off, 0);
                        }
                }
                /*
                 * Set the seek pointer if doing randoms
                 */
                else if (Rand != -1) {
                        do {
                                off = drand48() * (size - Bsize);
                                if (Start != -1) {
                                        off += Start;
                                }
                                off = ALIGN(off, Bsize);
                        } while (norepeat && been_there(off));
                        if (norepeat) {
                                norepeat[norepeats++] = off;
                        }
                        if (!gotcnt && (norepeats == 10<<10)) {
                                norepeats = 0;
                        }
                        if (in >= 0) {
                                LSEEK(in, off, 0);
                        }
                        if (out >= 0) {
                                LSEEK(out, off, 0);
                        }
                }
                if (poff) {
                        fprintf(stderr, "%s ", p64sz(off));
                }

                buf = bufs[nextbuf];
                if (++nextbuf == nbufs) nextbuf = 0;
                if (in >= 0) {
                        if ((Rt != -1) || (Rtmax != -1) || (Rtmin != -1)) {
                                start(&start_tv);
                        }
                        moved = read(in, buf, Bsize);
                        
                        if (pad_in) { /* ignore this run, restart clock */
                            pad_in = 0;
                            count++;
                            start(NULL);
                            continue;
                        }
                        
                        if (Rt) {
                                int mics = stop(&start_tv, &stop_tv);
                                
                                mills = mics / 1000;
                                fprintf(stderr,
                                    "READ: %.02f milliseconds offset %s, ",
                                    ((float)mics) / 1000,
                                    p64sz(LSEEK(in, 0, SEEK_CUR)));
                                fprintf(stderr, "%.02f MB/sec\n",
                                    (double)moved/mics);
                        } else if ((Rtmax != -1) || (Rtmin != -1)) {
                                int mics = stop(&start_tv, &stop_tv);
                                
                                mills = mics / 1000;
                                if ((mills > Rtmax) || (mills < Rtmin)) {
                                        fprintf(stderr,
                                          "READ: %.02f milliseconds offset %s\n",
                                                ((float)mics) / 1000,
                                                p64sz(LSEEK(in, 0, SEEK_CUR)));
                                }
                                /*
                                 * Put this read time in the histogram.
                                 * The buckets are each 1/10th of Rtmax.
                                 */
                                if (mills >= Rtmax) {
                                        rthist[11]++;
                                } else if (mills < Rtmin) {
                                        rthist[0]++;
                                } else {
                                        int step = (Rtmax - Rtmin) / 10;
                                        int i;

                                        for (i = 1; i <= 10; ++i) {
                                                if (mills < i * step + Rtmin) {
                                                        rthist[i]++;
                                                        break;
                                                }
                                        }
                                }
                        }
                } else {
                        moved = Bsize;
                }
                if (moved == -1) {
                        perror("read");
                }
                if (moved <= 0) {
                        done();
                }
                if (inpat != -1) {
                        register int foo, cnt;

                        for (foo = 0, cnt = moved/sizeof(int); cnt--; foo++) {
                                if (buf[foo] != (u32) (off + foo*sizeof(int))) {
                                        fprintf(stderr,
                                            "off=%u want=%x got=%x\n",
                                            (u32)off,
                                            (u32)(off + foo*sizeof(int)),
                                            buf[foo]);
                                        if (mismatch != -1 && --misses == 0) {
                                                done();
                                        }
                                }
                        }
                }
                if ((in >= 0) && touch) {
                        int i;

                        for (i = 0; i < moved; i += 4096) {
                                ((char *)buf)[i] = 0;
                        }
                }
                if (out >= 0) {
                        int moved2;

                        if (Fork != -1) {
                                if (pid) {
                                        waitpid(pid, 0, 0);
                                }
                                if ((pid = fork())) {
                                        off += moved;
                                        int_count += (moved >> 2);
                                        continue;
                                }
                        }
                        if (outpat != -1) {
                                register int foo, cnt;

                                for (foo = 0, cnt = moved/sizeof(int);
                                    cnt--; foo++) {
                                        buf[foo] =
                                            (u32)(off + foo*sizeof(int));
                                }
                        }
                        if ((Wt != -1) || (Wtmax != -1) || (Wtmin != -1)) {
                                start(&start_tv);
                        }
#ifdef USE_BDS
                        /*
                         * The first time through, m1 & m2 are null.
                         * The Nth time through, we start the I/O into
                         * m2, and wait on m1, then switch.
                         */
                        if (awrite) {
                                if (m1) {
                                        m2 = bds_awrite(out, buf, moved);
                                        moved2 = bds_adone(out, m1);
                                        m1 = m2;
                                } else {
                                        m1 = bds_awrite(out, buf, moved);
                                        goto writedone;
                                }
                        } else {
                                moved2 = write(out, buf, moved);
                        }
#else
                        moved2 = write(out, buf, moved);
#endif

                        if (moved2 == -1) {
                                perror("write");
                        }
                        if (moved2 != moved) {
                                fprintf(stderr, "write: wanted=%d got=%d\n",
                                    moved, moved2);
                                done();
                        }
                        if (Wt) {
                                int mics = stop(&start_tv, &stop_tv);
                                
                                mills = mics / 1000;
                                fprintf(stderr,
                                    "WRITE: %.02f milliseconds offset %s, ",
                                    ((float)mics) / 1000,
                                    p64sz(LSEEK(out, 0, SEEK_CUR)));
                                fprintf(stderr, "%.02f MB/sec\n",
                                    (double)moved/mics);
                        } else if ((Wtmax != -1) || (Wtmin != -1)) {
                                int mics = stop(&start_tv, &stop_tv);

                                mills = mics / 1000;
                                if ((mills > Wtmax) || (mills < Wtmin)) {
                                        fprintf(stderr,
                                          "WRITE: %.02f milliseconds offset %s\n",
                                                ((float)mics) / 1000,
                                                p64sz(LSEEK(out, 0, SEEK_CUR)));
                                }
                                /*
                                 * Put this write time in the histogram.
                                 * The buckets are each 1/10th of Wtmax.
                                 */
                                if (mills >= Wtmax) {
                                        wthist[11]++;
                                } else if (mills < Wtmin) {
                                        wthist[0]++;
                                } else {
                                        int step = (Wtmax - Wtmin) / 10;
                                        int i;

                                        for (i = 1; i <= 10; ++i) {
                                                if (mills < i * step + Wtmin) {
                                                        wthist[i]++;
                                                        break;
                                                }
                                        }
                                }
                        }

                        if (moved2 == -1) {
                                perror("write");
                        }
                        if (moved2 != moved) {
                                done();
                        }

                        if (touch) {
                                int i;

                                for (i = 0; i < moved; i += 4096) {
                                        ((char *)buf)[i] = 0;
                                }
                        }
                }
#ifdef USE_BDS
writedone: /* for the first async write */
#endif
                off += moved;
                int_count += (moved >> 2);
#if !defined(hpux)
                if (slp != -1) {
                        usleep(slp);
                }
#endif
                if (hash) {
                        fprintf(stderr, "#");
                }
                if (Fork != -1) {
                        exit(0);
                }
        }
}

/*
 * Report whether `off' is already in the norepeat table.  Slots 0 through
 * norepeats (inclusive) are searched.  On a hit a note is written to
 * stderr and 1 is returned; otherwise 0.
 */
int
been_there(u64 off)
{
        register int slot = 0;

        while (slot <= norepeats) {
                if (norepeat[slot] == off) {
                        fprintf(stderr, "norepeat on %u\n", (u32)off);
                        return (1);
                }
                slot++;
        }
        return (0);
}

/*
 * Validate one command line argument.  It is accepted only when it has the
 * form name=value with `name' being exactly one of the entries in cmds[];
 * a mere prefix of a command name (or an empty name) is rejected, since
 * getarg() would silently ignore such an argument.  On rejection the list
 * of valid names is printed to stderr and the program exits with status 1.
 */
void
chkarg(char *arg)
{
        int i;
        size_t n;

        for (i = 0; cmds[i]; ++i) {
                n = strlen(cmds[i]);
                /* whole name must match and be followed directly by '=' */
                if (!strncmp(arg, cmds[i], n) && arg[n] == '=')
                        return;
        }
        fprintf(stderr, "Bad arg: %s, possible arguments are: ", arg);
        for (i = 0; cmds[i]; ++i) {
                fprintf(stderr, "%s ", cmds[i]);
        }
        fprintf(stderr, "\n");
        exit(1);
        /*NOTREACHED*/
}

void
done(void)
{
        int i;
        int step;
        int size;

#ifdef USE_BDS
        if (awrite && m1) {
                bds_adone(out, m1);
        }
#endif
        if (Sync > 0)
                sync();
        if (Fsync > 0)
                fsync(out);
#ifdef FLUSH
        if (Flush > 0)
                flush();
#endif
        stop(NULL, NULL);
#ifdef RUSAGE
        if (ru != -1)
                rusage();
#endif
        if (hash || poff) {
                fprintf(stderr, "\n");
        }
        if ((long)Label != -1) {
                fprintf(stderr, "%s", Label);
        }
        int_count <<= 2;
        switch (Print) {
            case 0: /* no print out */
                    break;

            case 1: /* latency type print out */
                latency((u64)(int_count / Bsize), (u64)Bsize);
                break;

            case 2: /* microsecond per op print out */
                micro("", (u64)(int_count / Bsize));
                break;

            case 3: /* kb / sec print out */
                kb(int_count);
                break;

            case 4: /* mb / sec print out */
                mb(int_count);
                break;

            case 5: /* Xgraph output */
                bandwidth(int_count, 1, 0);
                break;

            default: /* bandwidth print out */
                bandwidth(int_count, 1, 1);
                break;
        }
        if (Rtmax != -1) {
                printf("READ operation latencies\n");
                step = (Rtmax - Rtmin) / 10;
                if (rthist[0]) {
                        printf("%d- ms: %d\n", Rtmin, rthist[0]);
                }
                for (i = 1, size = Rtmin; i <= 10; i++, size += step) {
                        if (!rthist[i])
                                continue;
                        printf("%d to %d ms: %d\n",
                               size, size + step - 1, rthist[i]);
                }
                if (rthist[11]) {
                        printf("%d+ ms: %d\n", Rtmax, rthist[11]);
                }
        }
        if (Wtmax != -1) {
                printf("WRITE operation latencies\n");
                step = (Wtmax - Wtmin) / 10;
                if (wthist[0]) {
                        printf("%d- ms: %d\n", Wtmin, wthist[0]);
                }
                for (i = 1, size = Wtmin; i <= 10; i++, size += step) {
                        if (!wthist[i])
                                continue;
                        printf("%d to %d ms: %d\n",
                               size, size + step - 1, wthist[i]);
                }
                if (wthist[11]) {
                        printf("%d+ ms: %d\n", Wtmax, wthist[11]);
                }
        }
        exit(0);
}

u64
getarg(char *s, int ac, char **av)
{
        register u64 len, i;

        len = strlen(s);

        for (i = 1; i < ac; ++i) {
                if (!strncmp(av[i], s, len)) {
                        register u64 bs = ATOL(&av[i][len]);

                        switch (av[i][strlen(av[i]) - 1]) {
                            case 'K': bs *= 1000; break;
                            case 'k': bs <<= 10; break;
                            case 'M': bs *= 1000000; break;
                            case 'm': bs <<= 20; break;
                            case 'G': bs *= 1000000000L; break;
                            case 'g': bs <<= 30; break;
                        }

                        if (!strncmp(av[i], "label", 5)) {
                                return (u64)(u32)(&av[i][len]); /* HACK */
                        }
                        if (!strncmp(av[i], "bs=", 3)) {
                                return (u64)(bs);
                        }
                        return (bs);
                }
        }
        return ((u64)-1);
}

char *output;

int
getfile(char *s, int ac, char **av)
{
        register int ret, len, i;
        int append = getarg("append=", ac, av) != -1;
        int notrunc = getarg("notrunc=", ac, av) != -1;
        int nocreate = getarg("nocreate=", ac, av) != -1;
#ifdef O_SYNC
        int osync = getarg("osync=", ac, av) != -1;
#endif
        int oflags;

        len = strlen(s);

        for (i = 1; i < ac; ++i) {
                if (!strncmp(av[i], s, len)) {
                        if (av[i][0] == 'o') {
                                if (!strcmp("of=internal", av[i]))
                                        return (-2);
                                if (!strcmp("of=stdout", av[i]))
                                        return (1);
                                if (!strcmp("of=1", av[i]))
                                        return (1);
                                if (!strcmp("of=-", av[i]))
                                        return (1);
                                if (!strcmp("of=stderr", av[i]))
                                        return (2);
                                if (!strcmp("of=2", av[i]))
                                        return (2);
                                oflags = O_WRONLY;
                                oflags |= (notrunc || append) ? 0 : O_TRUNC;
                                oflags |= nocreate ? 0 : O_CREAT;
                                oflags |= append ? O_APPEND : 0;
#ifdef O_SYNC
                                oflags |= osync ? O_SYNC : 0;
#endif
                                ret = open(&av[i][len], oflags,0644);
#ifdef O_DIRECT
                                if ((getarg("odirect=", ac, av) != -1) ||
                                    (getarg("direct=", ac, av) != -1)) {
                                        close(ret);
                                        ret = open(&av[i][len], oflags|O_DIRECT);
                                        awrite =
                                            getarg("awrite=", ac, av) != -1;
                                }
#endif
                                if (ret == -1)
                                        error(&av[i][len]);
#ifdef F_FSSETXATTR
                                if (Realtime == 1) {
                                        struct fsxattr fsxattr;
                                
                                        bzero(&fsxattr,sizeof(struct fsxattr));
                                        fsxattr.fsx_xflags = 0x1;
                                        if (fcntl(ret,F_FSSETXATTR,&fsxattr)){
                                                printf("WARNING: Could not make %s a real time file\n",
                                                       &av[i][len]);
                                        }
                                }
#endif
                                output = &av[i][len];
                                return (ret);
                        } else {
                                if (!strcmp("if=internal", av[i]))
                                        return (-2);
                                if (!strcmp("if=stdin", av[i]))
                                        return (0);
                                if (!strcmp("if=0", av[i]))
                                        return (0);
                                if (!strcmp("if=-", av[i]))
                                        return (0);
                                ret = open(&av[i][len], 0);
#ifdef O_DIRECT
                                if ((getarg("idirect=", ac, av) != -1) ||
                                    (getarg("direct=", ac, av) != -1)) {
                                        close(ret);
                                        ret = open(&av[i][len], O_RDONLY|O_DIRECT);
                                }
#endif
                                if (ret == -1)
                                        error(&av[i][len]);
                                return (ret);
                        }
                }
        }
        return (-2);
}

#ifdef FLUSH
int
warning(char *s)
{
        if ((long)Label != -1) {
                fprintf(stderr, "%s: ", Label);
        }
        perror(s);
        return (-1);
}

/*
 * Map the whole output file MAP_SHARED and msync() it with
 * MS_INVALIDATE, then unmap it again.  Presumably this is meant to push
 * the file's cached pages out so a following read really hits the
 * device -- confirm against the caller.
 *
 * Does nothing but print a warning when there is no output file, when it
 * cannot be opened or stat'ed, when it is empty, or when the mapping
 * fails.  The descriptor opened here is closed on every path.
 */
void
flush(void)
{
        int fd;
        struct stat sb;
        caddr_t where;

        if (output == NULL || (fd = open(output, O_RDWR)) == -1) {
                warning("No output file");
                return;
        }
        if (fstat(fd, &sb) == -1 || sb.st_size == 0) {
                /* Report before close() so errno still belongs to fstat. */
                warning(output);
                close(fd);
                return;
        }
        where = mmap(0, sb.st_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
        if (where == MAP_FAILED) {
                /* Never hand MAP_FAILED to msync/munmap. */
                warning(output);
                close(fd);
                return;
        }
        msync(where, sb.st_size, MS_INVALIDATE);
        munmap(where, sb.st_size);
        close(fd);
}
#endif

/*
 * Fatal diagnostic: optionally prefixes the message with "<Label>: "
 * (skipped while Label still holds the -1 sentinel), prints s together
 * with the errno text via perror(), and terminates with exit status 1.
 * Does not return.
 */
void
error(char *s)
{
        int have_label;

        have_label = !((long)Label == -1);
        if (have_label)
                fprintf(stderr, "%s: ", Label);

        perror(s);
        exit(1);
}
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Mon May 07 2001 - 21:00:21 EST