page->mapping == NULL recreated without vmware...

From: Petr Vandrovec (vandrove@vc.cvut.cz)
Date: Fri Oct 06 2000 - 12:25:58 EST


Hi,
  A month ago I reported here that VMware causes an oops on exit.
After some time and tons of tweaking, I was able to reproduce
it without vmmon: 2.4.0-test9, no special patches, no
vmware modules loaded.

  Machine is dual PIII/450, 256MB RAM, 18GB IDE disk.

  Here it is. Adjust MSIZE so that it covers almost all of your
memory, to cause swapping (0x0E000000 is the value for my 256MB machine).

/*
Makefile:

CFLAGS = -W -Wall -O2 -D_GNU_SOURCE -D_BSD_SOURCE

oopsdemo: oopsdemo.c
*/

#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/shm.h>
#include <sys/ioctl.h>
#include <sys/raw.h>

/* One-byte scratch buffer used for every pipe read/write handshake. */
unsigned char zero;

/* Size in bytes of the shared file mapping; meant to be close to the
   machine's total RAM so that touching it forces swapping. */
#define MSIZE 0x0E000000

/* Descriptor of the bound raw device node: opened in main(), read from
   by the children forked in x4768(). */
int mfd;

/*
 * Core of the reproducer.
 *
 * Creates and immediately unlinks a file "ram0", maps MSIZE bytes of it
 * MAP_SHARED, then forks two children that inherit the mapping. Parent and
 * children handshake over pipes using the global byte `zero`. After the
 * second handshake byte each child read()s from the global `mfd` into the
 * shared mapping; the parent, after sleeping 5 seconds, truncates the file
 * to length 0.
 *
 * Does not return: the parent and both children all finish with exit().
 * No return value of open/ftruncate/mmap/pipe/read/write is checked.
 */
void x4768(void) {
        int fd;
        pid_t pid;
        /* Pipe pairs: x4778_* talk to the first child, x4779_* to the second.
           _1 carries parent -> child, _2 carries child -> parent. */
        int x4778_1[2];
        int x4778_2[2];
        int x4779_1[2];
        int x4779_2[2];
        int from4778;
        int to4778;
        int from4779;
        int to4779;
        /* Bookkeeping for every mapping made through MAL2: address and length.
           Both arrays are automatic (stack) storage. */
        char* mma[65536];
        unsigned int mml[65536];
        unsigned int mmi = 0;   /* number of mappings recorded so far */
        unsigned int ln = 0;    /* current file length / next mapping offset */
        unsigned int x;
        
/* MAL2(a,l): grow the file by l bytes, map those l bytes MAP_SHARED at hint
   address a, record the mapping in mma/mml, advance ln, and write 99 to the
   first byte of the new mapping. Expands to several statements, so it must
   not be used as the unbraced body of an if/for. */
#define MAL2(a,l) ftruncate(fd, ln+l); mml[mmi] = l; mma[mmi++] = mmap(a, l, PROT_READ|PROT_WRITE, MAP_SHARED, fd, ln); ln += l; mma[mmi-1][0] = 99;
#define MAL(l) MAL2(NULL,l)

        /* The file is unlinked right away, so it lives only as long as it is
           open or mapped. */
        fd = open("ram0", O_RDWR | O_CREAT, 0600);
        unlink("ram0");
        MAL(MSIZE);
        pipe(x4778_1);
        pipe(x4778_2);
        pid = fork();
        if (!pid) {
                /* ---- first child ---- */
                int from4768 = x4778_1[0];
                int to4768 = x4778_2[1];
                close(x4778_1[1]);
                close(x4778_2[0]);
                /* Only the descriptor is dropped; the inherited mapping stays. */
                close(fd);
                /* Wait for the parent's first "go" byte. */
                read(from4768, &zero, 1);
#if 1
                /* Write one byte every 4096 bytes across the whole mapping. */
                for (x = 0; x < mml[mmi - 1]; x += 4096)
                        mma[mmi-1][x] = 98;
#endif
                /* Acknowledge, then wait for the second "go" byte. */
                write(to4768, &zero, 1);
                read(from4768, &zero, 1);
                /* Read from the raw device directly into the shared mapping. */
                read(mfd, mma[mmi - 1], mml[mmi - 1]);
                printf("1a: %02X\n", mma[mmi-1][0]);
                fflush(stdout);
                mma[mmi-1][0] = 99;
                printf("1b: %02X\n", mma[mmi-1][0]);
                /* well, now we have it... */
#if 1
                /*
                 * Map another MSIZE bytes privately and write one byte every
                 * 4096 bytes.
                 *
                 * NOTE(review): the mmap below is passed fd, which this child
                 * closed above, while fda is only opened and closed. Because
                 * open() returns the lowest free descriptor, fda may reuse
                 * the number fd had, in which case this maps /dev/zero after
                 * all. Presumably fda was intended - confirm before changing,
                 * since the reproducer's behaviour may depend on it.
                 */
                { char* p; int fda;
                  fda = open("/dev/zero", O_RDONLY);
                  p = mmap(NULL, MSIZE, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
                  for (x = 0; x < MSIZE; x += 4096)
                    p[x] = 97;
                  close(fda);
                }
#endif
                
                exit(0);
                
        } else if (pid < 0) {
                perror("fork failed");
                exit(222);
        }
        /* Parent: keep its ends of the first child's pipes. */
        to4778 = x4778_1[1];
        from4778 = x4778_2[0];
        close(x4778_1[0]);
        close(x4778_2[1]);

#if 1
        pipe(x4779_1);
        pipe(x4779_2);
        pid = fork();
        if (!pid) {
                /* ---- second child: same protocol as the first, but the
                   page-touching loop is compiled out and it writes 95 ---- */
                int from4768 = x4779_1[0];
                int to4768 = x4779_2[1];
                close(x4779_1[1]);
                close(x4779_2[0]);
                /* Drop the inherited pipe ends that belong to the first child. */
                close(to4778);
                close(from4778);
                close(fd);
                read(from4768, &zero, 1);
#if 0
                for (x = 0; x < mml[mmi - 1]; x += 4096)
                        mma[mmi-1][x] = 96;
#endif
                write(to4768, &zero, 1);
                read(from4768, &zero, 1);
                read(mfd, mma[mmi - 1], mml[mmi - 1]);
                printf("2a: %02X\n", mma[mmi-1][0]);
                fflush(stdout);
                mma[mmi-1][0] = 95;
                printf("2b: %02X\n", mma[mmi-1][0]);
                exit(0);
                
        } else if (pid < 0) {
                perror("fork failed");
                exit(222);
        }
        to4779 = x4779_1[1];
        from4779 = x4779_2[0];
        close(x4779_1[0]);
        close(x4779_2[1]);
#else
        /* so that read/write to this fails... */
        from4779 = -1;
        to4779 = -1;
#endif
        {
                /* Phase 1: release both children and wait for both acks. */
                write(to4778, &zero, 1);
                write(to4779, &zero, 1);
                read(from4778, &zero, 1);
                read(from4779, &zero, 1);

                /* Phase 2: release both children again (they now read() from
                   mfd into the mapping), wait 5 seconds, then truncate the
                   mapped file to zero length. */
                write(to4778, &zero, 1);
                write(to4779, &zero, 1);
                sleep(5);
                ftruncate(fd, 0);
        }
        close(fd);
        /* Unmap everything recorded by MAL2, newest first. */
        while (mmi--) {
                munmap(mma[mmi], mml[mmi]);
        }
        exit(0);
}

int main(int argc, char* argv[]) {
        struct stat stb;
        int q;
        char rawsubdev[100];
        int min;

        if (argc < 2) {
                fprintf(stderr, "testit /dev/hda\n");
                return 97;
        }
        q = stat(argv[1], &stb);
        if (q != 0) {
                fprintf(stderr, "%s: Cannot stat: %m\n", argv[1]);
                return 98;
        }
        q = open("/dev/raw", O_RDWR);
        if (q == -1) {
                if (errno == ENOENT) {
                        q = mknod("/dev/raw", S_IFCHR | 0600, makedev(RAW_MAJOR, 0));
                        if (q != -1) {
                                q = open("/dev/raw", O_RDWR);
                        }
                }
                if (q == -1) {
                        perror("Opening /dev/raw");
                        exit(99);
                }
        }
        for (min = 1; min < 256; min++) {
                struct raw_config_request rcr;
                int err;
                
                rcr.raw_minor = min;
                rcr.block_major = major(stb.st_rdev);
                rcr.block_minor = minor(stb.st_rdev);
                err = ioctl(q, RAW_SETBIND, &rcr);
                if (!err)
                        break;
                if (errno != EBUSY) {
                        fprintf(stderr, "Unexpected error from mapping raw %d: ioctl: %m\n", min);
                }
        }
        if (min >= 256) {
                close(q);
                fprintf(stderr, "No usable raw device found...\n");
                return 97;
        }
        sprintf(rawsubdev, "/dev/raw%u", min);
        mfd = open(rawsubdev, O_RDONLY);
        if (mfd == -1) {
                if (errno == ENOENT) {
                        mfd = mknod(rawsubdev, S_IFCHR | 0600, makedev(RAW_MAJOR, min));
                        if (mfd != -1) {
                                mfd = open(rawsubdev, O_RDWR);
                        }
                }
                if (mfd == -1) {
                        fprintf(stderr, "Opening %s: %m\n", rawsubdev);
                        exit(99);
                }
        }

        printf("Go\n");
        
        mlockall(MCL_CURRENT); /* so we do not swap out code, so
                                  signaling through pipes works as
                                  expected... */
        
        x4768();
        close(mfd);
        close(q);
        return 0;
}

When I ran it as 'strace -f -o logx -r ./oopsdemo /dev/hda', I was rewarded with:

Go
Process 30900 attached
Process 30901 attached
2a: 33
1a: 33
1b: 63
Process 30900 detached
Process 30901 detached

(Well, it looks like my HDD is still bootable, so the write probably went
into the wastebasket... erm, I opened /dev/raw1 with O_RDONLY. How is it possible
that I could even create a PROT_READ|PROT_WRITE, MAP_SHARED mapping?)

Looks strange, doesn't it? Both processes could read the memory, and the first
process could even write to its memory - even after read() failed due to
vmtruncate... And as the first process caused everything to be swapped out of
memory (by touching the /dev/zero mmapped pages), the logfile filled up with

page->mapping == NULL

(caused by filemap_swap_out... without the patch (see the URL below) you'll get
a nice oops)

Another failure mode is -EIO or page->mapping == NULL in filemap_write_page, when
it is invoked through do_exit -> ... -> filemap_sync -> ... -> filemap_write_page.

For other details, look at http://platan.vc.cvut.cz/listit. From my findings it
looks to me like it has something to do with holding a page reference
while vmtruncate runs, but I know nothing... You can try playing with the
#if [01] switches to get different failure modes...

I know, only root can perform operations on /dev/raw, but... I'm not sure that you
have to play with raw devices to cause this problem... There are other places
which increment page->count temporarily... Or what's wrong...
                                                Best regards,
                                                        Petr Vandrovec
                                                        vandrove@vc.cvut.cz

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Sat Oct 07 2000 - 21:00:19 EST