Re: PROBLEM: Data corruption when pasting large data to terminal

From: Egmont Koblinger
Date: Mon Feb 20 2012 - 16:11:54 EST


Hi,

I attach a simple self-contained test case that triggers the bug most
of the time. Moreover, it turns out that we're facing a data
corruption plus a deadlock issue -- often the test triggers randomly
one of them.

The test is a slight modification of Bruno's example (thanks!). The
most important change is: it emulates a readline app by setting the
terminal to cooked mode and doing some "work" (1 millisecond of sleep)
after every newline, then reverting it to raw mode.

Minor changes also include: ignoring the last 100 bytes (potentially
an incomplete line that stays in the kernel's buffer, the slave
doesn't expect that to arrive), plus a long sleep on the master after
writing its output (ugly hack, but definitely long enough to give the
slave time to read everything).

The behavior is:
- Often: Corrupt data read (\r versus \n changes, as well as actual
loss of data), as reported by the slave.
- Often: Deadlock, the slave hangs in a read() reading from the
terminal, while the master hangs on its write() at the same time.

You can play with parameters like the buffer size, the write size
(wsz), the blocking vs. nonblocking mode of write, TCSETS versus
TCSETSW -- they don't make much of a difference.

What does make a difference though, is the read size (rsz). The bug
is reproducible if and only if the read size is a divisor of the
length of the line excluding the terminating newline (i.e. the length
of the full line minus one); that is, a divisor of 62 in this example.
So a read size of 1 (which is used by readline) triggers the bug with
all kinds of data; larger read sizes only with certain well-crafted
buffers.

Also, the bug is still only reproducible after writing at least 4kB.

This gives me a guts feeling (without actually studying the kernel's
source) that it might be some circular buffer overrun: whenever
there's only 1 byte left in the buffer, the final newline of a line,
the writer can incorrectly wrap around in a 4k buffer and override
that -- does this make any sense?

Interestingly, the test uses \n and \r reversed compare to real life
(the buffer should contain \r instead of \n, and ICRNL should be used
instead of INLCR) -- for some reason this test didn't trigger the bug
for me after swapping the two, I don't know why.

Anyway, I hope that this test case and my findings about the read size
helps catch and fix the bug.

Thanks a lot,
egmont
#define _XOPEN_SOURCE 700
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <time.h>
#include <sys/wait.h>
#include <sys/select.h>
#include <sys/ioctl.h>
#include <termios.h>

#define BUFF_SZ (4096*256)
// Expect fewer bytes than the master writes, because the last incomplete line
// is not sent to the slave in cooked mode.
#define READ_BUFF_SZ (BUFF_SZ - 100)

void raw(int pty) {
struct termios t;
ioctl(pty, TCGETS, &t);
t.c_lflag &= ~ICANON;
t.c_iflag &= ~INLCR;
ioctl(pty, TCSETSW, &t);
}

void cooked(int pty) {
struct termios t;
ioctl(pty, TCGETS, &t);
t.c_lflag |= ICANON;
t.c_iflag |= INLCR;
ioctl(pty, TCSETSW, &t);
}

void write_buffer(const char *buff, size_t buff_sz, const char *fname) {
int fd = open(fname, O_CREAT | O_WRONLY | O_TRUNC, 0664);
size_t n = 0;
ssize_t r;
if (!fd) {
fprintf(stderr, "Failed to open(3) %s: %s\n", fname, strerror(errno));
return;
}

do {
r = write(fd, buff + n, buff_sz - n);
if (r == -1) {
if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)
continue;
fprintf(stderr, "Failed to write(2): %s\n", strerror(errno));
return;
} else if (r == 0) {
break;
} else {
n += r;
}
} while (n < buff_sz);
close(fd);
}

void ptmx_slave_test(int pty, const char *line, size_t rsz) {
char *buff = malloc(READ_BUFF_SZ);
size_t n = 0, nn;
ssize_t r;
int l, bad;
struct timespec slen;

if (!buff) {
fprintf(stderr, "Failed to malloc(3): %s\n", strerror(errno));
return;
}
memset(buff, 0, READ_BUFF_SZ);

raw(pty); // emulate the initialization of a readline app
do {
r = read(pty, buff + n, rsz + n > READ_BUFF_SZ ? READ_BUFF_SZ - n : rsz);
if (r == -1) {
if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)
continue;
fprintf(stderr, "Failed to read(2) after reading %d bytes: %s\n", n, strerror(errno));
break; // Despite the error, compare the buffer against the reference.
} else if (r == 0) {
if (n < READ_BUFF_SZ)
fprintf(stderr, "Read %zu bytes, expected %zu!\n", n, READ_BUFF_SZ);
break;
} else {
if (buff[n] == '\n') {
// emulate a readline app taking action on the input
cooked(pty);
memset(&slen, 0, sizeof(slen));
slen.tv_nsec = 1000 * 1000;
nanosleep(&slen, NULL);
raw(pty);
}
n += r;
}
} while (n < READ_BUFF_SZ);
nn = n;

/* check buffer if it matches expected value... */
r = strlen(line);
l = 0;
bad = 0;
for (n = 0; n < READ_BUFF_SZ; n += r+1) {
l++;
if (memcmp(buff + n, line, n + r < READ_BUFF_SZ ? r : READ_BUFF_SZ - n) != 0) {
// TODO: determine position of breakage!
fprintf(stderr, "Line data mismatch for line %d!\n", l);
bad = 1;
break;
}
if (n + r + 1 < READ_BUFF_SZ && buff[n+r] != '\n') {
if (!bad)
fprintf(stderr, "Expecting '\\n' at end of line %d, but found 0x%hhx\n", l, buff[n+r]);
bad = 1;
// Don't break, see if there's a more serious mistake than a \r -> \n.
}
}

// fprintf(stderr, "Buffer seen by slave is:\n");
// fwrite(buff, READ_BUFF_SZ, 1, stdout);
if (bad) {
write_buffer(buff, nn, "/tmp/ptmx_out.txt");
fprintf(stderr, "See payload in /tmp/ptmx_out.txt\n");
} else
fprintf(stderr, "slave says: everything's okay\n");
}

void ptmx_master_test(int pty, const char *line, size_t wsz) {
char *buff = malloc(BUFF_SZ);
size_t n = 0;
ssize_t r;

if (!buff) {
fprintf(stderr, "Failed to malloc(3): %s\n", strerror(errno));
return;
}

/* initialize buffer */
r = strlen(line);
for (n = 0; n < BUFF_SZ; n += r+1) {
memcpy(buff + n, line, n + r < BUFF_SZ ? r : BUFF_SZ - n);
if (n + r + 1 < BUFF_SZ)
buff[n+r] = '\n';
}

n = 0;
do {
fprintf(stderr, "write %d\n", wsz + n > BUFF_SZ ? BUFF_SZ - n : wsz);
r = write(pty, buff + n, wsz + n > BUFF_SZ ? BUFF_SZ - n : wsz);
fprintf(stderr, " -> wrote %d\n", r);
if (r == -1) {
if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) {
fd_set write_fds;
FD_ZERO(&write_fds);
FD_SET(pty, &write_fds);
select(pty+1, NULL, &write_fds, NULL, NULL);
continue;
}
fprintf(stderr, "Failed to write(2): %s\n", strerror(errno));
return;
} else if (r == 0) {
break;
} else {
n += r;
}
} while (n < BUFF_SZ);
fprintf(stderr, "master is sleeping now...\n");
sleep(10);
fprintf(stderr, "master exiting\n");
close(pty);
write_buffer(buff, BUFF_SZ, "/tmp/ptmx_in.txt");
}

int main() {
const char *line = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
const char *ptsdname = NULL;
int pty, pid;
size_t rsz = 31, wsz = 4096 * 256;

pty = open("/dev/ptmx", O_RDWR /* | O_NONBLOCK */);
if (pty == -1) {
fprintf(stderr, "Failed to open(3) /dev/ptmx: %s\n", strerror(errno));
return 1;
}

ptsdname = ptsname(pty);
fprintf(stderr, "ptsname = %s\n", ptsdname);
if (!ptsdname) {
fprintf(stderr, "Failed to ptsname(3): %s\n", strerror(errno));
close(pty);
return 1;
}

if (grantpt(pty) == -1) {
fprintf(stderr, "Failed to grantpty(3): %s\n", strerror(errno));
close(pty);
return 1;
}

if (unlockpt(pty) == -1) {
fprintf(stderr, "Failed to unlockpt(3): %s\n", strerror(errno));
close(pty);
return 1;
}

pid = fork();
if (pid == -1) {
fprintf(stderr, "Failed to fork(3): %s\n", strerror(errno));
close(pty);
return 1;
} else if (pid == 0) {
close(pty);

pty = open(ptsdname, O_RDWR);
if (pty == -1) {
fprintf(stderr, "Failed to open(3) %s: %s\n", ptsdname, strerror(errno));
return 1;
}

ptmx_slave_test(pty, line, rsz);
close(pty);
return 0;
} else {
int s;
ptmx_master_test(pty, line, wsz);

if (waitpid(pid, &s, 0) == -1) {
fprintf(stderr, "Failed to waitpid(2) for %d: %s\n", pid, strerror(errno));
return 1;
}
if (WIFEXITED(s) && WEXITSTATUS(s) == 0)
return 0;
if (WIFEXITED(s))
fprintf(stderr, "Child exited with %d\n", WEXITSTATUS(s));
else if (WIFSIGNALED(s))
fprintf(stderr, "Child died with signal %d\n", WTERMSIG(s));
else
fprintf(stderr, "Child terminated in an unknown way with status %d\n", s);
return 1;
}
}