Re: PROBLEM: Data corruption when pasting large data to terminal

From: Bruno Prémont
Date: Fri Feb 17 2012 - 17:04:22 EST


Hi,

On Fri, 17 February 2012 Pavel Machek <pavel@xxxxxx> wrote:
> > > Sorry, I didn't emphasize the point that makes me suspect it's a kernel issue:
> > >
> > > - strace reveals that the terminal emulator writes the correct data
> > > into /dev/ptmx, and the kernel reports no short writes(!), all the
> > > write(..., ..., 68) calls actually return 68 (the length of the
> > > example file's lines incl. newline; I'm naively assuming I can trust
> > > strace here.)
> > > - strace reveals that the receiving application (bash) doesn't receive
> > > all the data from /dev/pts/N.
> > > - so: the data gets lost after writing to /dev/ptmx, but before
> > > reading it out from /dev/pts/N.
> >
> > Which it will, if the reader doesn't read fast enough, right? Is the
> > data somewhere guaranteed to never "overrun" the buffer? If so, how do
> > we handle not just running out of memory?
>
> Start blocking the writer?

I quickly wrote a small test program (attached). It forks a reader child
and sends data over to it; at the end both write their copy of the buffer
to a /tmp/ptmx_{in,out}.txt file so the results can be compared manually (in
addition to the basic output of the line where the mismatch starts).

From the time it took the writer to write larger buffers (as seen using strace)
it seems there *is* some kind of blocking, but it's not blocking long enough
or unblocking too early if the reader does not keep up.


For quick and dirty testing of effects of buffer sizes, tune "rsz", "wsz"
and "line" in main() as well as total size with BUFF_SZ define.


The effect for me is that the writer writes all the data but the reader never
sees the tail of the written data (how much is seen varies, probably a matter
of scheduling, frequency scaling and similar racing factors).

My test system is single-core uniprocessor centrino laptop (32bit x86) with
3.2.5 kernel.

Bruno
#define _XOPEN_SOURCE 700
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <time.h>
#include <sys/wait.h>

/* Total number of bytes pushed through the pty pair: 4096 * 64 = 256 KiB. */
#define BUFF_SZ (4096*64)

/*
 * Dump buff_sz bytes starting at buff into the file fname.
 *
 * The file is created if missing (mode 0664) and truncated otherwise.
 * Short writes are continued until everything is written; EAGAIN,
 * EWOULDBLOCK and EINTR are retried.  Any other failure is reported on
 * stderr and the function returns with whatever was written so far.
 * The descriptor is closed on every path.
 */
void write_buffer(const char *buff, size_t buff_sz, const char *fname) {
	size_t done = 0;
	int fd = open(fname, O_CREAT | O_WRONLY | O_TRUNC, 0664);

	/* open(2) reports failure with -1; 0 is a perfectly valid descriptor. */
	if (fd == -1) {
		fprintf(stderr, "Failed to open(3) %s: %s\n", fname, strerror(errno));
		return;
	}

	while (done < buff_sz) {
		ssize_t r = write(fd, buff + done, buff_sz - done);

		if (r == -1) {
			if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)
				continue;
			fprintf(stderr, "Failed to write(2): %s\n", strerror(errno));
			break;	/* fall through to close(fd) instead of leaking it */
		}
		if (r == 0)
			break;	/* no progress possible, give up */
		done += (size_t)r;
	}

	close(fd);
}

/*
 * Reader side of the test.
 *
 * Reads up to BUFF_SZ bytes from pty in chunks of at most rsz bytes, then
 * checks that the received data is the repeated pattern "<line>\n" as
 * produced by ptmx_master_test().  Only the bytes actually received are
 * inspected.  If the stream ended early or the data does not match, the
 * received bytes are dumped to /tmp/ptmx_out.txt for manual inspection.
 *
 * pty  - descriptor to read from (not closed here)
 * line - expected content of every line, without the trailing newline
 * rsz  - maximum size of a single read(2) request
 */
void ptmx_slave_test(int pty, const char *line, size_t rsz) {
	const size_t total = BUFF_SZ;
	const size_t line_len = strlen(line);
	char *buff = malloc(total);
	size_t got = 0;
	int lineno = 0;
	int bad = 0;

	if (!buff) {
		fprintf(stderr, "Failed to malloc(3): %s\n", strerror(errno));
		return;
	}

	while (got < total) {
		/* Written so that it cannot wrap, unlike "rsz + got > total". */
		size_t want = rsz > total - got ? total - got : rsz;
		ssize_t r = read(pty, buff + got, want);
		struct timespec pause = {0};

		if (r == -1) {
			if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)
				continue;
			fprintf(stderr, "Failed to read(2): %s\n", strerror(errno));
			free(buff);
			return;
		}
		if (r == 0) {
			/* End of stream before the full buffer arrived. */
			fprintf(stderr, "Read %zu bytes, expected %zu!\n", got, total);
			break;
		}
		got += (size_t)r;

		/* Zero-length sleep between successful reads, kept from the original test. */
		nanosleep(&pause, NULL);
	}

	/* A truncated stream is a failure even if every received byte matches. */
	if (got < total)
		bad = 1;

	/* check buffer if it matches expected value... */
	for (size_t off = 0; off < got; off += line_len + 1) {
		size_t avail = got - off;
		size_t cmp_len = line_len < avail ? line_len : avail;

		lineno++;
		if (memcmp(buff + off, line, cmp_len) != 0) {
			// TODO: determine position of breakage!
			fprintf(stderr, "Line data mismatch for line %d!\n", lineno);
			bad = 1;
			break;
		}
		/*
		 * The line terminator is checked only when it was actually
		 * received and is not the very last byte of the stream.
		 */
		if (line_len < avail && off + line_len + 1 < total &&
		    buff[off + line_len] != '\n') {
			fprintf(stderr, "Expecting '\\n' at end of line %d, but found 0x%hhx\n",
				lineno, (unsigned char)buff[off + line_len]);
			bad = 1;
			break;
		}
	}

	if (bad)
		write_buffer(buff, got, "/tmp/ptmx_out.txt");
	free(buff);
}

/*
 * Writer side of the test.
 *
 * Fills a BUFF_SZ byte buffer with the repeated pattern "<line>\n" (the
 * last line is cut off at the end of the buffer), writes it to pty in
 * chunks of at most wsz bytes, closes pty and finally dumps the buffer to
 * /tmp/ptmx_in.txt as the reference copy.
 *
 * This function takes ownership of pty: it is closed on every path,
 * including errors, so a reader on the other end is never left waiting for
 * data that will not come.
 *
 * pty  - descriptor to write to (closed before returning)
 * line - content of every line, without the trailing newline
 * wsz  - maximum size of a single write(2) request
 */
void ptmx_master_test(int pty, const char *line, size_t wsz) {
	const size_t total = BUFF_SZ;
	const size_t line_len = strlen(line);
	char *buff = malloc(total);
	size_t sent = 0;

	if (!buff) {
		fprintf(stderr, "Failed to malloc(3): %s\n", strerror(errno));
		close(pty);
		return;
	}

	/* initialize buffer */
	for (size_t off = 0; off < total; off += line_len + 1) {
		size_t avail = total - off;

		memcpy(buff + off, line, line_len < avail ? line_len : avail);
		/*
		 * Terminate the line whenever the terminator still fits, so
		 * that no byte of the buffer is left uninitialized.
		 */
		if (line_len < avail)
			buff[off + line_len] = '\n';
	}

	while (sent < total) {
		/* Written so that it cannot wrap, unlike "wsz + sent > total". */
		size_t chunk = wsz > total - sent ? total - sent : wsz;
		ssize_t r = write(pty, buff + sent, chunk);

		if (r == -1) {
			if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)
				continue;
			fprintf(stderr, "Failed to write(2): %s\n", strerror(errno));
			close(pty);
			free(buff);
			return;
		}
		if (r == 0)
			break;
		sent += (size_t)r;
	}

	/*
	 * NOTE(review): pty is closed right after the last write, without
	 * waiting for the reader to drain it -- presumably intentional for
	 * this reproducer; confirm before changing.
	 */
	close(pty);
	write_buffer(buff, total, "/tmp/ptmx_in.txt");
	free(buff);
}

/*
 * Test driver.
 *
 * Opens /dev/ptmx, prepares the matching slave device with grantpt(3) and
 * unlockpt(3), then forks: the child opens the slave side and runs
 * ptmx_slave_test(), the parent runs ptmx_master_test() on the master side
 * and afterwards waits for the child.
 *
 * Tunables: "rsz" (reader chunk size), "wsz" (writer chunk size) and
 * "line" (pattern repeated through the buffer).
 *
 * Returns 0 when the child exited with status 0, 1 on any setup failure or
 * abnormal/non-zero child termination.
 */
int main(void) {
	const char *line = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
	const char *ptsdname = NULL;
	size_t rsz = 128, wsz = 1024;
	pid_t pid;
	int pty;

	pty = open("/dev/ptmx", O_RDWR | O_CLOEXEC);
	if (pty == -1) {
		fprintf(stderr, "Failed to open(3) /dev/ptmx: %s\n", strerror(errno));
		return 1;
	}

	ptsdname = ptsname(pty);
	if (!ptsdname) {
		fprintf(stderr, "Failed to ptsname(3): %s\n", strerror(errno));
		close(pty);
		return 1;
	}

	if (grantpt(pty) == -1) {
		fprintf(stderr, "Failed to grantpt(3): %s\n", strerror(errno));
		close(pty);
		return 1;
	}

	if (unlockpt(pty) == -1) {
		fprintf(stderr, "Failed to unlockpt(3): %s\n", strerror(errno));
		close(pty);
		return 1;
	}

	pid = fork();
	if (pid == -1) {
		fprintf(stderr, "Failed to fork(3): %s\n", strerror(errno));
		close(pty);
		return 1;
	}

	if (pid == 0) {
		/* Child: drop the inherited master and act as the reader. */
		close(pty);

		pty = open(ptsdname, O_RDWR | O_CLOEXEC);
		if (pty == -1) {
			fprintf(stderr, "Failed to open(3) %s: %s\n", ptsdname, strerror(errno));
			return 1;
		}

		ptmx_slave_test(pty, line, rsz);
		close(pty);
		return 0;
	}

	/* Parent: act as the writer, then collect the child's exit status. */
	{
		int s;

		ptmx_master_test(pty, line, wsz);

		if (waitpid(pid, &s, 0) == -1) {
			fprintf(stderr, "Failed to waitpid(2) for %d: %s\n", (int)pid, strerror(errno));
			return 1;
		}
		if (WIFEXITED(s) && WEXITSTATUS(s) == 0)
			return 0;
		if (WIFEXITED(s))
			fprintf(stderr, "Child exited with %d\n", WEXITSTATUS(s));
		else if (WIFSIGNALED(s))
			fprintf(stderr, "Child died with signal %d\n", WTERMSIG(s));
		else
			fprintf(stderr, "Child terminated in an unknown way with status %d\n", s);
		return 1;
	}
}