bug in linux 2.2.10 and 2.2.12 ip or network layer

Thomas Gschwendtner (tgs@bu.edu)
Fri, 10 Sep 1999 13:39:33 -0400


This is a multi-part message in MIME format.
--------------3C6B958F2A2076FEC872E02E
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit

Hi all,

I discovered a bug in the linux network subsystem. Either the IP layer
or (more probably) the network layer drops packets occasionally. I
traced the packets using tcpdump and there are occasionally gaps in the
TCP sequence numbers, i.e. packets are missing. This is not a problem of
the BPF, because:
1. I run tcpdump also at the remote host and there are the same packets
missing and
2. the remote host returns duplicated acks.
I traced the packets which are passed from the TCP to the IP layer in
tcp_transmit_skb. No packet was missing there. The bug does not occur
when I use the loopback interface. (That's why I assume that the bug is
in the network layer and not in the IP layer.)

My setup is as follows:

I have two PPro 200 connected by a 100 Mbit ethernet and DECchip
ethernet cards (CONFIG_DEC_ELCP). I have a simple server which sends
data to a simple client. I attached the source code. Usage:

server <port> <number of bytes to send to client>
client <host name> <port> <number of clients> <log file name>

The client actually forks <number of clients> clients, i.e. you will
have <number of clients> simultaneous connections. The log file is for
internal use only, so don't ask, just give some file name.
I did not see the bug with 1, 2 or 3 clients, but it always occurred
when I had 5 clients, i.e. 5 simultaneous TCP flows. So it seems to be
a race between flows. I tested 2.2.10 and 2.2.12. The bug occurs in
both versions.

Can anyone comment on this? Has anyone else seen this bug?

Thank you.

tgs

PS:
Here is the output of ver_linux
-- Versions installed: (if some fields are empty or looks
-- unusual then possibly you have very old versions)
Linux RIVERWAY.BU.EDU 2.2.12 #5 Fri Sep 10 12:52:52 EDT 1999 i686
unknown
Kernel modules 2.1.121
Gnu C egcs-2.91.66
Binutils 2.9.1.0.23
Linux C Library 2.1.1
Dynamic linker ldd (GNU libc) 2.1.1
Procps 2.0.2
Mount 2.9o
Net-tools 1.51
Console-tools 1999.03.02
Sh-utils 1.16
--------------3C6B958F2A2076FEC872E02E
Content-Type: text/plain; charset=us-ascii;
name="client.c"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
filename="client.c"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <netdb.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <unistd.h>
#include <sys/wait.h>

#define RECBUFSIZE 1024 /* Reception buffer size in bytes (maximum requested per recv() call) */
#define TIMEBUFSIZE 20 /* Size of the line buffer used to read timings back from the log file */

/*
 * Read exactly `len` bytes from `sock` into `buf`.
 *
 * Returns 0 once `len` bytes have been stored, or -1 if recv() reports an
 * error or the peer closes the connection before `len` bytes have arrived.
 * A `len` of 0 succeeds immediately without touching the socket.
 */
static int recv_exact(int sock, char *buf, size_t len)
{
    size_t got = 0;

    while (got < len) {
        ssize_t n = recv(sock, buf + got, len - got, 0);

        if (n <= 0)
            return -1;
        got += (size_t)n;
    }
    return 0;
}

/*
 * Test client.
 *
 * Usage: client <machine> <port> <number of clients> <log_file_name>
 *
 * The parent forks <number of clients> children. Each child connects to the
 * server, reads the length header (one byte giving the length of a decimal
 * string, followed by that string), receives that many bytes, and appends
 * its elapsed time in seconds to the log file. The parent waits for the
 * children, then prints the mean of the timings found in the log file.
 *
 * Exit status is 0 on success and 1 on any error.
 */
int main(int argc, char *argv[])
{
    long sizetransf;              /* How many bytes to receive */
    char sizechar[80] = {0};      /* Transfer size as a decimal string */
    char sizeofsize;              /* Length of that string, as sent by the server */
    int sock;                     /* Server connection socket */
    struct sockaddr_in serv;      /* Server connection socket address */
    struct hostent *as;           /* Server IP address structure */

    char recbuf[RECBUFSIZE];      /* Reception buffer */
    long reccount = 0;            /* Amount of data received */
    double startclock, endclock;  /* Transmission time variables */
    struct timeval tv;
    FILE *fd;                     /* Log file shared with the children */
    char timebuf[TIMEBUFSIZE];
    int clients, i;
    int started = 0;              /* Children successfully forked */
    int is_child = 0;
    pid_t pid;

    if (argc < 5) {
        fprintf(stderr, "%s <machine> <port> <number of clients> <log_file_name>\n", argv[0]);
        exit(1);
    }

    if (!(as = gethostbyname(argv[1]))) { /* Server IP address */
        herror("gethostbyname() error");
        exit(1);
    }

    /* The address is copied into an IPv4 sockaddr below; refuse anything else. */
    if (as->h_addrtype != AF_INET || (size_t)as->h_length != sizeof(serv.sin_addr)) {
        fprintf(stderr, "gethostbyname() did not return an IPv4 address\n");
        exit(1);
    }

    if ((fd = fopen(argv[4], "w+")) == NULL) {
        perror("Cannot open log file");
        exit(1);
    }

    /* A negative count would skip the fork loop entirely, so reject it too. */
    if ((clients = atoi(argv[3])) <= 0) {
        fprintf(stderr, "Number of clients can not be zero.\n");
        exit(1);
    }

    /* Build server address structure */
    memset(&serv, 0, sizeof(serv));
    serv.sin_family = AF_INET;
    serv.sin_port = htons(atoi(argv[2]));
    memcpy(&serv.sin_addr, as->h_addr_list[0], sizeof(serv.sin_addr));

    for (i = 0; i < clients; i++) {
        pid = fork();

        if (pid == 0) {
            is_child = 1;
            break;
        }
        if (pid < 0)
            perror("fork");
        else
            started++;
    }

    if (!is_child) {
        double total = 0.0;
        int samples = 0;

        fclose(fd);

        /* Only wait for children that actually exist. */
        for (i = 0; i < started; i++)
            wait(NULL);

        if ((fd = fopen(argv[4], "r")) == NULL) {
            perror("Cannot open log file");
            exit(1);
        }

        while (fgets(timebuf, TIMEBUFSIZE, fd)) {
            total += atof(timebuf);
            samples++;
        }
        fclose(fd);

        /* Average over the timings actually logged: a child that failed */
        /* wrote nothing and must not drag the mean towards zero.        */
        if (samples == 0) {
            fprintf(stderr, "No client completed its transfer.\n");
            exit(1);
        }

        printf("Transfer time: %f\n", total / samples);
        exit(0);
    }

    fflush(NULL);

    gettimeofday(&tv, NULL); /* Start time in seconds, microsecond resolution */
    startclock = (double)tv.tv_sec + (double)tv.tv_usec / 1000000;

    /* Open socket */
    if ((sock = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
        perror("Cannot open a socket");
        exit(1);
    }

    /* Connect to server */
    if (connect(sock, (struct sockaddr *) &serv, sizeof(serv)) == -1) {
        perror("Cannot connect to the server");
        exit(1);
    }

    /* Gets the amount of bytes to receive. Protocol: the amount as a */
    /* string that follows the size of that string (for portability). */
    if (recv_exact(sock, &sizeofsize, sizeof(sizeofsize)) < 0) {
        fprintf(stderr, "Cannot receive the size of the transfer size\n");
        exit(1);
    }
    printf("Size of transfere size received: %d\n", (int)sizeofsize);

    /* The length byte comes from the network: it must leave room for the */
    /* terminating NUL that the zero-initialised buffer provides.         */
    if (sizeofsize < 0 || (size_t)sizeofsize >= sizeof(sizechar)) {
        fprintf(stderr, "Invalid size of transfer size: %d\n", (int)sizeofsize);
        exit(1);
    }
    if (recv_exact(sock, sizechar, (size_t)sizeofsize) < 0) {
        fprintf(stderr, "Cannot receive the transfer size\n");
        exit(1);
    }
    sizetransf = atoi(sizechar);

    printf("Transfere size received: %ld\n", sizetransf);

    if (sizetransf <= 0) {
        fprintf(stderr, "Invalid data length");
        exit(1);
    }

    while (reccount < sizetransf) {
        ssize_t n = recv(sock, recbuf, RECBUFSIZE, 0);

        if (n < 0) {
            perror("Cannot receive data");
            exit(1);
        }
        if (n == 0) { /* Peer closed early: stop instead of looping forever */
            fprintf(stderr, "Connection closed after %ld of %ld bytes\n",
                    reccount, sizetransf);
            exit(1);
        }
        reccount += n;
    }

    close(sock); /* Closes the connection */

    gettimeofday(&tv, NULL); /* End time in seconds, microsecond resolution */
    endclock = (double)tv.tv_sec + (double)tv.tv_usec / 1000000;

    printf(" %ld bytes received\n", reccount);
    printf(" Starttime: %.3f\n", startclock);
    printf(" Endtime: %.3f\n", endclock);

    /* Log the elapsed time for the parent to average. */
    endclock -= startclock;
    fprintf(fd, "%f\n", endclock);
    fclose(fd);

    exit(0);
}

--------------3C6B958F2A2076FEC872E02E
Content-Type: text/plain; charset=us-ascii;
name="server.c"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
filename="server.c"

#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <netdb.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/socket.h>
#include <linux/socket.h>
#include <sys/wait.h>
#include <sys/resource.h>
#include <netinet/in.h>

#define EMBUFSIZE 20000 /* Emission buffer size in bytes */

/* SIGCHLD handler: reaps terminated child processes without blocking. */
void EndChild(int sig);
/* Per-connection worker, called in the forked child; `nof` is passed to srand(). */
void Service(int sock, int nof);

char embuf[EMBUFSIZE]; /* Emission buffer; main() fills it with rand() % 256 values */
long sizetransf; /* How many bytes to send to clients */
char sizetransfchar[20]; /* Copy of the <sizetransf> argument text; Service() sends it as the length header */
/*
 * Test server.
 *
 * Usage: server <server port> <sizetransf>
 *
 * Listens on <server port> on all local addresses and forks one child per
 * accepted connection; each child calls Service(), which sends <sizetransf>
 * bytes to the client and exits. The accept loop never terminates; the
 * process exits with status 1 on a setup error.
 */
int main(int argc, char *argv[])
{
    int sock, a_sock;         /* Listening socket; service socket */
    struct sockaddr_in serv;  /* Socket address */
    socklen_t lg;             /* Socket address size */
    int pid, i, port;

    if (argc != 3) {
        fprintf(stderr, "%s <server port> <sizetransf>\n", argv[0]);
        exit(1);
    }

    port = atoi(argv[1]);

    sizetransf = atoi(argv[2]);
    /* The argument text is copied verbatim into the fixed-size header */
    /* buffer, so it must fit there including the terminating NUL.     */
    if (sizetransf <= 0 || strlen(argv[2]) >= sizeof(sizetransfchar)) {
        fprintf(stderr, "Invalid transfer size: %s\n", argv[2]);
        exit(1);
    }
    strcpy(sizetransfchar, argv[2]);

    /* Initializes the pseudo-random number generator */
    srand((unsigned int)time(NULL) / getpid());

    /* Fill in the emission buffer with some random sequence */
    for (i = 0; i < EMBUFSIZE; i++)
        embuf[i] = rand() % 256;

    /* Open listening socket */
    if ((sock = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
        perror("Cannot open a socket");
        exit(1);
    }

    /* Build listening socket address structure */
    memset(&serv, 0, sizeof(serv));
    serv.sin_family = AF_INET;                /* IP domain */
    serv.sin_addr.s_addr = htonl(INADDR_ANY); /* Any local address */
    serv.sin_port = htons(port);              /* Fixed port number */

    /* Bind listening socket */
    if (bind(sock, (struct sockaddr *) &serv, sizeof(serv)) == -1) {
        perror("Cannot bind the socket");
        exit(1);
    }

    /* Install SIGCHLD handler */
    signal(SIGCHLD, EndChild);

    /* Start the connection queue */
    if (listen(sock, 100) == -1) { /* Backlog of 100 pending connections */
        perror("Cannot listen on the socket");
        exit(1);
    }

    while (1) { /* Server loop */
        /* Retry until a connection is accepted. NOTE(review): accept() can */
        /* presumably fail with EINTR whenever the SIGCHLD handler runs;    */
        /* other errors are retried as well, as in the original code.       */
        do {
            lg = sizeof(serv);
            a_sock = accept(sock, (struct sockaddr *) &serv, &lg);
        } while (a_sock == -1);

        pid = fork();
        if (pid == 0) { /* Child process */
            if (close(sock) < 0) { /* Close the listening socket */
                perror("Error closing listening socket");
                exit(1);
            }

            /* `i` still holds EMBUFSIZE from the fill loop above, so every */
            /* child seeds rand() with the same value. Service() exits.     */
            Service(a_sock, i); /* Start the service */
        } else { /* Parent process, or fork() failure */
            if (pid < 0)
                perror("fork");

            if (close(a_sock) < 0) { /* Close the service socket */
                perror("Error closing service socket");
                exit(1);
            }
        }
    }
}

/* ------------------------------------------------------------------------ */
/* SIGCHLD handler: reaps every child that has already terminated.          */
/* Never blocks (WNOHANG). `sig` is unused.                                 */
/* errno is saved and restored, because the final waitpid() call fails and  */
/* would otherwise overwrite the errno of the code that was interrupted.    */
/* ------------------------------------------------------------------------ */
void EndChild(int sig)
{
    int saved_errno = errno;

    (void)sig;

    /* Same as wait3(NULL, WNOHANG, NULL): collect children until none is ready. */
    while (waitpid(-1, NULL, WNOHANG) > 0)
        ; /* We don't want zombies */

    signal(SIGCHLD, EndChild); /* Re-install the SIGCHLD handler */

    errno = saved_errno;
}

/* ------------------------------------------------------------------------ */
/* Service function                                                         */
/* Sends the length header, then <sizetransf> bytes taken from the start of */
/* embuf in chunks of random size, closes the connection and exits.         */
/*   sock: connected socket to the client                                   */
/*   nof:  seed passed to srand() for the chunk-size sequence               */
/* Never returns: exits with status 0 on success, 1 on any error.           */
/* ------------------------------------------------------------------------ */
void Service(int sock, int nof)
{
    char sizeofsize;    /* Length of the transfer-size string */
    long sendcount = 0; /* Bytes sent so far */
    long length;        /* Size of the next chunk */
    ssize_t j;          /* Bytes accepted by the last send() */

    /* Send transfer size. Protocol: the size as a string that follows */
    /* the size of that string (for portability). */
    sizeofsize = (char)strlen(sizetransfchar);
    if (send(sock, (char *) &sizeofsize, sizeof(sizeofsize), 0) == -1) {
        perror("send size of transfer size");
        exit(1);
    }
    if (send(sock, sizetransfchar, sizeofsize, 0) == -1) {
        perror("send transfer size");
        exit(1);
    }

    srand(nof);

    /* Send data in length chunks */
    while (sendcount < sizetransf) {
        /* Chunk size in 1..EMBUFSIZE. Computed in double: in float, rand() */
        /* values near RAND_MAX round up and gave EMBUFSIZE + 1, making     */
        /* send() read one byte past the end of embuf.                      */
        length = 1 + (long)((double)EMBUFSIZE * rand() / (RAND_MAX + 1.0));
        if (length > EMBUFSIZE)
            length = EMBUFSIZE;
        if (sizetransf - sendcount < length)
            length = sizetransf - sendcount;

        j = send(sock, embuf, (size_t)length, 0);
        if (j == -1) {
            perror("send data");
            exit(1);
        }

        /* send() may accept fewer bytes than requested; count what went out. */
        sendcount += j;
    }

    if (close(sock) < 0) { /* Close the server socket */
        perror("Error closing server socket");
        exit(1);
    }
    exit(0);
}

--------------3C6B958F2A2076FEC872E02E--

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/