(splice socket -> pipe) + EPOLLET -> epoll_wait does not wake up !

From: ÐÐÑÐ ÐÐÑÐÐÐÐÑÐ
Date: Thu Oct 13 2011 - 10:54:31 EST


The problem:

man 7 epoll said:
For stream-oriented files (e.g., pipe, FIFO, stream socket), the
condition that the read/write I/O space is exhausted can also be
detected by checking the amount of data read from / written to the
target file descriptor. For example, if you call read(2) by asking to
read a certain amount of data and read(2) returns a lower number of
bytes, you can be sure of having exhausted the read I/O space for
the file descriptor.

I decided to use splice (socket -> pipe) instead of recv. So I have
registered the socket's fd in epoll with EPOLLIN|EPOLLET.

When data arrives in the socket faster than I splice() it out of the
socket, the following sometimes happens:

1. My code is sure that the pipe is empty.
2. My code calls splice(socket, pipe, 65536).
3. splice() returns, say, 53248.
4. Following the man page, my code decides not to call splice() again,
as it assumes that the next call would return EWOULDBLOCK=EAGAIN.
5. So my code goes to epoll_wait() to wait for EPOLLIN on the socket.
6. epoll_wait() hangs.

This does not happen if I just use recv(). But that may be because the
speed is lower, and some race condition is in effect.

The hacked version of strace output is attached.
epoll_create(EPOLL_CLOEXEC) = 6

/* 4 is a socket of HTTP_CLIENT. (got from accept4(SOCK_CLOEXEC|SOCK_NONBLOCK)) */
epoll_ctl(6, EPOLL_CTL_ADD, 4, {EPOLLIN|EPOLLOUT|EPOLLET|EPOLLRDHUP, {fd=4}}) = 0
pipe2([7, 9], O_NONBLOCK|O_CLOEXEC) = 0

/* Creating socket for a HTTP_SERVER */
socket(PF_INET, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, IPPROTO_IP) = 10
connect(10, {sa_family=AF_INET, sin_port=htons(80), sin_addr=inet_addr("127.0.0.1")}, 16) = -1 EINPROGRESS (Operation now in progress)
epoll_ctl(6, EPOLL_CTL_ADD, 10, {EPOLLIN|EPOLLOUT|EPOLLET|0x2000, {fd=10}}) = 0
pipe2([11, 12], O_NONBLOCK|O_CLOEXEC) = 0
epoll_wait(6, {{EPOLLOUT, {fd=4}}, {EPOLLOUT, {fd=10}}}, 100, -1) = 2
getsockopt(10, SOL_SOCKET, SO_ERROR, [0], [4]) = 0
epoll_wait(6, {{EPOLLIN|EPOLLOUT, {fd=4}}}, 100, -1) = 1
splice(4, 0, 9, 0, 65536, SPLICE_F_MOVE|SPLICE_F_NONBLOCK) = 421
splice(7, 0, 10, 0, 0x1a5, SPLICE_F_MOVE|SPLICE_F_NONBLOCK) = 421
epoll_wait(6, {{EPOLLIN|EPOLLOUT, {fd=10}}}, 100, -1) = 1
splice(10, 0, 12, 0, 65536, SPLICE_F_MOVE|SPLICE_F_NONBLOCK) = 801
splice(11, 0, 4, 0, 0x321, SPLICE_F_MOVE|SPLICE_F_NONBLOCK) = 801
epoll_wait(6, {{EPOLLIN|EPOLLOUT, {fd=4}}}, 100, -1) = 1
splice(4, 0, 9, 0, 65536, SPLICE_F_MOVE|SPLICE_F_NONBLOCK) = 345
splice(7, 0, 10, 0, 0x159, SPLICE_F_MOVE|SPLICE_F_NONBLOCK) = 345
epoll_wait(6, {{EPOLLIN|EPOLLOUT, {fd=10}}}, 100, -1) = 1
splice(10, 0, 12, 0, 65536, SPLICE_F_MOVE|SPLICE_F_NONBLOCK) = 502
splice(11, 0, 4, 0, 0x1f6, SPLICE_F_MOVE|SPLICE_F_NONBLOCK) = 502
epoll_wait(6, {{EPOLLIN|EPOLLOUT, {fd=4}}}, 100, -1) = 1
splice(4, 0, 9, 0, 65536, SPLICE_F_MOVE|SPLICE_F_NONBLOCK) = 458
splice(7, 0, 10, 0, 0x1ca, SPLICE_F_MOVE|SPLICE_F_NONBLOCK) = 458
epoll_wait(6, {{EPOLLIN|EPOLLOUT, {fd=10}}}, 100, -1) = 1
splice(10, 0, 12, 0, 65536, SPLICE_F_MOVE|SPLICE_F_NONBLOCK) = 929
splice(11, 0, 4, 0, 0x3a1, SPLICE_F_MOVE|SPLICE_F_NONBLOCK) = 929
epoll_wait(6, {{EPOLLIN|EPOLLOUT, {fd=4}}}, 100, -1) = 1
splice(4, 0, 9, 0, 65536, SPLICE_F_MOVE|SPLICE_F_NONBLOCK) = 345
splice(7, 0, 10, 0, 0x159, SPLICE_F_MOVE|SPLICE_F_NONBLOCK) = 345
epoll_wait(6, {{EPOLLIN|EPOLLOUT, {fd=10}}}, 100, -1) = 1
splice(10, 0, 12, 0, 65536, SPLICE_F_MOVE|SPLICE_F_NONBLOCK) = 502
splice(11, 0, 4, 0, 0x1f6, SPLICE_F_MOVE|SPLICE_F_NONBLOCK) = 502
epoll_wait(6, {{EPOLLIN|EPOLLOUT, {fd=4}}}, 100, -1) = 1
splice(4, 0, 9, 0, 65536, SPLICE_F_MOVE|SPLICE_F_NONBLOCK) = 543
splice(7, 0, 10, 0, 0x21f, SPLICE_F_MOVE|SPLICE_F_NONBLOCK) = 543
epoll_wait(6, {{EPOLLIN|EPOLLOUT, {fd=10}}}, 100, -1) = 1
splice(10, 0, 12, 0, 65536, SPLICE_F_MOVE|SPLICE_F_NONBLOCK) = 348
splice(11, 0, 4, 0, 0x15c, SPLICE_F_MOVE|SPLICE_F_NONBLOCK) = 348
epoll_wait(6, {{EPOLLIN|EPOLLOUT, {fd=4}}}, 100, -1) = 1
splice(4, 0, 9, 0, 65536, SPLICE_F_MOVE|SPLICE_F_NONBLOCK) = 543
splice(7, 0, 10, 0, 0x21f, SPLICE_F_MOVE|SPLICE_F_NONBLOCK) = 543
epoll_wait(6, {{EPOLLIN|EPOLLOUT, {fd=10}}}, 100, -1) = 1
splice(10, 0, 12, 0, 65536, SPLICE_F_MOVE|SPLICE_F_NONBLOCK) = 38787
splice(11, 0, 4, 0, 0x9783, SPLICE_F_MOVE|SPLICE_F_NONBLOCK) = 38787
epoll_wait(6, {{EPOLLIN|EPOLLOUT, {fd=4}}}, 100, -1) = 1
splice(4, 0, 9, 0, 65536, SPLICE_F_MOVE|SPLICE_F_NONBLOCK) = 345
splice(7, 0, 10, 0, 0x159, SPLICE_F_MOVE|SPLICE_F_NONBLOCK) = 345
epoll_wait(6, {{EPOLLIN|EPOLLOUT, {fd=10}}}, 100, -1) = 1
splice(10, 0, 12, 0, 65536, SPLICE_F_MOVE|SPLICE_F_NONBLOCK) = 502
splice(11, 0, 4, 0, 0x1f6, SPLICE_F_MOVE|SPLICE_F_NONBLOCK) = 502
epoll_wait(6, {{EPOLLIN|EPOLLOUT, {fd=4}}}, 100, -1) = 1
splice(4, 0, 9, 0, 65536, SPLICE_F_MOVE|SPLICE_F_NONBLOCK) = 519
splice(7, 0, 10, 0, 0x207, SPLICE_F_MOVE|SPLICE_F_NONBLOCK) = 519
epoll_wait(6, {{EPOLLIN|EPOLLOUT, {fd=10}}}, 100, -1) = 1
splice(10, 0, 12, 0, 65536, SPLICE_F_MOVE|SPLICE_F_NONBLOCK) = 49152
splice(11, 0, 4, 0, 0xc000, SPLICE_F_MOVE|SPLICE_F_NONBLOCK) = 49152
epoll_wait(6, {{EPOLLIN|EPOLLOUT, {fd=10}}}, 100, -1) = 1
splice(10, 0, 12, 0, 65536, SPLICE_F_MOVE|SPLICE_F_NONBLOCK) = 49448
splice(11, 0, 4, 0, 0xc128, SPLICE_F_MOVE|SPLICE_F_NONBLOCK) = 49448
epoll_wait(6, {{EPOLLIN|EPOLLOUT, {fd=10}}}, 100, -1) = 1
splice(10, 0, 12, 0, 65536, SPLICE_F_MOVE|SPLICE_F_NONBLOCK) = 53248
splice(11, 0, 4, 0, 53248, SPLICE_F_MOVE|SPLICE_F_NONBLOCK) = 32472
epoll_wait(6, {{EPOLLOUT, {fd=4}}}, 100, -1) = 1
splice(11, 0, 4, 0, 20776, SPLICE_F_MOVE|SPLICE_F_NONBLOCK) = 20776
epoll_wait(6, +++ killed by SIGINT +++