From: Eric Wong <normalperson@yhbt.net>
To: netdev@vger.kernel.org, linux-kernel@vger.kernel.org
Cc: Andreas Voellmy <andreas.voellmy@yale.edu>,
viro@zeniv.linux.org.uk, linux-fsdevel@vger.kernel.org,
"Junchang(Jason) Wang" <junchang.wang@yale.edu>
Subject: ppoll() stuck on POLLIN while TCP peer is sending
Date: Fri, 28 Dec 2012 01:45:03 +0000 [thread overview]
Message-ID: <20121228014503.GA5017@dcvr.yhbt.net> (raw)
I'm finding ppoll() unexpectedly stuck when waiting for POLLIN on a
local TCP socket. The isolated code below can reproduce the issue
after many minutes (<1 hour). It might be easier to reproduce on
a busy system while disk I/O is happening.
This may also be related to an epoll-related issue reported
by Andreas Voellmy:
http://thread.gmane.org/gmane.linux.kernel/1408782/
My example involves a 3 thread data flow between two pairs
of (4) sockets:
send_loop -> recv_loop(recv_send) -> recv_loop(recv_only)
pair_a[1] -> (pair_a[0] -> pair_b[1]) -> pair_b[0]
At least 3.7 and 3.7.1 are affected.
I have tcp_low_latency=1 set; I will try 0 later.
The last progress message I got was after receiving 2942052597760
bytes on fd=7 (out of 64-bit ULONG_MAX / 2)
strace:
3644 sendto(4, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 16384, 0, NULL, 0 <unfinished ...>
3643 sendto(6, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 16384, 0, NULL, 0 <unfinished ...>
3642 ppoll([{fd=7, events=POLLIN}], 1, NULL, NULL, 8 <unfinished ...>
3641 futex(0x7f23ed8129d0, FUTEX_WAIT, 3644, NULL <unfinished ...>
The first and last lines of the strace are expected:
+ 3644 sendto(4) is blocked because 3643 is blocked on sendto(fd=6)
and not able to call recv().
+ 3641 is the main thread calling pthread_join
What is unexpected is the tid=3643 and tid=3642 interaction. As confirmed
by lsof below, fd=6 is sending to wake up fd=7, but ppoll(fd=7) seems
to not be waking up.
lsof:
toosleepy 3641 ew 4u IPv4 12405 0t0 TCP localhost:55904->localhost:33249 (ESTABLISHED)
toosleepy 3641 ew 5u IPv4 12406 0t0 TCP localhost:33249->localhost:55904 (ESTABLISHED)
toosleepy 3641 ew 6u IPv4 12408 0t0 TCP localhost:48777->localhost:33348 (ESTABLISHED)
toosleepy 3641 ew 7u IPv4 12409 0t0 TCP localhost:33348->localhost:48777 (ESTABLISHED)
System info: Linux 3.7.1 x86_64 SMP PREEMPT
AMD Phenom(tm) II X4 945 Processor (4 cores)
Nothing interesting in dmesg, iptables rules are empty.
I have not yet been able to reproduce the issue using UNIX sockets,
only TCP, but you can run:
./toosleepy unix
...to test with UNIX sockets instead of TCP.
The following code is also available via git://bogomips.org/toosleepy
gcc -o toosleepy -O2 -Wall -lpthread toosleepy.c
-------------------------------- 8< ------------------------------------
#define _GNU_SOURCE
#include <poll.h>
#include <sys/ioctl.h>
#include <pthread.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <netinet/tcp.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <assert.h>
#include <limits.h>
/*
 * Descriptor pair handed to each recv_loop() thread.
 */
struct receiver {
	int rfd;	/* socket the thread receives from */
	int sfd;	/* socket to forward on; a negative value disables forwarding */
};
/*
 * Blocking sender (thread entry point).
 *
 * fdp points to an int holding the socket descriptor to write to.
 * Repeatedly send()s a 16 KiB zero-filled buffer until at least
 * ULONG_MAX / 2 bytes have been accepted, then reports the byte count
 * on fd 2 and closes the socket.  Always returns NULL.
 *
 * The only send() failure tolerated is EINTR; anything else trips the
 * assert.
 */
static void * send_loop(void *fdp)
{
	int fd = *(int *)fdp;
	/* zero-filled so indeterminate stack contents are never transmitted */
	char buf[16384] = { 0 };
	ssize_t s;
	size_t sent = 0;
	size_t max = (size_t)ULONG_MAX / 2;

	while (sent < max) {
		s = send(fd, buf, sizeof(buf), 0);
		/* partial sends are fine: only the accepted bytes are counted */
		if (s > 0)
			sent += s;
		if (s == -1)
			assert(errno == EINTR);
	}

	dprintf(2, "%d done sending: %zu\n", fd, sent);
	close(fd);
	return NULL;
}
/*
 * Non-blocking receiver, using ppoll (thread entry point).
 *
 * p points to a struct receiver.  The loop recv()s from rcvr->rfd until
 * recv() returns 0, then reports the totals on fd 2, closes rcvr->sfd
 * if it is in use (sfd >= 0), and returns NULL.
 *
 * Per iteration:
 *   - recv() == -1: errno must be EAGAIN; the thread then waits in
 *     ppoll() for POLLIN on rfd with no timeout and no signal mask.
 *   - recv() > 0: the bytes are counted, then either one buffer is
 *     sent on sfd or, when sfd < 0, a write() to fd -1 is issued purely
 *     to burn cycles.
 *
 * The syscall sequence is intentionally left exactly as in the original
 * reproducer.
 */
static void * recv_loop(void *p)
{
	const struct receiver *rcvr = p;
	/*
	 * Zero-filled: a full buffer is forwarded below even after a short
	 * recv(), so the tail must never hold indeterminate stack bytes.
	 */
	char buf[16384] = { 0 };
	/*
	 * Progress interval in bytes.  Computed in 64 bits because
	 * 16384 * 16384 * 16 == 2^32, which would wrap to 0 (and make the
	 * modulo below a division by zero) where size_t is 32 bits wide.
	 */
	const uint64_t progress_step = (uint64_t)sizeof(buf) * sizeof(buf) * 16;
	nfds_t nfds = 1;
	struct pollfd fds;
	int rc;
	ssize_t r, s;
	size_t received = 0;
	size_t sent = 0;

	for (;;) {
		r = recv(rcvr->rfd, buf, sizeof(buf), 0);
		if (r == 0) {
			/* peer closed: leave the loop and report totals */
			break;
		} else if (r == -1) {
			/* nothing queued yet: sleep until the socket is readable */
			assert(errno == EAGAIN);
			fds.fd = rcvr->rfd;
			fds.events = POLLIN;
			errno = 0;
			rc = ppoll(&fds, nfds, NULL, NULL);
			assert(rc == 1);
		} else {
			assert(r > 0);
			received += r;
			if (rcvr->sfd >= 0) {
				/*
				 * NOTE(review): forwards sizeof(buf) bytes
				 * regardless of r; presumably deliberate
				 * load generation -- confirm before changing.
				 */
				s = send(rcvr->sfd, buf, sizeof(buf), 0);
				if (s > 0)
					sent += s;
				if (s == -1)
					assert(errno == EINTR);
			} else {
				/* just burn some cycles */
				write(-1, buf, sizeof(buf));
			}
		}

		/* evaluated after the ppoll() branch as well, as in the original */
		if ((uint64_t)received % progress_step == 0)
			dprintf(2, " %d progress: %zu\n",
				rcvr->rfd, received);
	}

	dprintf(2, "%d got: %zu\n", rcvr->rfd, received);
	if (rcvr->sfd >= 0) {
		dprintf(2, "%d sent: %zu\n", rcvr->sfd, sent);
		close(rcvr->sfd);
	}
	return NULL;
}
/*
 * Report a failed setup step of tcp_socketpair() on fd 2 (with the
 * current errno text) and abort, as a failed assert() would.
 */
static void tcp_socketpair_fail(const char *what)
{
	dprintf(2, "tcp_socketpair: %s: %s\n", what, strerror(errno));
	abort();
}

/*
 * Create a connected pair of TCP sockets, socketpair()-style.
 *
 * sv[0] receives the accepted end, created via accept4() with
 * accept_flags (e.g. SOCK_NONBLOCK); sv[1] receives the connecting end.
 * The temporary listening socket is closed before returning.
 *
 * Every step is checked explicitly rather than inside assert(), so the
 * syscalls still run when the file is compiled with -DNDEBUG.  Any
 * failure aborts the process.
 */
static void tcp_socketpair(int sv[2], int accept_flags)
{
	struct sockaddr_in addr;
	socklen_t addrlen = sizeof(addr);
	int l, c, a;

	/* same creation order as before: listener first, then client */
	l = socket(PF_INET, SOCK_STREAM, 0);
	if (l < 0)
		tcp_socketpair_fail("socket(listener)");
	c = socket(PF_INET, SOCK_STREAM, 0);
	if (c < 0)
		tcp_socketpair_fail("socket(client)");

	/* clear padding bytes before handing the address to the kernel */
	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	/*
	 * Loopback only: the listener is not reachable from other hosts
	 * and connect() does not depend on 0.0.0.0 being treated as
	 * localhost.
	 */
	addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
	addr.sin_port = 0;	/* let the kernel choose a free port */

	if (bind(l, (struct sockaddr *)&addr, addrlen) != 0)
		tcp_socketpair_fail("bind");
	if (listen(l, 1024) != 0)
		tcp_socketpair_fail("listen");
	/* learn which port was assigned so the client can connect to it */
	if (getsockname(l, (struct sockaddr *)&addr, &addrlen) != 0)
		tcp_socketpair_fail("getsockname");
	if (connect(c, (struct sockaddr *)&addr, addrlen) != 0)
		tcp_socketpair_fail("connect");

	a = accept4(l, NULL, NULL, accept_flags);
	if (a < 0)
		tcp_socketpair_fail("accept4");
	close(l);

	sv[0] = a;
	sv[1] = c;
}
/*
 * Report a failed setup step of main() on fd 2 and abort, as a failed
 * assert() would.  err is an errno-style error number.
 */
static void setup_failed(const char *what, int err)
{
	dprintf(2, "%s: %s\n", what, strerror(err));
	abort();
}

/*
 * Build two socket pairs, start one sender and two receiver threads,
 * and wait for all of them.
 *
 * With the single argument "unix" the pairs are AF_UNIX socketpairs;
 * otherwise they are TCP pairs from tcp_socketpair().  In both modes
 * element [0] of each pair (the receiving end) is made non-blocking.
 *
 * Setup calls are checked explicitly instead of being wrapped in
 * assert(), so they are not compiled out under -DNDEBUG.  Returns 0
 * once all threads have been joined; aborts on any setup failure.
 */
int main(int argc, char *argv[])
{
	int pair_a[2];
	int pair_b[2];
	pthread_t s, rs, r;
	struct receiver recv_only;
	struct receiver recv_send;
	int rc;

	if (argc == 2 && strcmp(argv[1], "unix") == 0) {
		int val;

		if (socketpair(AF_UNIX, SOCK_STREAM, 0, pair_a) != 0)
			setup_failed("socketpair(pair_a)", errno);
		if (socketpair(AF_UNIX, SOCK_STREAM, 0, pair_b) != 0)
			setup_failed("socketpair(pair_b)", errno);

		/* only make the receiver non-blocking */
		val = 1;
		if (ioctl(pair_a[0], FIONBIO, &val) != 0)
			setup_failed("ioctl(pair_a[0], FIONBIO)", errno);
		val = 1;
		if (ioctl(pair_b[0], FIONBIO, &val) != 0)
			setup_failed("ioctl(pair_b[0], FIONBIO)", errno);
	} else {
		tcp_socketpair(pair_a, SOCK_NONBLOCK);
		tcp_socketpair(pair_b, SOCK_NONBLOCK);
	}

	recv_send.rfd = pair_a[0];
	recv_send.sfd = pair_b[1];
	recv_only.rfd = pair_b[0];
	recv_only.sfd = -1;

	/*
	 * data flow:
	 * send_loop -> recv_loop(recv_send) -> recv_loop(recv_only)
	 * pair_a[1] -> (pair_a[0] -> pair_b[1]) -> pair_b[0]
	 */
	/* pthread_* functions return the error number instead of setting errno */
	rc = pthread_create(&r, NULL, recv_loop, &recv_only);
	if (rc != 0)
		setup_failed("pthread_create(recv_only)", rc);
	rc = pthread_create(&rs, NULL, recv_loop, &recv_send);
	if (rc != 0)
		setup_failed("pthread_create(recv_send)", rc);
	rc = pthread_create(&s, NULL, send_loop, &pair_a[1]);
	if (rc != 0)
		setup_failed("pthread_create(send_loop)", rc);

	rc = pthread_join(s, NULL);
	if (rc != 0)
		setup_failed("pthread_join(send_loop)", rc);
	rc = pthread_join(rs, NULL);
	if (rc != 0)
		setup_failed("pthread_join(recv_send)", rc);
	rc = pthread_join(r, NULL);
	if (rc != 0)
		setup_failed("pthread_join(recv_only)", rc);

	return 0;
}
-------------------------------- 8< ------------------------------------
Any help/suggestions/test patches would be greatly appreciated.
Thanks for reading!
--
Eric Wong
next reply other threads:[~2012-12-28 1:45 UTC|newest]
Thread overview: 53+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-12-28 1:45 Eric Wong [this message]
2012-12-28 7:06 ` ppoll() stuck on POLLIN while TCP peer is sending Eric Wong
2012-12-29 11:34 ` Eric Wong
2012-12-31 13:21 ` [PATCH] poll: prevent missed events if _qproc is NULL Eric Wong
2012-12-31 23:24 ` Eric Wong
2013-01-01 16:58 ` Junchang(Jason) Wang
2013-01-01 18:42 ` Eric Dumazet
2013-01-01 21:00 ` Eric Wong
2013-01-01 21:17 ` Eric Wong
2013-01-01 22:53 ` Linus Torvalds
2013-01-01 23:21 ` Junchang(Jason) Wang
2013-01-01 23:56 ` [PATCH] epoll: prevent missed events on EPOLL_CTL_MOD Eric Wong
2013-01-02 17:45 ` Eric Dumazet
2013-01-02 18:40 ` Eric Wong
2013-01-02 19:03 ` Eric Dumazet
2013-01-02 19:32 ` Eric Wong
2013-01-02 22:08 ` Eric Dumazet
2013-01-02 21:16 ` Eric Wong
2013-01-02 20:08 ` ppoll() stuck on POLLIN while TCP peer is sending Eric Wong
2013-01-02 20:47 ` Eric Wong
2013-01-03 13:41 ` Eric Dumazet
2013-01-03 18:32 ` Eric Wong
2013-01-03 23:45 ` Eric Wong
2013-01-04 0:26 ` Eric Wong
2013-01-04 3:52 ` Eric Wong
2013-01-04 16:01 ` Mel Gorman
2013-01-04 17:15 ` Eric Dumazet
2013-01-04 17:59 ` Eric Wong
2013-01-05 1:07 ` Eric Wong
2013-01-06 12:07 ` Eric Wong
2013-01-07 12:25 ` Mel Gorman
2013-01-07 22:38 ` Eric Dumazet
2013-01-08 0:21 ` Eric Wong
2013-01-07 22:38 ` Eric Wong
2013-01-08 20:14 ` Eric Wong
2013-01-08 22:43 ` Mel Gorman
2013-01-08 23:23 ` Eric Wong
2013-01-09 2:14 ` Eric Dumazet
2013-01-09 2:32 ` Eric Dumazet
2013-01-09 2:54 ` Eric Dumazet
2013-01-09 3:55 ` Eric Wong
2013-01-09 8:42 ` Eric Wong
2013-01-09 8:51 ` Eric Wong
2013-01-09 13:42 ` Mel Gorman
2013-01-09 13:37 ` Mel Gorman
2013-01-09 13:50 ` Mel Gorman
2013-01-10 9:25 ` Eric Wong
2013-01-10 19:42 ` Mel Gorman
2013-01-10 20:03 ` Eric Wong
2013-01-10 20:58 ` Eric Dumazet
2013-01-11 0:51 ` Eric Wong
2013-01-11 9:30 ` Mel Gorman
2013-01-09 21:29 ` Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20121228014503.GA5017@dcvr.yhbt.net \
--to=normalperson@yhbt.net \
--cc=andreas.voellmy@yale.edu \
--cc=junchang.wang@yale.edu \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=netdev@vger.kernel.org \
--cc=viro@zeniv.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox