All of lore.kernel.org
 help / color / mirror / Atom feed
From: Eric Wong <normalperson@yhbt.net>
To: netdev@vger.kernel.org, linux-kernel@vger.kernel.org
Cc: Andreas Voellmy <andreas.voellmy@yale.edu>,
	viro@zeniv.linux.org.uk, linux-fsdevel@vger.kernel.org,
	"Junchang(Jason) Wang" <junchang.wang@yale.edu>
Subject: ppoll() stuck on POLLIN while TCP peer is sending
Date: Fri, 28 Dec 2012 01:45:03 +0000	[thread overview]
Message-ID: <20121228014503.GA5017@dcvr.yhbt.net> (raw)

I'm finding ppoll() unexpectedly stuck when waiting for POLLIN on a
local TCP socket.  The isolated code below can reproduce the issue
after many minutes (<1 hour).  It might be easier to reproduce on
a busy system while disk I/O is happening.

This may also be related to an epoll-related issue reported
by Andreas Voellmy:
http://thread.gmane.org/gmane.linux.kernel/1408782/

My example involves a 3 thread data flow between two pairs
of (4) sockets:

	 send_loop ->   recv_loop(recv_send)   -> recv_loop(recv_only)
	 pair_a[1] -> (pair_a[0] -> pair_b[1]) -> pair_b[0]

At least 3.7 and 3.7.1 are affected.

I have tcp_low_latency=1 set, I will try 0 later

The last progress message I got was after receiving 2942052597760
bytes on fd=7 (out of 64-bit ULONG_MAX / 2)

strace:

3644  sendto(4, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 16384, 0, NULL, 0 <unfinished ...>
3643  sendto(6, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 16384, 0, NULL, 0 <unfinished ...>
3642  ppoll([{fd=7, events=POLLIN}], 1, NULL, NULL, 8 <unfinished ...>
3641  futex(0x7f23ed8129d0, FUTEX_WAIT, 3644, NULL <unfinished ...>

The first and last lines of the strace are expected:

+ 3644	sendto(4) is blocked because 3643 is blocked on sendto(fd=6)
  and not able to call recv().
+ 3641 is the main thread calling pthread_join

What is unexpected is the tid=3643 and tid=3642 interaction.  As confirmed
by lsof below, fd=6 is sending to wake up fd=7, but ppoll(fd=7) seems
to not be waking up.

lsof:
toosleepy 3641   ew    4u  IPv4  12405      0t0     TCP localhost:55904->localhost:33249 (ESTABLISHED)
toosleepy 3641   ew    5u  IPv4  12406      0t0     TCP localhost:33249->localhost:55904 (ESTABLISHED)
toosleepy 3641   ew    6u  IPv4  12408      0t0     TCP localhost:48777->localhost:33348 (ESTABLISHED)
toosleepy 3641   ew    7u  IPv4  12409      0t0     TCP localhost:33348->localhost:48777 (ESTABLISHED)

System info: Linux 3.7.1 x86_64 SMP PREEMPT
AMD Phenom(tm) II X4 945 Processor (4 cores)
Nothing interesting in dmesg, iptables rules are empty.

I have not yet been able to reproduce the issue using UNIX sockets,
only TCP, but you can run:

  ./toosleepy unix

...to test with UNIX sockets instead of TCP.

The following code is also available via git://bogomips.org/toosleepy
gcc -o toosleepy -O2 -Wall -lpthread toosleepy.c
-------------------------------- 8< ------------------------------------
#define _GNU_SOURCE
#include <poll.h>
#include <sys/ioctl.h>
#include <pthread.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <netinet/tcp.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <assert.h>
#include <limits.h>

/* Arguments handed to one recv_loop() thread. */
struct receiver {
	int rfd;	/* socket recv_loop() reads from and waits on with ppoll() */
	int sfd;	/* socket recv_loop() sends to; only used (and closed) when >= 0 */
};

/*
 * Blocking sender thread.
 *
 * fdp points to an int holding the socket to write to.  Zero-filled
 * 16 KiB chunks are sent until at least ULONG_MAX / 2 bytes have been
 * written; the total is then reported on stderr and the socket is
 * closed.  A send() interrupted by a signal is retried, any other
 * failure aborts the process.  Always returns NULL.
 */
static void * send_loop(void *fdp)
{
	int fd = *(int *)fdp;
	/* zero-filled so no indeterminate stack bytes are sent to the peer */
	char buf[16384] = { 0 };
	ssize_t s;
	size_t sent = 0;
	size_t max = (size_t)ULONG_MAX / 2;

	while (sent < max) {
		s = send(fd, buf, sizeof(buf), 0);
		if (s > 0) {
			sent += (size_t)s;
		} else if (s == -1 && errno != EINTR) {
			/*
			 * Checked explicitly rather than with assert(): with
			 * NDEBUG a persistent error would otherwise spin here
			 * forever.
			 */
			perror("send");
			abort();
		}
	}
	dprintf(2, "%d done sending: %zu\n", fd, sent);
	close(fd);
	return NULL;
}

/*
 * Non-blocking receiver thread, using ppoll() to wait for data.
 *
 * p points to a struct receiver.  Data is read from rfd until recv()
 * returns 0.  Whenever recv() reports EAGAIN the thread waits in
 * ppoll() (no timeout, no signal mask) for POLLIN on rfd.  If sfd >= 0
 * exactly the bytes that were received are forwarded to sfd; otherwise
 * a write() to fd -1 is issued just to spend some cycles.  The totals
 * are reported on stderr and sfd (if any) is closed.  Always returns
 * NULL; unexpected socket errors abort the process.
 */
static void * recv_loop(void *p)
{
	const struct receiver *rcvr = p;
	char buf[16384];
	/*
	 * Progress interval (2^32 bytes).  Computed in 64 bits: in size_t
	 * arithmetic the product wraps to 0 where size_t is 32 bits wide,
	 * and using it as a modulus would then be a division by zero.
	 */
	const uint64_t progress_step = (uint64_t)sizeof(buf) * sizeof(buf) * 16;
	uint64_t next_progress = progress_step;
	struct pollfd fds;
	int rc;
	ssize_t r, s;
	size_t received = 0;
	size_t sent = 0;
	size_t off;

	for (;;) {
		r = recv(rcvr->rfd, buf, sizeof(buf), 0);
		if (r == 0)
			break;

		if (r == -1) {
			if (errno == EINTR)
				continue;
			if (errno != EAGAIN && errno != EWOULDBLOCK) {
				perror("recv");
				abort();
			}

			/* nothing queued: sleep until the socket is readable */
			fds.fd = rcvr->rfd;
			fds.events = POLLIN;
			fds.revents = 0;
			rc = ppoll(&fds, 1, NULL, NULL);
			/* an interrupted wait is harmless, just retry recv() */
			if (rc == -1 && errno != EINTR) {
				perror("ppoll");
				abort();
			}
			continue;
		}

		received += (size_t)r;
		if (rcvr->sfd >= 0) {
			/*
			 * Forward only the r bytes just received (not the
			 * whole buffer), continuing after short or
			 * interrupted sends so nothing is dropped.
			 */
			off = 0;
			while (off < (size_t)r) {
				s = send(rcvr->sfd, buf + off, (size_t)r - off, 0);
				if (s > 0) {
					off += (size_t)s;
					sent += (size_t)s;
				} else if (s == -1 && errno != EINTR) {
					perror("send");
					abort();
				}
			}
		} else {
			/* just burn some cycles; fd -1 makes write() fail at once */
			s = write(-1, buf, sizeof(buf));
			(void)s;
		}

		/*
		 * Report once per interval crossed.  A threshold is used
		 * because recv() may return short reads, so the running
		 * total need not ever land on an exact multiple.
		 */
		if ((uint64_t)received >= next_progress) {
			dprintf(2, " %d progress: %zu\n",
			        rcvr->rfd, received);
			next_progress += progress_step;
		}
	}
	dprintf(2, "%d got: %zu\n", rcvr->rfd, received);
	if (rcvr->sfd >= 0) {
		dprintf(2, "%d sent: %zu\n", rcvr->sfd, sent);
		close(rcvr->sfd);
	}

	return NULL;
}

/* Report the failed call named by `what` via perror() and abort. */
static void tcp_socketpair_die(const char *what)
{
	perror(what);
	abort();
}

/*
 * Build a connected pair of TCP sockets, similar in spirit to
 * socketpair(2).
 *
 * A temporary listener is bound to an ephemeral port, a second socket
 * connects to it, and the listener is closed once the connection has
 * been accepted.
 *
 * sv[0] receives the accepted socket, created with accept_flags
 *       (passed straight to accept4(), e.g. SOCK_NONBLOCK).
 * sv[1] receives the connecting socket, left in its default mode.
 *
 * Every call is checked explicitly instead of being wrapped in
 * assert(), so the sockets are still set up when built with NDEBUG.
 * Any failure aborts the process.
 */
static void tcp_socketpair(int sv[2], int accept_flags)
{
	/* designated initializer also zeroes the padding member (sin_zero) */
	struct sockaddr_in addr = {
		.sin_family = AF_INET,
		.sin_addr.s_addr = INADDR_ANY,
		.sin_port = 0,	/* let the kernel pick a free port */
	};
	socklen_t addrlen = sizeof(addr);
	int l, c, a;

	l = socket(PF_INET, SOCK_STREAM, 0);
	if (l < 0)
		tcp_socketpair_die("socket");
	c = socket(PF_INET, SOCK_STREAM, 0);
	if (c < 0)
		tcp_socketpair_die("socket");

	if (bind(l, (struct sockaddr *)&addr, addrlen) != 0)
		tcp_socketpair_die("bind");
	if (listen(l, 1024) != 0)
		tcp_socketpair_die("listen");

	/* learn which port the kernel assigned */
	if (getsockname(l, (struct sockaddr *)&addr, &addrlen) != 0)
		tcp_socketpair_die("getsockname");

	/*
	 * NOTE(review): addr still carries the wildcard address here, so
	 * this relies on the platform treating a connect() to it as a
	 * connection to the local host -- confirm if ported beyond Linux.
	 */
	if (connect(c, (struct sockaddr *)&addr, addrlen) != 0)
		tcp_socketpair_die("connect");

	a = accept4(l, NULL, NULL, accept_flags);
	if (a < 0)
		tcp_socketpair_die("accept4");

	close(l);
	sv[0] = a;
	sv[1] = c;
}

/* Report `what` with the message for error number errnum, then abort. */
static void die_errnum(const char *what, int errnum)
{
	errno = errnum;
	perror(what);
	abort();
}

/*
 * Set up two socket pairs and run the three-thread pipeline on them.
 *
 * With the single argument "unix" the pairs are AF_UNIX stream
 * sockets; otherwise they are TCP connections from tcp_socketpair().
 * In both cases only the receiving end of each pair (index 0) is
 * non-blocking.
 *
 * All setup calls are checked explicitly rather than inside assert():
 * with NDEBUG the asserted expressions would not be evaluated at all
 * and no sockets or threads would be created.
 */
int main(int argc, char *argv[])
{
	int pair_a[2];
	int pair_b[2];
	pthread_t s, rs, r;
	struct receiver recv_only;
	struct receiver recv_send;
	int rc;

	if (argc == 2 && strcmp(argv[1], "unix") == 0) {
		int val;

		if (socketpair(AF_UNIX, SOCK_STREAM, 0, pair_a) != 0)
			die_errnum("socketpair", errno);
		if (socketpair(AF_UNIX, SOCK_STREAM, 0, pair_b) != 0)
			die_errnum("socketpair", errno);
		/* only make the receiver non-blocking */
		val = 1;
		if (ioctl(pair_a[0], FIONBIO, &val) != 0)
			die_errnum("ioctl(FIONBIO)", errno);
		val = 1;
		if (ioctl(pair_b[0], FIONBIO, &val) != 0)
			die_errnum("ioctl(FIONBIO)", errno);
	} else {
		tcp_socketpair(pair_a, SOCK_NONBLOCK);
		tcp_socketpair(pair_b, SOCK_NONBLOCK);
	}

	recv_send.rfd = pair_a[0];
	recv_send.sfd = pair_b[1];
	recv_only.rfd = pair_b[0];
	recv_only.sfd = -1;

	/*
	 * data flow:
	 * send_loop ->   recv_loop(recv_send)   -> recv_loop(recv_only)
	 * pair_a[1] -> (pair_a[0] -> pair_b[1]) -> pair_b[0]
	 */
	/* pthread_* return the error number directly; errno is not set */
	rc = pthread_create(&r, NULL, recv_loop, &recv_only);
	if (rc != 0)
		die_errnum("pthread_create", rc);
	rc = pthread_create(&rs, NULL, recv_loop, &recv_send);
	if (rc != 0)
		die_errnum("pthread_create", rc);
	rc = pthread_create(&s, NULL, send_loop, &pair_a[1]);
	if (rc != 0)
		die_errnum("pthread_create", rc);

	rc = pthread_join(s, NULL);
	if (rc != 0)
		die_errnum("pthread_join", rc);
	rc = pthread_join(rs, NULL);
	if (rc != 0)
		die_errnum("pthread_join", rc);
	rc = pthread_join(r, NULL);
	if (rc != 0)
		die_errnum("pthread_join", rc);

	return 0;
}
-------------------------------- 8< ------------------------------------
Any help/suggestions/test patches would be greatly appreciated.
Thanks for reading!

-- 
Eric Wong

             reply	other threads:[~2012-12-28  1:45 UTC|newest]

Thread overview: 88+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-12-28  1:45 Eric Wong [this message]
2012-12-28  7:06 ` ppoll() stuck on POLLIN while TCP peer is sending Eric Wong
2012-12-29 11:34   ` Eric Wong
2012-12-31 13:21 ` [PATCH] poll: prevent missed events if _qproc is NULL Eric Wong
2012-12-31 23:24   ` Eric Wong
2013-01-01 16:58     ` Junchang(Jason) Wang
2013-01-01 18:42   ` Eric Dumazet
2013-01-01 21:00     ` Eric Wong
2013-01-01 21:17       ` Eric Wong
2013-01-01 22:53         ` Linus Torvalds
2013-01-01 23:21           ` Junchang(Jason) Wang
2013-01-01 23:56           ` [PATCH] epoll: prevent missed events on EPOLL_CTL_MOD Eric Wong
2013-01-02 17:45             ` Eric Dumazet
2013-01-02 18:40               ` Eric Wong
2013-01-02 19:03                 ` Eric Dumazet
2013-01-02 19:32                   ` Eric Wong
2013-01-02 22:08                     ` Eric Dumazet
2013-01-02 21:16             ` Eric Wong
2013-01-02 20:08 ` ppoll() stuck on POLLIN while TCP peer is sending Eric Wong
2013-01-02 20:08   ` Eric Wong
2013-01-02 20:47   ` Eric Wong
2013-01-02 20:47     ` Eric Wong
2013-01-03 13:41     ` Eric Dumazet
2013-01-03 13:41       ` Eric Dumazet
2013-01-03 18:32       ` Eric Wong
2013-01-03 18:32         ` Eric Wong
2013-01-03 23:45         ` Eric Wong
2013-01-03 23:45           ` Eric Wong
2013-01-04  0:26           ` Eric Wong
2013-01-04  0:26             ` Eric Wong
2013-01-04  3:52             ` Eric Wong
2013-01-04  3:52               ` Eric Wong
2013-01-04 16:01   ` Mel Gorman
2013-01-04 16:01     ` Mel Gorman
2013-01-04 17:15     ` Eric Dumazet
2013-01-04 17:15       ` Eric Dumazet
2013-01-04 17:59     ` Eric Wong
2013-01-04 17:59       ` Eric Wong
2013-01-05  1:07     ` Eric Wong
2013-01-05  1:07       ` Eric Wong
2013-01-06 12:07     ` Eric Wong
2013-01-06 12:07       ` Eric Wong
2013-01-07 12:25       ` Mel Gorman
2013-01-07 12:25         ` Mel Gorman
2013-01-07 22:38         ` Eric Dumazet
2013-01-07 22:38           ` Eric Dumazet
2013-01-08  0:21           ` Eric Wong
2013-01-08  0:21             ` Eric Wong
2013-01-07 22:38         ` Eric Wong
2013-01-07 22:38           ` Eric Wong
2013-01-08 20:14           ` Eric Wong
2013-01-08 20:14             ` Eric Wong
2013-01-08 22:43           ` Mel Gorman
2013-01-08 22:43             ` Mel Gorman
2013-01-08 23:23             ` Eric Wong
2013-01-08 23:23               ` Eric Wong
2013-01-09  2:14               ` Eric Dumazet
2013-01-09  2:14                 ` Eric Dumazet
2013-01-09  2:32                 ` Eric Dumazet
2013-01-09  2:32                   ` Eric Dumazet
2013-01-09  2:54                   ` Eric Dumazet
2013-01-09  2:54                     ` Eric Dumazet
2013-01-09  3:55                     ` Eric Wong
2013-01-09  3:55                       ` Eric Wong
2013-01-09  8:42                       ` Eric Wong
2013-01-09  8:42                         ` Eric Wong
2013-01-09  8:51                         ` Eric Wong
2013-01-09  8:51                           ` Eric Wong
2013-01-09 13:42                   ` Mel Gorman
2013-01-09 13:42                     ` Mel Gorman
2013-01-09 13:37               ` Mel Gorman
2013-01-09 13:37                 ` Mel Gorman
2013-01-09 13:50                 ` Mel Gorman
2013-01-09 13:50                   ` Mel Gorman
2013-01-10  9:25                 ` Eric Wong
2013-01-10  9:25                   ` Eric Wong
2013-01-10 19:42                   ` Mel Gorman
2013-01-10 19:42                     ` Mel Gorman
2013-01-10 20:03                     ` Eric Wong
2013-01-10 20:03                       ` Eric Wong
2013-01-10 20:58                     ` Eric Dumazet
2013-01-10 20:58                       ` Eric Dumazet
2013-01-11  0:51                     ` Eric Wong
2013-01-11  0:51                       ` Eric Wong
2013-01-11  9:30                       ` Mel Gorman
2013-01-11  9:30                         ` Mel Gorman
2013-01-09 21:29             ` Eric Wong
2013-01-09 21:29               ` Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20121228014503.GA5017@dcvr.yhbt.net \
    --to=normalperson@yhbt.net \
    --cc=andreas.voellmy@yale.edu \
    --cc=junchang.wang@yale.edu \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.