* Issues with INET sockets through loopback (lo)
@ 2005-05-23 11:17 Hans Henrik Happe
2005-05-23 12:09 ` DervishD
2005-05-24 12:23 ` Avi Kivity
0 siblings, 2 replies; 5+ messages in thread
From: Hans Henrik Happe @ 2005-05-23 11:17 UTC (permalink / raw)
To: linux-kernel
[-- Attachment #1: Type: text/plain, Size: 2209 bytes --]
During development of a token-based distributed mutual exclusion algorithm I
observed some odd behavior when testing the code locally on one machine.
When multiple processes communicate through INET sockets in an irregular
pattern Linux goes into the idle state even though there always are data to
be delivered. It doesn't stop, it just doesn't use all the available CPU
time.
To test this further i wrote a program (attach: random-inet.c) that shows this
behavior. It starts a number processes and connects them with INET sockets.
Then n startup messages are sent. When a process receives a message it
randomly selects a destination to forward it to. This way there will always
be n messages in transit. The issues can be observed with just 3 processes
and 1 message. Usage:
random-inet <# processes> <# messages>
I.e. with 16 processes and 1 message the CPU utilization is only 20% on a
1.6GHz Celeron M.
I have tried more regular communication patterns but this gives full CPU
utilization as expected. For instance sending messages in a ring (attach:
ring-inet.c).
I discovered another issue when using many messages (i.e. 16 processes and 16
messages). The responsiveness of the system degrades massively. It takes
seconds before keyboard input is displayed. Of course there are many very IO
bound processes, but I'm not sure if the impact should be that high.
I have observed the issues with many kernel versions (uniprocessor): 2.4.24,
2.6.3-7mdk, 2.6.11-gentoo-r6 and 2.6.12-rc4.
As a sanity check I have also tried with UNIX sockets (socketpair(2)). This
shows none of the above issues.
I believe that the problem must be somewhere in the INET socket
implementation. The reason that I don't think it is in the loopback, is that
when run in a cluster there seems to be more latency than one would expect. I
haven't tested this thoroughly, though.
I have tried to look at the kernel code in order to find the reason for this
behavior, but I must admit that my knowledge of the inner workings of the
kernel is not that great.
I hope that others can confirm that this is an issue or otherwise explain why
it is supposed to behave this way.
Regards
Hans Henrik Happe
[-- Attachment #2: random-inet.c --]
[-- Type: text/x-csrc, Size: 4659 bytes --]
/*
* usage: random-inet <# processes> <# messages>
*/
#include <asm/msr.h>
#include <fcntl.h>
#include <netdb.h>
#include <netinet/tcp.h>
#include <poll.h>
#include <stdio.h>
#include <stdlib.h>     /* atoi, srandom, random */
#include <string.h>     /* memset */
#include <sys/socket.h>
#include <sys/types.h>
#include <unistd.h>     /* read, write, close, fork, sleep */
/*
 * Connect a TCP socket to localhost:<port> with Nagle (TCP_NODELAY)
 * disabled, since the test exchanges single-byte messages.
 *
 * Returns the connected socket fd, or -1 on error (diagnostic printed
 * via perror()/gai_strerror()).
 */
int do_connect(int port) {
	int n, sock, on=1;
	struct addrinfo hints, *res;
	char str[6];

	memset(&hints, 0, sizeof(struct addrinfo));
	hints.ai_flags = AI_PASSIVE;
	hints.ai_family = PF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	/* snprintf instead of sprintf: ports are at most 5 digits, but do
	 * not rely on the caller to guarantee that. */
	snprintf(str, sizeof(str), "%d", port);
	n = getaddrinfo("localhost", str, &hints, &res);
	if (n != 0) {
		fprintf(stderr,
			"getaddrinfo error: [%s]\n",
			gai_strerror(n));
		return -1;
	}
	sock = socket(AF_INET, SOCK_STREAM, 0);
	if (sock == -1) {
		perror("socket");
		freeaddrinfo(res);	/* was leaked on this path */
		return -1;
	}
	if (setsockopt(sock, SOL_TCP, TCP_NODELAY, &on, sizeof(on)) == -1) {
		perror("setsockopt");
		close(sock);		/* fd was leaked on this path */
		freeaddrinfo(res);
		return -1;
	}
	/* Use the real resolved length: sizeof(struct sockaddr) (the old
	 * code) is only 16 bytes and would truncate larger families. */
	if (connect(sock, res->ai_addr, res->ai_addrlen) == -1) {
		perror("connect");
		close(sock);
		freeaddrinfo(res);
		return -1;
	}
	freeaddrinfo(res);
	return sock;
}
/*
 * Create a TCP listening socket bound to INADDR_ANY:<port>.
 *
 * Returns the listening fd, or -1 on error (diagnostic via perror()).
 */
int start_listen(int port) {
	int on=1;
	int sock;
	struct sockaddr_in name;

	sock = socket(AF_INET, SOCK_STREAM, 0);
	if (sock == -1) {
		perror("socket");
		return -1;
	}
	/* SO_REUSEADDR lets the test be restarted immediately without
	 * waiting for TIME_WAIT sockets to drain. */
	if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) == -1) {
		perror("setsockopt");
		close(sock);		/* fd was leaked on this path */
		return -1;
	}
	memset(&name, 0, sizeof(name));	/* clear sin_zero padding */
	name.sin_family = AF_INET;
	name.sin_port = htons(port);
	name.sin_addr.s_addr = htonl(INADDR_ANY);
	if (bind(sock, (struct sockaddr *) &name, sizeof(name)) == -1) {
		perror("bind");
		close(sock);
		return -1;
	}
	if (listen(sock, 10) == -1) {
		perror("listen");
		close(sock);
		return -1;
	}
	return sock;
}
/*
 * Accept one connection on listening socket lsock and disable Nagle
 * on the new connection.
 *
 * Returns the connected fd, or -1 on error (diagnostic via perror()).
 */
int do_accept(int lsock) {
	struct sockaddr addr;
	socklen_t len = sizeof(addr);
	int sock, on=1;

	if ((sock = accept(lsock, &addr, &len)) == -1) {
		perror("accept");
		return -1;
	}
	if (setsockopt(sock, SOL_TCP, TCP_NODELAY, &on, sizeof(on)) == -1) {
		perror("setsockopt");
		close(sock);	/* accepted fd was leaked on this path */
		return -1;
	}
	return sock;
}
/* Thin wrapper around read(2) that reports failures via perror().
 * Returns the byte count from read(): >0 bytes read, 0 on EOF, -1 on
 * error. */
int do_read(int fd, void *buf, int n) {
	int got = read(fd, buf, n);

	if (got < 0)
		perror("read");
	return got;
}
/* Thin wrapper around write(2) that reports failures via perror().
 * Returns the byte count from write(), or -1 on error. */
int do_write(int fd, void *buf, int n) {
	int put = write(fd, buf, n);

	if (put < 0)
		perror("write");
	return put;
}
/*
 * Spawn <# processes> peers, fully connect them over loopback TCP, inject
 * <# messages> one-byte tokens, then forward every received token to a
 * randomly chosen peer forever.  Does not return in normal operation.
 */
int main(int argc, char *argv[]) {
	int i, n, cnt, pid, dest;
	int lsock;
	char data, id, rank;
	int port = 11100;

	/* The original dereferenced argv[1]/argv[2] unchecked and crashed
	 * when run without arguments. */
	if (argc < 3) {
		fprintf(stderr, "usage: %s <# processes> <# messages>\n", argv[0]);
		return 1;
	}
	/* # processes */
	cnt = atoi(argv[1]);
	/* # messages */
	n = atoi(argv[2]);
	if (cnt < 2) {
		/* cnt < 2 would make pfds[] a zero-length VLA below */
		fprintf(stderr, "<# processes> must be at least 2\n");
		return 1;
	}
	{
		int socks[cnt];			/* rank -> connected fd */
		struct pollfd pfds[cnt-1];	/* one slot per peer */

		/* Create processes: the parent keeps rank 0; child i takes
		 * rank cnt-i and stops forking. */
		rank = 0;
		for (i=1; i<cnt; i++) {
			pid = fork();
			if (pid == 0) {
				rank = cnt-i;
				break;
			}
		}
		/* Setup connections: each pair is connected exactly once —
		 * accept from lower-port peers, connect to higher ports. */
		lsock = start_listen(port+rank);
		sleep(2); /* "Ensure" that all processes are listening, HACK!!! */
		for (i=0; i<rank; i++) {
			pfds[i].fd = do_accept(lsock);
			do_read(pfds[i].fd, &id, 1);	/* peer announces its rank */
			do_write(pfds[i].fd, &rank, 1);
			socks[id] = pfds[i].fd;
			pfds[i].events = POLLIN;
		}
		for (i=rank; i<cnt-1; i++) {
			pfds[i].fd = do_connect(port+i+1);
			do_write(pfds[i].fd, &rank, 1);
			do_read(pfds[i].fd, &id, 1);
			socks[id] = pfds[i].fd;
			pfds[i].events = POLLIN;
		}
		srandom(rank);
		/* Write startup messages: the first n ranks each inject one
		 * token (its content is irrelevant, only its arrival). */
		if (rank < n) {
			dest = (rank+1)%cnt;
			do_write(socks[dest], &data, 1);
		}
		/* Receive and forward messages to random destinations */
		while (1) {
			if (poll(pfds, cnt-1, -1) == -1) {
				perror("poll");
			}
			for (i=0; i<cnt-1; i++) {
				/* NOTE(review): any revents bit (including
				 * POLLHUP/POLLERR) triggers a read here;
				 * acceptable for this test program. */
				if (pfds[i].revents != 0) {
					do_read(pfds[i].fd, &data, 1);
					dest = random()%cnt;
					/* Do not send to self */
					if (dest == rank) {
						dest = (rank+1)%cnt;
					}
					do_write(socks[dest], &data, 1);
				}
			}
		}
	}
	return 0;
}
[-- Attachment #3: ring-inet.c --]
[-- Type: text/x-csrc, Size: 4482 bytes --]
/*
* usage: ring-inet <# processes> <# messages>
*/
#include <asm/msr.h>
#include <stdio.h>
#include <poll.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/tcp.h>
#include <fcntl.h>
#include <netdb.h>
/*
 * Connect a TCP socket to localhost:<port> with Nagle (TCP_NODELAY)
 * disabled, since the test exchanges single-byte messages.
 *
 * Returns the connected socket fd, or -1 on error (diagnostic printed
 * via perror()/gai_strerror()).
 */
int do_connect(int port) {
	int n, sock, on=1;
	struct addrinfo hints, *res;
	char str[6];

	memset(&hints, 0, sizeof(struct addrinfo));
	hints.ai_flags = AI_PASSIVE;
	hints.ai_family = PF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	/* snprintf instead of sprintf: ports are at most 5 digits, but do
	 * not rely on the caller to guarantee that. */
	snprintf(str, sizeof(str), "%d", port);
	n = getaddrinfo("localhost", str, &hints, &res);
	if (n != 0) {
		fprintf(stderr,
			"getaddrinfo error: [%s]\n",
			gai_strerror(n));
		return -1;
	}
	sock = socket(AF_INET, SOCK_STREAM, 0);
	if (sock == -1) {
		perror("socket");
		freeaddrinfo(res);	/* was leaked on this path */
		return -1;
	}
	if (setsockopt(sock, SOL_TCP, TCP_NODELAY, &on, sizeof(on)) == -1) {
		perror("setsockopt");
		close(sock);		/* fd was leaked on this path */
		freeaddrinfo(res);
		return -1;
	}
	/* Use the real resolved length: sizeof(struct sockaddr) (the old
	 * code) is only 16 bytes and would truncate larger families. */
	if (connect(sock, res->ai_addr, res->ai_addrlen) == -1) {
		perror("connect");
		close(sock);
		freeaddrinfo(res);
		return -1;
	}
	freeaddrinfo(res);
	return sock;
}
/*
 * Create a TCP listening socket bound to INADDR_ANY:<port>.
 *
 * Returns the listening fd, or -1 on error (diagnostic via perror()).
 */
int start_listen(int port) {
	int on=1;
	int sock;
	struct sockaddr_in name;

	sock = socket(AF_INET, SOCK_STREAM, 0);
	if (sock == -1) {
		perror("socket");
		return -1;
	}
	/* SO_REUSEADDR lets the test be restarted immediately without
	 * waiting for TIME_WAIT sockets to drain. */
	if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) == -1) {
		perror("setsockopt");
		close(sock);		/* fd was leaked on this path */
		return -1;
	}
	memset(&name, 0, sizeof(name));	/* clear sin_zero padding */
	name.sin_family = AF_INET;
	name.sin_port = htons(port);
	name.sin_addr.s_addr = htonl(INADDR_ANY);
	if (bind(sock, (struct sockaddr *) &name, sizeof(name)) == -1) {
		perror("bind");
		close(sock);
		return -1;
	}
	if (listen(sock, 10) == -1) {
		perror("listen");
		close(sock);
		return -1;
	}
	return sock;
}
/*
 * Accept one connection on listening socket lsock and disable Nagle
 * on the new connection.
 *
 * Returns the connected fd, or -1 on error (diagnostic via perror()).
 */
int do_accept(int lsock) {
	struct sockaddr addr;
	socklen_t len = sizeof(addr);
	int sock, on=1;

	if ((sock = accept(lsock, &addr, &len)) == -1) {
		perror("accept");
		return -1;
	}
	if (setsockopt(sock, SOL_TCP, TCP_NODELAY, &on, sizeof(on)) == -1) {
		perror("setsockopt");
		close(sock);	/* accepted fd was leaked on this path */
		return -1;
	}
	return sock;
}
/* Thin wrapper around read(2) that reports failures via perror().
 * Returns the byte count from read(): >0 bytes read, 0 on EOF, -1 on
 * error. */
int do_read(int fd, void *buf, int n) {
	int got = read(fd, buf, n);

	if (got < 0)
		perror("read");
	return got;
}
/* Thin wrapper around write(2) that reports failures via perror().
 * Returns the byte count from write(), or -1 on error. */
int do_write(int fd, void *buf, int n) {
	int put = write(fd, buf, n);

	if (put < 0)
		perror("write");
	return put;
}
/*
 * Spawn <# processes> peers, fully connect them over loopback TCP, inject
 * <# messages> one-byte tokens, then forward every received token to the
 * next rank in a fixed ring.  Does not return in normal operation.
 */
int main(int argc, char *argv[]) {
	int i, n, cnt, pid, dest;
	int lsock;
	char data, id, rank;
	int port = 11100;

	/* The original dereferenced argv[1]/argv[2] unchecked and crashed
	 * when run without arguments. */
	if (argc < 3) {
		fprintf(stderr, "usage: %s <# processes> <# messages>\n", argv[0]);
		return 1;
	}
	/* # processes */
	cnt = atoi(argv[1]);
	/* # messages */
	n = atoi(argv[2]);
	if (cnt < 2) {
		/* cnt < 2 would make pfds[] a zero-length VLA below */
		fprintf(stderr, "<# processes> must be at least 2\n");
		return 1;
	}
	{
		int socks[cnt];			/* rank -> connected fd */
		struct pollfd pfds[cnt-1];	/* one slot per peer */

		/* Create processes: the parent keeps rank 0; child i takes
		 * rank cnt-i and stops forking. */
		rank = 0;
		for (i=1; i<cnt; i++) {
			pid = fork();
			if (pid == 0) {
				rank = cnt-i;
				break;
			}
		}
		/* Setup connections: each pair is connected exactly once —
		 * accept from lower-port peers, connect to higher ports. */
		lsock = start_listen(port+rank);
		sleep(2); /* "Ensure" that all processes are listening, HACK!!! */
		for (i=0; i<rank; i++) {
			pfds[i].fd = do_accept(lsock);
			do_read(pfds[i].fd, &id, 1);	/* peer announces its rank */
			do_write(pfds[i].fd, &rank, 1);
			socks[id] = pfds[i].fd;
			pfds[i].events = POLLIN;
		}
		for (i=rank; i<cnt-1; i++) {
			pfds[i].fd = do_connect(port+i+1);
			do_write(pfds[i].fd, &rank, 1);
			do_read(pfds[i].fd, &id, 1);
			socks[id] = pfds[i].fd;
			pfds[i].events = POLLIN;
		}
		srandom(rank);
		/* Write startup messages: the first n ranks each inject one
		 * token (its content is irrelevant, only its arrival). */
		if (rank < n) {
			dest = (rank+1)%cnt;
			do_write(socks[dest], &data, 1);
		}
		/* Receive and forward messages to next in ring */
		while (1) {
			if (poll(pfds, cnt-1, -1) == -1) {
				perror("poll");
			}
			for (i=0; i<cnt-1; i++) {
				/* NOTE(review): any revents bit (including
				 * POLLHUP/POLLERR) triggers a read here;
				 * acceptable for this test program. */
				if (pfds[i].revents != 0) {
					do_read(pfds[i].fd, &data, 1);
					dest = (rank+1)%cnt;
					do_write(socks[dest], &data, 1);
				}
			}
		}
	}
	return 0;
}
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: Issues with INET sockets through loopback (lo)
2005-05-23 11:17 Issues with INET sockets through loopback (lo) Hans Henrik Happe
@ 2005-05-23 12:09 ` DervishD
2005-05-24 12:12 ` Hans Henrik Happe
2005-05-24 12:23 ` Avi Kivity
1 sibling, 1 reply; 5+ messages in thread
From: DervishD @ 2005-05-23 12:09 UTC (permalink / raw)
To: Hans Henrik Happe; +Cc: linux-kernel
Hi Hans :)
I've not read the code in order to not make assumptions about
proper parameters or how to make the tests. I've tested using an AMD
Athlon XP 1900+, using a self compiled 2.4.29 kernel. The measures
were made using zsh 'time'. Not quite exact but I think that's good
for comparisons anyway.
* Hans Henrik Happe <hhh@imada.sdu.dk> dixit:
> To test this further i wrote a program (attach: random-inet.c) that shows this
> behavior. It starts a number processes and connects them with INET sockets.
> Then n startup messages are sent. When a process receives a message it
> randomly selects a destination to forward it to. This way there will always
> be n messages in transit. The issues can be observed with just 3 processes
> and 1 message. Usage:
>
> random-init <# processes> <# messages>
With 3-1 I get an usage of 20% more or less. But with 16-1 the
CPU usage is nearly 0! and with 16-16 the usage is 5% more or less.
> I have tried more regular communication patterns but this gives full CPU
> utilization as expected. For instance sending messages in a ring (attach:
> ring-inet.c).
Not here. It uses 29% instead of 20% with 3-1, but drops to 6%
when using 16 processes. Far from full CPU usage. A test with 16-160
doesn't make the system slower or irresponsive, at least here...
> I discovered another issue when using many messages (i.e. 16 processes and 16
> messages). The responsiveness of the system degrades massively. It takes
> seconds before keyboard input are displayed. Of cause there are many very IO
> bound processes, but I'm not sure if the impact should be that high.
Not here. I haven't noticed any slow-down or latency increase
using high number of messages. Using 16-160 only uses at most 7% of
CPU per process, and I don't feel the system irresponsive.
If you want more accurate results, try to modify your test
programs: make them run for a couple of minutes (you decide how much
time, the longer, the better) and kill all children processes. After
that, use getrusage() (with RUSAGE_CHILDREN) or wait3(). That should
give more accurate results.
Hope that helps. If you want to make any other test, tell me.
I'll try to help.
Raúl Núñez de Arenas Coronado
--
Linux Registered User 88736 | http://www.dervishd.net
http://www.pleyades.net & http://www.gotesdelluna.net
It's my PC and I'll cry if I want to...
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: Issues with INET sockets through loopback (lo)
2005-05-23 12:09 ` DervishD
@ 2005-05-24 12:12 ` Hans Henrik Happe
0 siblings, 0 replies; 5+ messages in thread
From: Hans Henrik Happe @ 2005-05-24 12:12 UTC (permalink / raw)
To: DervishD; +Cc: linux-kernel
On Monday 23 May 2005 14:09, DervishD wrote:
> With 3-1 I get an usage of 20% more or less. But with 16-1 the
> CPU usage is nearly 0! and with 16-16 the usage is 5% more or less.
That's even worse than what I have experienced.
> > I have tried more regular communication patterns but this gives full CPU
> > utilization as expected. For instance sending messages in a ring (attach:
> > ring-inet.c).
>
> Not here. It uses 29% instead of 20% with 3-1, but drops to 6%
> when using 16 processes. Far from full CPU usage. A test with 16-160
> doesn't make the system slower or irresponsive, at least here...
Again, even worse.
> Not here. I haven't noticed any slow-down or latency increase
> using high number of messages. Using 16-160 only uses at most 7% of
> CPU per process, and I don't feel the system irresponsive.
That's strange. Maybe I should try an AMD system myself. Btw the number of
processes is an upper bound of the number of messages. This is just a
simplification in the code.
> If you want more accurate results, try to modify your test
> programs: make them run for a couple of minutes (you decide how much
> time, the longer, the better) and kill all children processes. After
> that, use getrusage() (with RUSAGE_CHILDREN) or wait3(). That should
> give more accurate results.
I could do that, but my point is that the kernel goes into the idle state even
though there always should be a runnable process. Your tests support this.
I don't believe that more accuracy would help because it is quite clear that
CPU is in the idle state.
> Hope that helps. If you want to make any other test, tell me.
> I'll try to help.
Thanx. Your tests actually confirms the first issue, which also is the one
that I have been most concerned about.
I hope that someone with knowledge of how this part of the kernel works can
confirm that this is a problem with the kernel or explain why it is supposed
to behave in this manner.
Hans Henrik Happe
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: Issues with INET sockets through loopback (lo)
2005-05-23 11:17 Issues with INET sockets through loopback (lo) Hans Henrik Happe
2005-05-23 12:09 ` DervishD
@ 2005-05-24 12:23 ` Avi Kivity
2005-05-31 16:18 ` Hans Henrik Happe
1 sibling, 1 reply; 5+ messages in thread
From: Avi Kivity @ 2005-05-24 12:23 UTC (permalink / raw)
To: Hans Henrik Happe; +Cc: linux-kernel
On Mon, 2005-05-23 at 13:17 +0200, Hans Henrik Happe wrote:
> I hope that others can comfirm that this is an issue or otherwise explain why
> it is supposed behave this way.
>
you might try using udp instead of tcp. this would help determine
whether the problem is in the tcp stack or the loopback interface.
nagle´s algorithm was my initial suspect but I see you took care of
that.
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: Issues with INET sockets through loopback (lo)
2005-05-24 12:23 ` Avi Kivity
@ 2005-05-31 16:18 ` Hans Henrik Happe
0 siblings, 0 replies; 5+ messages in thread
From: Hans Henrik Happe @ 2005-05-31 16:18 UTC (permalink / raw)
To: Avi Kivity; +Cc: linux-kernel
[-- Attachment #1: Type: text/plain, Size: 650 bytes --]
On Tuesday 24 May 2005 14:23, Avi Kivity wrote:
> On Mon, 2005-05-23 at 13:17 +0200, Hans Henrik Happe wrote:
>
> > I hope that others can comfirm that this is an issue or otherwise explain
> > why it is supposed behave this way.
> >
>
> you might try using udp instead of tcp. this would help determine
> whether the problem is in the tcp stack or the loopback interface.
Now I have tried with SCTP and it works great (no idle CPU time).
So my guess is still that there is a problem in TCP.
I have attached the SCTP program. It's my first attempt at using SCTP, so it's
not beautiful. The messages get around as expected though.
TripleH ;-)
[-- Attachment #2: random-sctp.c --]
[-- Type: text/x-csrc, Size: 3533 bytes --]
/*
* usage: random-sctp <# processes> <# messages>
*/
#include <asm/msr.h>
#include <stdio.h>
#include <poll.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/tcp.h>
#include <fcntl.h>
#include <netdb.h>
#include <netinet/sctp.h>
/* A resolved destination for sctp_sendmsg(): one generic socket address
 * plus the length actually filled in by getaddrinfo().
 * NOTE(review): struct sockaddr is only 16 bytes, so an IPv6
 * sockaddr_in6 would not fit here — confirm only IPv4 is ever resolved. */
typedef struct {
struct sockaddr sockadr;
int len;
} adr_t;
/*
 * Resolve localhost:<port> and store the result in *adr.
 *
 * Returns 0 on success, -1 on error (diagnostic via gai_strerror()).
 */
int get_adr(adr_t *adr, int port) {
	int n;
	struct addrinfo hints, *res;
	char str[6];

	memset(&hints, 0, sizeof(struct addrinfo));
	hints.ai_flags = AI_PASSIVE;
	hints.ai_family = PF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	snprintf(str, sizeof(str), "%d", port);
	n = getaddrinfo("localhost", str, &hints, &res);
	if (n != 0) {
		fprintf(stderr,
			"getaddrinfo error: [%s]\n",
			gai_strerror(n));
		return -1;
	}
	/* Copy the real resolved length, not sizeof(struct sockaddr): the
	 * old code always copied/reported 16 bytes, silently truncating
	 * any larger address family. Reject what does not fit. */
	if (res->ai_addrlen > sizeof(adr->sockadr)) {
		fprintf(stderr, "resolved address too large (%d bytes)\n",
			(int)res->ai_addrlen);
		freeaddrinfo(res);
		return -1;
	}
	memcpy(&adr->sockadr, res->ai_addr, res->ai_addrlen);
	adr->len = res->ai_addrlen;
	freeaddrinfo(res);
	return 0;
}
/*
 * Create an SCTP one-to-many (SOCK_SEQPACKET) socket bound to
 * INADDR_ANY:<port> and put it into the listening state.
 *
 * Returns the socket fd, or -1 on error (diagnostic via perror()).
 */
int init_listen(int port) {
	int sock;
	struct sockaddr_in name;

	sock = socket(PF_INET, SOCK_SEQPACKET, IPPROTO_SCTP);
	if (sock == -1) {
		perror("socket");
		return -1;
	}
	memset(&name, 0, sizeof(name));	/* clear sin_zero padding */
	name.sin_family = AF_INET;	/* was PF_INET; same value, but this
					 * field is an address family */
	name.sin_port = htons(port);
	name.sin_addr.s_addr = htonl(INADDR_ANY);
	if (bind(sock, (struct sockaddr *) &name, sizeof(name)) == -1) {
		perror("bind");
		close(sock);	/* fd was leaked on this path */
		return -1;
	}
	if (listen(sock, 10) == -1) {
		perror("listen");
		close(sock);
		return -1;
	}
	return sock;
}
/*
 * Receive one SCTP message of at most n bytes into buf, discarding the
 * sender address and ancillary info.
 *
 * Returns the byte count from sctp_recvmsg(), or -1 on error (perror'd).
 */
int do_recv(int sock, void *buf, int n) {
	struct sockaddr sa;
	struct sctp_sndrcvinfo info;
	/* sctp_recvmsg() takes socklen_t *; the old int slen was an
	 * incompatible pointer type. */
	socklen_t slen = sizeof(sa);
	int flags = 0;	/* initialized; removed dead commented-out call */

	n = sctp_recvmsg(sock, buf, n, &sa, &slen, &info, &flags);
	if (n == -1) {
		perror("recv");
	}
	return n;
}
/*
 * Send one n-byte SCTP message on sock to the destination in *adr.
 * Returns the sctp_sendmsg() result: bytes sent, or -1 (perror'd).
 *
 * sctp_sendmsg argument order is (sd, msg, len, to, tolen, ppid, flags,
 * stream_no, timetolive, context): ppid=666 and context=444 are arbitrary
 * marker values, stream 0, ttl 0 (message never expires).
 * NOTE(review): MSG_ADDR_OVER is the early-lksctp flag name; newer
 * headers call it SCTP_ADDR_OVER — confirm against the installed libsctp.
 */
int do_send(int sock, adr_t *adr, void *buf, int n) {
n = sctp_sendmsg(sock, buf, n, &adr->sockadr, adr->len, 666, MSG_ADDR_OVER, 0, 0, 444);
if (n == -1) {
perror("send");
}
return n;
}
/*
 * SCTP variant of the random-forwarding test: spawn <# processes> peers,
 * each with a one-to-many SCTP socket, inject <# messages> one-byte
 * tokens and forward every received token to a random peer forever.
 * Does not return in normal operation.
 */
int main(int argc, char *argv[]) {
	int i, n, cnt, pid, dest;
	int lsock;
	char data, rank;	/* dropped unused id, t0, t1 */
	int port = 11100;

	/* The original dereferenced argv[1]/argv[2] unchecked and crashed
	 * when run without arguments. */
	if (argc < 3) {
		fprintf(stderr, "usage: %s <# processes> <# messages>\n", argv[0]);
		return 1;
	}
	/* # processes */
	cnt = atoi(argv[1]);
	/* # messages */
	n = atoi(argv[2]);
	if (cnt < 2) {
		fprintf(stderr, "<# processes> must be at least 2\n");
		return 1;
	}
	{
		adr_t dests[cnt];	/* rank -> resolved address */

		/* Create processes: the parent keeps rank 0; child i takes
		 * rank cnt-i and stops forking. */
		rank = 0;
		for (i=1; i<cnt; i++) {
			pid = fork();
			if (pid == 0) {
				rank = cnt-i;
				break;
			}
		}
		/* Setup connections: one-to-many SCTP needs no per-peer
		 * connects, only every peer's resolved address. */
		lsock = init_listen(port+rank);
		sleep(2); /* "Ensure" that all processes are listening, HACK!!! */
		for (i=0; i<cnt; i++) {
			get_adr(dests+i, port+i);
		}
		srandom(rank);
		/* Write startup messages (token content is irrelevant) */
		if (rank < n) {
			do_send(lsock, &dests[(rank+1)%cnt], &data, 1);
		}
		sleep(1);
		/* Receive and forward messages to random destinations */
		while (1) {
			do_recv(lsock, &data, 1);
			dest = random()%cnt;
			/* Do not send to self */
			if (dest == rank) {
				dest = (dest+1)%cnt;
			}
			do_send(lsock, &dests[dest], &data, 1);
		}
	}
	return 0;
}
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2005-05-31 16:50 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-05-23 11:17 Issues with INET sockets through loopback (lo) Hans Henrik Happe
2005-05-23 12:09 ` DervishD
2005-05-24 12:12 ` Hans Henrik Happe
2005-05-24 12:23 ` Avi Kivity
2005-05-31 16:18 ` Hans Henrik Happe
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox